? build
? dist
? META-INF
? prj
? src/java/org/apache/lucene/index/SegmentTermPositionVector.java
? src/java/org/apache/lucene/index/TermVectorOffsetInfo.java
cvs server: Diffing .
cvs server: Diffing docs
cvs server: Diffing docs/images
cvs server: Diffing docs/lucene-sandbox
cvs server: Diffing docs/lucene-sandbox/larm
cvs server: Diffing lib
cvs server: Diffing src
cvs server: Diffing src/demo
cvs server: Diffing src/demo/org
cvs server: Diffing src/demo/org/apache
cvs server: Diffing src/demo/org/apache/lucene
cvs server: Diffing src/demo/org/apache/lucene/demo
cvs server: Diffing src/demo/org/apache/lucene/demo/html
cvs server: Diffing src/java
cvs server: Diffing src/java/org
cvs server: Diffing src/java/org/apache
cvs server: Diffing src/java/org/apache/lucene
cvs server: Diffing src/java/org/apache/lucene/analysis
cvs server: Diffing src/java/org/apache/lucene/analysis/de
cvs server: Diffing src/java/org/apache/lucene/analysis/ru
cvs server: Diffing src/java/org/apache/lucene/analysis/standard
cvs server: Diffing src/java/org/apache/lucene/document
Index: src/java/org/apache/lucene/document/Field.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/document/Field.java,v
retrieving revision 1.16
diff -u -r1.16 Field.java
--- src/java/org/apache/lucene/document/Field.java 17 Aug 2004 20:22:33 -0000 1.16
+++ src/java/org/apache/lucene/document/Field.java 19 Aug 2004 11:57:25 -0000
@@ -16,11 +16,11 @@
* limitations under the License.
*/
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Similarity;
+
import java.io.Reader;
-import java.util.Date;
-import org.apache.lucene.index.IndexReader; // for javadoc
-import org.apache.lucene.search.Similarity; // for javadoc
-import org.apache.lucene.search.Hits; // for javadoc
+import java.io.Reader;
+import java.util.Date;
/**
A field is a section of a Document. Each field has two parts, a name and a
@@ -34,6 +34,8 @@
private String name = "body";
private String stringValue = null;
private boolean storeTermVector = false;
+ private boolean storeOffsetWithTermVector = false;
+ private boolean storePositionWithTermVector = false;
private Reader readerValue = null;
private boolean isStored = false;
private boolean isIndexed = true;
@@ -93,7 +95,7 @@
and is stored in the index, for return with hits. Useful for short text
fields, like "title" or "subject". Term vector will not be stored for this field. */
public static final Field Text(String name, String value) {
- return Text(name, value, false);
+ return Text(name, value, false, false, false);
}
/** Constructs a Date-valued Field that is not tokenized and is indexed,
@@ -101,39 +103,59 @@
public static final Field Keyword(String name, Date value) {
return new Field(name, DateField.dateToString(value), true, true, false);
}
+
+ public static final Field Text(String name, String value, boolean storeTermVector) {
+ return Text(name, value, storeTermVector, false, false);
+ }
/** Constructs a String-valued Field that is tokenized and indexed,
and is stored in the index, for return with hits. Useful for short text
fields, like "title" or "subject". */
- public static final Field Text(String name, String value, boolean storeTermVector) {
- return new Field(name, value, true, true, true, storeTermVector);
+ public static final Field Text(String name, String value, boolean storeTermVector,
+ boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
+ return new Field(name, value, true, true, true, storeTermVector, storePositionWithTermVector,
+ storeOffsetWithTermVector);
}
/** Constructs a String-valued Field that is tokenized and indexed,
but that is not stored in the index. Term vector will not be stored for this field. */
public static final Field UnStored(String name, String value) {
- return UnStored(name, value, false);
+ return UnStored(name, value, false, false, false);
+ }
+
+ public static final Field UnStored(String name, String value, boolean storeTermVector) {
+ return UnStored(name, value, storeTermVector, false, false);
}
/** Constructs a String-valued Field that is tokenized and indexed,
but that is not stored in the index. */
- public static final Field UnStored(String name, String value, boolean storeTermVector) {
- return new Field(name, value, false, true, true, storeTermVector);
+ public static final Field UnStored(String name, String value, boolean storeTermVector,
+ boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
+ return new Field(name, value, false, true, true, storeTermVector, storePositionWithTermVector,
+ storeOffsetWithTermVector);
}
/** Constructs a Reader-valued Field that is tokenized and indexed, but is
not stored in the index verbatim. Useful for longer text fields, like
"body". Term vector will not be stored for this field. */
public static final Field Text(String name, Reader value) {
- return Text(name, value, false);
+ return Text(name, value, false, false, false);
}
+ public static final Field Text(String name, Reader value, boolean storeTermVector) {
+ return Text(name, value, storeTermVector, false, false);
+ }
/** Constructs a Reader-valued Field that is tokenized and indexed, but is
not stored in the index verbatim. Useful for longer text fields, like
- "body". */
- public static final Field Text(String name, Reader value, boolean storeTermVector) {
+     "body". Term vector storage, including token positions and character
+     offsets, is controlled by the boolean parameters. */
+ public static final Field Text(String name, Reader value, boolean storeTermVector,
+ boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
Field f = new Field(name, value);
f.storeTermVector = storeTermVector;
+ f.storePositionWithTermVector = storePositionWithTermVector;
+ f.storeOffsetWithTermVector = storeOffsetWithTermVector;
return f;
}
@@ -154,7 +176,7 @@
*/
public Field(String name, String string,
boolean store, boolean index, boolean token) {
- this(name, string, store, index, token, false);
+ this(name, string, store, index, token, false, false, false);
}
/**
@@ -165,9 +187,17 @@
* @param index true if the field should be indexed
* @param token true if the field should be tokenized
* @param storeTermVector true if we should store the Term Vector info
+   * @param storePositionWithTermVector true if we should store the Token position information with the term vector
+   * @param storeOffsetWithTermVector true if we should store the Token character offset information with the term vector
+ *
+ * @see org.apache.lucene.analysis.Token
+ * @see org.apache.lucene.analysis.Token#startOffset()
+ * @see org.apache.lucene.analysis.Token#endOffset()
+ * @see org.apache.lucene.analysis.Token#getPositionIncrement()
*/
public Field(String name, String string,
- boolean store, boolean index, boolean token, boolean storeTermVector) {
+ boolean store, boolean index, boolean token, boolean storeTermVector,
+ boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
if (name == null)
throw new IllegalArgumentException("name cannot be null");
if (string == null)
@@ -181,6 +211,9 @@
this.isIndexed = index;
this.isTokenized = token;
this.storeTermVector = storeTermVector;
+ this.storePositionWithTermVector = storePositionWithTermVector;
+ this.storeOffsetWithTermVector = storeOffsetWithTermVector;
+
}
Field(String name, Reader reader) {
@@ -217,6 +250,16 @@
*/
public final boolean isTermVectorStored() { return storeTermVector; }
+ public boolean isStoreOffsetWithTermVector()
+ {
+ return storeOffsetWithTermVector;
+ }
+
+ public boolean isStorePositionWithTermVector()
+ {
+ return storePositionWithTermVector;
+ }
+
/** Prints a Field for human consumption. */
public final String toString() {
StringBuffer result = new StringBuffer();
@@ -236,6 +279,16 @@
if (result.length() > 0)
result.append(",");
result.append("termVector");
+ }
+ if (storeOffsetWithTermVector) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVectorOffsets");
+ }
+ if (storePositionWithTermVector) {
+ if (result.length() > 0)
+ result.append(",");
+ result.append("termVectorPosition");
}
result.append('<');
result.append(name);
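
For reference, a minimal sketch of how the new Field.Text overloads are meant
to be used when building a document (the Document/IndexWriter plumbing is
standard Lucene usage, not part of this patch; the field names are made up):

    Document doc = new Document();
    // term vector plus token positions and character offsets
    doc.add(Field.Text("body", "some text to index", true, true, true));
    // term vector only -- equivalent to the old three-argument form
    doc.add(Field.Text("title", "a title", true));
    // no term vector at all, exactly as before
    doc.add(Field.Text("subject", "a subject"));
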
cvs server: Diffing src/java/org/apache/lucene/index
Index: src/java/org/apache/lucene/index/DocumentWriter.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/DocumentWriter.java,v
retrieving revision 1.12
diff -u -r1.12 DocumentWriter.java
--- src/java/org/apache/lucene/index/DocumentWriter.java 10 Jul 2004 06:19:01 -0000 1.12
+++ src/java/org/apache/lucene/index/DocumentWriter.java 19 Aug 2004 11:57:25 -0000
@@ -16,21 +16,21 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-import java.util.Hashtable;
-import java.util.Enumeration;
-import java.util.Arrays;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.OutputStream;
-import org.apache.lucene.search.Similarity;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.Enumeration;
+import java.util.Hashtable;
final class DocumentWriter {
private Analyzer analyzer;
@@ -125,7 +125,7 @@
if (field.isIndexed()) {
if (!field.isTokenized()) { // un-tokenized field
- addPosition(fieldName, field.stringValue(), position++);
+ addPosition(fieldName, field.stringValue(), position++, new TermVectorOffsetInfo(-1,-1));
length++;
} else {
Reader reader; // find or make Reader
@@ -142,7 +142,7 @@
try {
for (Token t = stream.next(); t != null; t = stream.next()) {
position += (t.getPositionIncrement() - 1);
- addPosition(fieldName, t.termText(), position++);
+ addPosition(fieldName, t.termText(), position++, new TermVectorOffsetInfo(t.startOffset(), t.endOffset()));
if (++length > maxFieldLength) break;
}
} finally {
@@ -159,8 +159,9 @@
private final Term termBuffer = new Term("", ""); // avoid consing
- private final void addPosition(String field, String text, int position) {
+ private final void addPosition(String field, String text, int position, TermVectorOffsetInfo offset) {
termBuffer.set(field, text);
+ //System.out.println("Offset: " + offset);
Posting ti = (Posting) postingTable.get(termBuffer);
if (ti != null) { // word seen before
int freq = ti.freq;
@@ -172,10 +173,23 @@
ti.positions = newPositions;
}
ti.positions[freq] = position; // add new position
+
+ if (offset != null) {
+ if (ti.offsets.length == freq){
+ TermVectorOffsetInfo [] newOffsets = new TermVectorOffsetInfo[freq*2];
+ TermVectorOffsetInfo [] offsets = ti.offsets;
+ for (int i = 0; i < freq; i++)
+ {
+ newOffsets[i] = offsets[i];
+ }
+ ti.offsets = newOffsets;
+ }
+ ti.offsets[freq] = offset;
+ }
ti.freq = freq + 1; // update frequency
} else { // word not seen before
Term term = new Term(field, text, false);
- postingTable.put(term, new Posting(term, position));
+ postingTable.put(term, new Posting(term, position, offset));
}
}
@@ -294,12 +308,13 @@
termVectorWriter.openDocument();
}
termVectorWriter.openField(currentField);
+
} else if (termVectorWriter != null) {
termVectorWriter.closeField();
}
}
if (termVectorWriter != null && termVectorWriter.isFieldOpen()) {
- termVectorWriter.addTerm(posting.term.text(), postingFreq);
+ termVectorWriter.addTerm(posting.term.text(), postingFreq, posting.positions, posting.offsets);
}
}
if (termVectorWriter != null)
@@ -336,11 +351,14 @@
Term term; // the Term
int freq; // its frequency in doc
int[] positions; // positions it occurs at
+ TermVectorOffsetInfo [] offsets;
- Posting(Term t, int position) {
+ Posting(Term t, int position, TermVectorOffsetInfo offset) {
term = t;
freq = 1;
positions = new int[1];
positions[0] = position;
+ offsets = new TermVectorOffsetInfo[1];
+ offsets[0] = offset;
}
}
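
A note on the Posting changes above: the offsets array is grown in lockstep
with the positions array, so offsets[k] always describes the k-th occurrence
recorded in positions[k]. The manual copy loop in addPosition is equivalent to
this sketch (System.arraycopy does the same work in one call):

    if (ti.offsets.length == freq) {           // array is full: double it
      TermVectorOffsetInfo[] grown = new TermVectorOffsetInfo[freq * 2];
      System.arraycopy(ti.offsets, 0, grown, 0, freq);
      ti.offsets = grown;
    }
    ti.offsets[freq] = offset;                 // record the new occurrence
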
Index: src/java/org/apache/lucene/index/FieldInfo.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/FieldInfo.java,v
retrieving revision 1.3
diff -u -r1.3 FieldInfo.java
--- src/java/org/apache/lucene/index/FieldInfo.java 29 Mar 2004 22:48:02 -0000 1.3
+++ src/java/org/apache/lucene/index/FieldInfo.java 19 Aug 2004 11:57:25 -0000
@@ -23,11 +23,16 @@
// true if term vector for this field should be stored
boolean storeTermVector;
+ boolean storeOffsetWithTermVector = false;
+ boolean storePositionWithTermVector = false;
- FieldInfo(String na, boolean tk, int nu, boolean storeTermVector) {
+ FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
+ boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
name = na;
isIndexed = tk;
number = nu;
this.storeTermVector = storeTermVector;
+ this.storeOffsetWithTermVector = storeOffsetWithTermVector;
+ this.storePositionWithTermVector = storePositionWithTermVector;
}
}
Index: src/java/org/apache/lucene/index/FieldInfos.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/FieldInfos.java,v
retrieving revision 1.9
diff -u -r1.9 FieldInfos.java
--- src/java/org/apache/lucene/index/FieldInfos.java 17 Aug 2004 20:38:45 -0000 1.9
+++ src/java/org/apache/lucene/index/FieldInfos.java 19 Aug 2004 11:57:25 -0000
@@ -16,15 +16,14 @@
* limitations under the License.
*/
-import java.util.*;
-import java.io.IOException;
-
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.InputStream;
+import org.apache.lucene.store.OutputStream;
+
+import java.io.IOException;
+import java.util.*;
/** Access to the Field Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Field Info file. Objects
@@ -61,7 +60,8 @@
Enumeration fields = doc.fields();
while (fields.hasMoreElements()) {
Field field = (Field) fields.nextElement();
- add(field.name(), field.isIndexed(), field.isTermVectorStored());
+ add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
+ field.isStoreOffsetWithTermVector());
}
}
@@ -69,10 +69,11 @@
* @param names The names of the fields
* @param storeTermVectors Whether the fields store term vectors or not
*/
- public void addIndexed(Collection names, boolean storeTermVectors) {
+ public void addIndexed(Collection names, boolean storeTermVectors, boolean storePositionWithTermVector,
+ boolean storeOffsetWithTermVector) {
Iterator i = names.iterator();
while (i.hasNext()) {
- add((String)i.next(), true, storeTermVectors);
+ add((String)i.next(), true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
}
}
@@ -94,13 +95,15 @@
   * Calls the five-parameter add with false for the term vector parameters
* @param name The name of the Field
* @param isIndexed true if the field is indexed
- * @see #add(String, boolean, boolean)
+ * @see #add(String, boolean, boolean, boolean, boolean)
*/
public void add(String name, boolean isIndexed) {
- add(name, isIndexed, false);
+ add(name, isIndexed, false, false, false);
}
-
+ public void add(String name, boolean isIndexed, boolean storeTermVector){
+ add(name, isIndexed, storeTermVector, false, false);
+ }
/** If the field is not yet known, adds it. If it is known, checks to make
* sure that the isIndexed flag is the same as was given previously for this
* field. If not - marks it as being indexed. Same goes for storeTermVector
@@ -109,10 +112,11 @@
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
*/
- public void add(String name, boolean isIndexed, boolean storeTermVector) {
+ public void add(String name, boolean isIndexed, boolean storeTermVector,
+ boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
FieldInfo fi = fieldInfo(name);
if (fi == null) {
- addInternal(name, isIndexed, storeTermVector);
+ addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector);
} else {
if (fi.isIndexed != isIndexed) {
fi.isIndexed = true; // once indexed, always index
@@ -120,13 +124,21 @@
if (fi.storeTermVector != storeTermVector) {
fi.storeTermVector = true; // once vector, always vector
}
+      if (fi.storePositionWithTermVector != storePositionWithTermVector) {
+        fi.storePositionWithTermVector = true;        // once positions, always positions
+      }
+      if (fi.storeOffsetWithTermVector != storeOffsetWithTermVector) {
+        fi.storeOffsetWithTermVector = true;          // once offsets, always offsets
+      }
}
}
private void addInternal(String name, boolean isIndexed,
- boolean storeTermVector) {
+ boolean storeTermVector, boolean storePositionWithTermVector,
+ boolean storeOffsetWithTermVector) {
FieldInfo fi =
- new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector);
+ new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
+ storeOffsetWithTermVector);
byNumber.add(fi);
byName.put(name, fi);
}
@@ -180,6 +192,8 @@
byte bits = 0x0;
if (fi.isIndexed) bits |= 0x1;
if (fi.storeTermVector) bits |= 0x2;
+ if (fi.storePositionWithTermVector) bits |= 0x4;
+ if (fi.storeOffsetWithTermVector) bits |= 0x8;
output.writeString(fi.name);
//Was REMOVE
//output.writeByte((byte)(fi.isIndexed ? 1 : 0));
@@ -194,7 +208,9 @@
byte bits = input.readByte();
boolean isIndexed = (bits & 0x1) != 0;
boolean storeTermVector = (bits & 0x2) != 0;
- addInternal(name, isIndexed, storeTermVector);
+ boolean storePositionsWithTermVector = (bits & 0x4) != 0;
+ boolean storeOffsetWithTermVector = (bits & 0x8) != 0;
+ addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector);
}
}
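
The .fnm flags byte now packs four booleans, one bit per flag, and the read
path tests exactly the masks the write path sets:

    byte bits = 0x0;
    if (fi.isIndexed)                   bits |= 0x1;
    if (fi.storeTermVector)             bits |= 0x2;
    if (fi.storePositionWithTermVector) bits |= 0x4;
    if (fi.storeOffsetWithTermVector)   bits |= 0x8;

    // decoding reverses the encoding by testing the same masks:
    boolean storePositionWithTermVector = (bits & 0x4) != 0;
    boolean storeOffsetWithTermVector   = (bits & 0x8) != 0;
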
Index: src/java/org/apache/lucene/index/FieldsReader.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/FieldsReader.java,v
retrieving revision 1.7
diff -u -r1.7 FieldsReader.java
--- src/java/org/apache/lucene/index/FieldsReader.java 29 Mar 2004 22:48:02 -0000 1.7
+++ src/java/org/apache/lucene/index/FieldsReader.java 19 Aug 2004 11:57:26 -0000
@@ -16,12 +16,12 @@
* limitations under the License.
*/
-import java.io.IOException;
-
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.InputStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
+
+import java.io.IOException;
/**
* Class responsible for access to stored document fields.
@@ -71,7 +71,8 @@
fieldsStream.readString(), // read value
true, // stored
fi.isIndexed, // indexed
- (bits & 1) != 0, fi.storeTermVector)); // vector
+ (bits & 1) != 0, fi.storeTermVector,
+ fi.storePositionWithTermVector, fi.storeOffsetWithTermVector)); // vector
}
return doc;
Index: src/java/org/apache/lucene/index/FilterIndexReader.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/FilterIndexReader.java,v
retrieving revision 1.12
diff -u -r1.12 FilterIndexReader.java
--- src/java/org/apache/lucene/index/FilterIndexReader.java 14 Jun 2004 00:15:24 -0000 1.12
+++ src/java/org/apache/lucene/index/FilterIndexReader.java 19 Aug 2004 11:57:26 -0000
@@ -16,11 +16,11 @@
* limitations under the License.
*/
+import org.apache.lucene.document.Document;
+
import java.io.IOException;
import java.util.Collection;
-import org.apache.lucene.document.Document;
-
/** A FilterIndexReader contains another IndexReader, which it
* uses as its basic source of data, possibly transforming the data along the
* way or providing additional functionality. The class
@@ -145,5 +145,9 @@
*/
public Collection getIndexedFieldNames(boolean storedTermVector) {
return in.getIndexedFieldNames(storedTermVector);
+ }
+
+ public Collection getTermVectorFieldNames(boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
+ return in.getTermVectorFieldNames(storePositionWithTermVector, storeOffsetWithTermVector);
}
}
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java,v
retrieving revision 1.35
diff -u -r1.35 IndexReader.java
--- src/java/org/apache/lucene/index/IndexReader.java 15 Aug 2004 20:49:30 -0000 1.35
+++ src/java/org/apache/lucene/index/IndexReader.java 19 Aug 2004 11:57:26 -0000
@@ -16,16 +16,16 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.io.File;
-import java.util.Collection;
-
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Lock;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field; // for javadoc
-import org.apache.lucene.search.Similarity;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
/** IndexReader is an abstract class, providing an interface for accessing an
index. Search of an index is done entirely through this abstract interface,
@@ -554,6 +554,26 @@
* @return Collection of Strings indicating the names of the fields
*/
public abstract Collection getIndexedFieldNames(boolean storedTermVector);
+
+ /**
+   * Get all field names that have term vector information, as well as position and/or offset information.
+   * @param storePositionWithTermVector true to select fields whose term vectors store token positions
+   * @param storeOffsetWithTermVector true to select fields whose term vectors store character offsets
+   * @return Collection of Strings indicating the names of the fields
+ */
+ public abstract Collection getTermVectorFieldNames(boolean storePositionWithTermVector,
+ boolean storeOffsetWithTermVector);
+
+ /**
+ *
+ * @param storedTermVector
+ * @return Collection of Strings indicating the names of the fields
+ * @see #getIndexedFieldNames(boolean, boolean, boolean) with the last two as false and false
+ */
+ /*public Collection getIndexedFieldNames(boolean storedTermVector)
+ {
+ return getIndexedFieldNames(storedTermVector, false, false);
+ }*/
/**
* Returns true iff the index in the named directory is
Index: src/java/org/apache/lucene/index/MultiReader.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/MultiReader.java,v
retrieving revision 1.8
diff -u -r1.8 MultiReader.java
--- src/java/org/apache/lucene/index/MultiReader.java 6 Aug 2004 20:50:29 -0000 1.8
+++ src/java/org/apache/lucene/index/MultiReader.java 19 Aug 2004 11:57:26 -0000
@@ -16,16 +16,12 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.Iterator;
-import java.util.Set;
-
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
+import java.io.IOException;
+import java.util.*;
+
/** An IndexReader which reads multiple indexes, appending their content.
*
* @version $Id: MultiReader.java,v 1.8 2004/08/06 20:50:29 dnaber Exp $
@@ -248,6 +244,17 @@
for (int i = 0; i < subReaders.length; i++) {
IndexReader reader = subReaders[i];
Collection names = reader.getIndexedFieldNames(storedTermVector);
+ fieldSet.addAll(names);
+ }
+ return fieldSet;
+ }
+
+ public Collection getTermVectorFieldNames(boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
+ // maintain a unique set of field names
+ Set fieldSet = new HashSet();
+ for (int i = 0; i < subReaders.length; i++) {
+ IndexReader reader = subReaders[i];
+ Collection names = reader.getTermVectorFieldNames(storePositionWithTermVector, storeOffsetWithTermVector);
fieldSet.addAll(names);
}
return fieldSet;
Index: src/java/org/apache/lucene/index/SegmentMerger.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java,v
retrieving revision 1.14
diff -u -r1.14 SegmentMerger.java
--- src/java/org/apache/lucene/index/SegmentMerger.java 15 Aug 2004 11:26:05 -0000 1.14
+++ src/java/org/apache/lucene/index/SegmentMerger.java 19 Aug 2004 11:57:26 -0000
@@ -16,14 +16,14 @@
* limitations under the License.
*/
-import java.util.Vector;
-import java.util.Iterator;
-import java.io.IOException;
-
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.RAMOutputStream;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Vector;
+
/**
* The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add},
* into a single Segment. After adding the appropriate readers, call the merge method to combine the
@@ -157,8 +157,11 @@
int docCount = 0;
for (int i = 0; i < readers.size(); i++) {
IndexReader reader = (IndexReader) readers.elementAt(i);
- fieldInfos.addIndexed(reader.getIndexedFieldNames(true), true);
- fieldInfos.addIndexed(reader.getIndexedFieldNames(false), false);
+      //Can only store position and offset information when storing term vectors.
+      //Each flag combination must be queried separately, since the match is exact:
+      fieldInfos.addIndexed(reader.getTermVectorFieldNames(true, true), true, true, true);
+      fieldInfos.addIndexed(reader.getTermVectorFieldNames(true, false), true, true, false);
+      fieldInfos.addIndexed(reader.getTermVectorFieldNames(false, true), true, false, true);
+      fieldInfos.addIndexed(reader.getTermVectorFieldNames(false, false), true, false, false);
+      fieldInfos.addIndexed(reader.getIndexedFieldNames(false), false, false, false);
fieldInfos.add(reader.getFieldNames(false), false);
}
fieldInfos.write(directory, segment + ".fnm");
@@ -211,9 +214,15 @@
termVectorsWriter.openField(termVector.getField());
String [] terms = termVector.getTerms();
int [] freqs = termVector.getTermFrequencies();
+        boolean positionVector = termVector instanceof TermPositionVector;
for (int t = 0; t < terms.length; t++) {
- termVectorsWriter.addTerm(terms[t], freqs[t]);
+          if (!positionVector) {
+ termVectorsWriter.addTerm(terms[t], freqs[t]);
+ } else {
+ termVectorsWriter.addTerm(terms[t], freqs[t], ((TermPositionVector)termVector).getTermPositions(t),
+ ((TermPositionVector)termVector).getOffsets(t));
+ }
}
}
termVectorsWriter.closeDocument();
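
Because getTermVectorFieldNames matches both flags exactly (see the
SegmentReader implementation below), the merger has to query each
(position, offset) combination separately. The four term vector addIndexed
calls above amount to this sketch:

    // each flag combination selects a disjoint set of term vector fields
    boolean[][] combos = {{true, true}, {true, false}, {false, true}, {false, false}};
    for (int c = 0; c < combos.length; c++) {
      fieldInfos.addIndexed(reader.getTermVectorFieldNames(combos[c][0], combos[c][1]),
                            true, combos[c][0], combos[c][1]);
    }
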
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java,v
retrieving revision 1.26
diff -u -r1.26 SegmentReader.java
--- src/java/org/apache/lucene/index/SegmentReader.java 17 Aug 2004 08:56:08 -0000 1.26
+++ src/java/org/apache/lucene/index/SegmentReader.java 19 Aug 2004 11:57:26 -0000
@@ -16,20 +16,15 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Enumeration;
-import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.Set;
-import java.util.Vector;
-
import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.store.InputStream;
import org.apache.lucene.store.OutputStream;
-import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitVector;
+import java.io.IOException;
+import java.util.*;
+
/**
* FIXME: Describe class SegmentReader here.
*
@@ -325,6 +320,26 @@
}
return fieldSet;
+ }
+
+ /**
+   * Get all field names that have term vector information, as well as position and/or offset information.
+   *
+   * @param storePositionWithTermVector true to select fields whose term vectors store token positions
+   * @param storeOffsetWithTermVector true to select fields whose term vectors store character offsets
+   * @return Collection of Strings indicating the names of the fields
+ */
+ public Collection getTermVectorFieldNames(boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
+ // maintain a unique set of field names
+ Set fieldSet = new HashSet();
+ for (int i = 0; i < fieldInfos.size(); i++) {
+ FieldInfo fi = fieldInfos.fieldInfo(i);
+      if (fi.isIndexed && fi.storeTermVector && fi.storePositionWithTermVector == storePositionWithTermVector
+          && fi.storeOffsetWithTermVector == storeOffsetWithTermVector){
+ fieldSet.add(fi.name);
+ }
+ }
+ return fieldSet;
}
public synchronized byte[] norms(String field) throws IOException {
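
Since the comparison in getTermVectorFieldNames above is an exact match on
both flags, a field that stores positions and offsets is returned only when
both arguments are true. Assuming a field created with
Field.Text(name, value, true, true, true):

    reader.getTermVectorFieldNames(true, true);   // contains the field
    reader.getTermVectorFieldNames(true, false);  // does not contain it
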
Index: src/java/org/apache/lucene/index/SegmentTermVector.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermVector.java,v
retrieving revision 1.3
diff -u -r1.3 SegmentTermVector.java
--- src/java/org/apache/lucene/index/SegmentTermVector.java 10 Jul 2004 06:19:01 -0000 1.3
+++ src/java/org/apache/lucene/index/SegmentTermVector.java 19 Aug 2004 11:57:26 -0000
@@ -4,9 +4,9 @@
/**
*/
class SegmentTermVector implements TermFreqVector {
- private String field;
- private String terms[];
- private int termFreqs[];
+ protected String field;
+ protected String terms[];
+ protected int termFreqs[];
SegmentTermVector(String field, String terms[], int termFreqs[]) {
this.field = field;
Index: src/java/org/apache/lucene/index/TermPositionVector.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/TermPositionVector.java,v
retrieving revision 1.1
diff -u -r1.1 TermPositionVector.java
--- src/java/org/apache/lucene/index/TermPositionVector.java 20 Feb 2004 20:14:55 -0000 1.1
+++ src/java/org/apache/lucene/index/TermPositionVector.java 19 Aug 2004 11:57:26 -0000
@@ -4,10 +4,20 @@
* positions in which each of the terms is found.
*/
public interface TermPositionVector extends TermFreqVector {
-
+
/** Returns an array of positions in which the term is found.
   * Terms are identified by the index at which they appear in the
-   * term number array obtained from getTermNumbers method.
+   * term String array, i.e. the value returned by the indexOf method.
*/
public int[] getTermPositions(int index);
+
+ /**
+ * Returns an array of TermVectorOffsetInfo in which the term is found.
+ *
+ * @see org.apache.lucene.analysis.Token
+ *
+ * @param index The position in the array to get the offsets from
+   * @return An array of TermVectorOffsetInfo objects, or an empty array if no offsets were stored
+ */
+ public TermVectorOffsetInfo [] getOffsets(int index);
}
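
A minimal consumer sketch for the extended interface, assuming the existing
IndexReader.getTermFreqVector(int, String) accessor and a field indexed with
positions and offsets (the doc number, field name and term are made up):

    TermFreqVector tfv = reader.getTermFreqVector(0, "body");
    if (tfv instanceof TermPositionVector) {
      TermPositionVector tpv = (TermPositionVector) tfv;
      int idx = tpv.indexOf("lucene");              // index into getTerms()
      int[] positions = tpv.getTermPositions(idx);
      TermVectorOffsetInfo[] offsets = tpv.getOffsets(idx);
    }
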
Index: src/java/org/apache/lucene/index/TermVectorsReader.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/TermVectorsReader.java,v
retrieving revision 1.3
diff -u -r1.3 TermVectorsReader.java
--- src/java/org/apache/lucene/index/TermVectorsReader.java 17 Aug 2004 20:53:16 -0000 1.3
+++ src/java/org/apache/lucene/index/TermVectorsReader.java 19 Aug 2004 11:57:26 -0000
@@ -210,11 +210,16 @@
if (numTerms == 0) return new SegmentTermVector(field, null, null);
tvf.readVInt();
-
+ byte storePosByte = tvf.readByte();
+ byte storeOffByte = tvf.readByte();
+
String terms[] = new String[numTerms];
int termFreqs[] = new int[numTerms];
-
+ int positions[][];
+ TermVectorOffsetInfo offsets[][];//we may not need these, but declare them
+ positions = new int[numTerms][];
+ offsets = new TermVectorOffsetInfo[numTerms][];
int start = 0;
int deltaLength = 0;
int totalLength = 0;
@@ -234,8 +239,36 @@
terms[i] = new String(buffer, 0, totalLength);
previousString = terms[i];
termFreqs[i] = tvf.readVInt();
+        //The next byte records whether position info was stored for this term: 1 means yes.
+        byte storingInfo = tvf.readByte();
+        if (storePosByte == 1 && storingInfo == 1)//the per-term byte should only be 1 when the field-level storePosByte is 1
+ { //read in the positions
+ int numPositions = tvf.readVInt();
+ int [] pos = new int[numPositions];
+ positions[i] = pos;
+ for (int j = 0; j < numPositions; j++)
+ {
+ pos[j] = tvf.readVInt();
+ }
+ }
+ storingInfo = tvf.readByte();
+ if (storeOffByte == 1 && storingInfo == 1)
+ {
+ int numOffsets = tvf.readVInt();
+ TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[numOffsets];
+ offsets[i] = offs;
+          for (int j = 0; j < numOffsets; j++) {
+            int startOffset = tvf.readVInt();
+            //the writer stores the end offset as a delta from the start, so add the start back
+            offs[j] = new TermVectorOffsetInfo(startOffset, startOffset + tvf.readVInt());
+          }
+ }
+ }
+ SegmentTermVector tv;
+ if (storePosByte == 1 || storeOffByte == 1){
+ tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
+ }
+ else {
+ tv = new SegmentTermVector(field, terms, termFreqs);
}
- SegmentTermVector tv = new SegmentTermVector(field, terms, termFreqs);
return tv;
}
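
Reading this together with TermVectorsWriter below, the per-field record in
the .tvf file comes out as follows (reconstructed from the code, not a
normative format spec):

    VInt  numTerms
    VInt  numDistinctPositions - numTerms
    Byte  storePositions (0 or 1)
    Byte  storeOffsets   (0 or 1)
    per term:
      VInt  shared prefix length, VInt delta length, Chars delta text
      VInt  freq
      Byte  positions stored?  [VInt count, count * VInt position]
      Byte  offsets stored?    [VInt count, count * (VInt start, VInt end - start)]
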
Index: src/java/org/apache/lucene/index/TermVectorsWriter.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java,v
retrieving revision 1.2
diff -u -r1.2 TermVectorsWriter.java
--- src/java/org/apache/lucene/index/TermVectorsWriter.java 17 Aug 2004 20:53:16 -0000 1.2
+++ src/java/org/apache/lucene/index/TermVectorsWriter.java 19 Aug 2004 11:57:26 -0000
@@ -65,16 +65,9 @@
private TVField currentField = null;
private long currentDocPointer = -1;
-
- /** Create term vectors writer for the specified segment in specified
- * directory. A new TermVectorsWriter should be created for each
- * segment. The parameter maxFields indicates how many total
- * fields are found in this document. Not all of these fields may require
- * termvectors to be stored, so the number of calls to
- * openField is less or equal to this number.
- */
- public TermVectorsWriter(Directory directory, String segment,
- FieldInfos fieldInfos)
+
+
+ public TermVectorsWriter(Directory directory, String segment, FieldInfos fieldInfos)
throws IOException {
// Open files for TermVector storage
tvx = directory.createFile(segment + TVX_EXTENSION);
@@ -83,12 +76,12 @@
tvd.writeInt(FORMAT_VERSION);
tvf = directory.createFile(segment + TVF_EXTENSION);
tvf.writeInt(FORMAT_VERSION);
-
this.fieldInfos = fieldInfos;
fields = new Vector(fieldInfos.size());
terms = new Vector();
}
+
public final void openDocument()
throws IOException {
@@ -124,7 +117,9 @@
if (!isDocumentOpen()) throw new IllegalStateException("Cannot open field when no document is open.");
closeField();
- currentField = new TVField(fieldInfos.fieldNumber(field));
+ FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+ currentField = new TVField(fieldInfo.number, fieldInfo.storePositionWithTermVector,
+ fieldInfo.storeOffsetWithTermVector);
}
/** Finished processing current field. This should be followed by a call to
@@ -160,14 +155,21 @@
if (!isDocumentOpen()) throw new IllegalStateException("Cannot add terms when document is not open");
if (!isFieldOpen()) throw new IllegalStateException("Cannot add terms when field is not open");
- addTermInternal(termText, freq);
+ addTermInternal(termText, freq, null, null);
+ }
+
+ public final void addTerm(String termText, int freq, int [] positions, TermVectorOffsetInfo [] offsets)
+ {
+ addTermInternal(termText, freq, positions, offsets);
}
- private final void addTermInternal(String termText, int freq) {
+ private final void addTermInternal(String termText, int freq, int [] positions, TermVectorOffsetInfo [] offsets) {
currentField.length += freq;
TVTerm term = new TVTerm();
term.termText = termText;
term.freq = freq;
+ term.positions = positions;
+ term.offsets = offsets;
terms.add(term);
}
@@ -197,16 +199,47 @@
addTermFreqVectorInternal(vector);
}
+ /** Add specified vectors to the document.
+ */
+ public final void addPositionVectors(TermPositionVector[] vectors)
+ throws IOException {
+ if (!isDocumentOpen()) throw new IllegalStateException("Cannot add term vectors when document is not open");
+ if (isFieldOpen()) throw new IllegalStateException("Cannot add term vectors when field is open");
+
+ for (int i = 0; i < vectors.length; i++) {
+ addTermPositionVector(vectors[i]);
+ }
+ }
+
+
+  /** Add specified vector to the document. Document must be open but no field
+   * should be open or an exception is thrown. The same document can mix addTerm
+   * and addVectors calls; however, a given field must be populated either with
+   * addTerm or with addVector.
+   */
+ public final void addTermPositionVector(TermPositionVector vector)
+ throws IOException {
+ if (!isDocumentOpen()) throw new IllegalStateException("Cannot add term vector when document is not open");
+ if (isFieldOpen()) throw new IllegalStateException("Cannot add term vector when field is open");
+ addTermPositionVectorInternal(vector);
+ }
private final void addTermFreqVectorInternal(TermFreqVector vector)
throws IOException {
openField(vector.getField());
for (int i = 0; i < vector.size(); i++) {
- addTermInternal(vector.getTerms()[i], vector.getTermFrequencies()[i]);
+ addTermInternal(vector.getTerms()[i], vector.getTermFrequencies()[i], null, null);
}
closeField();
}
-
+ private final void addTermPositionVectorInternal(TermPositionVector vector)
+ throws IOException {
+ openField(vector.getField());
+ for (int i = 0; i < vector.size(); i++) {
+ addTermInternal(vector.getTerms()[i], vector.getTermFrequencies()[i], vector.getTermPositions(i), vector.getOffsets(i));
+ }
+ closeField();
+ }
/** Close all streams. */
@@ -249,22 +282,101 @@
tvf.writeVInt(size = terms.size());
tvf.writeVInt(currentField.length - size);
+ boolean storePositions = currentField.storePositions;
+ boolean storeOffsets = currentField.storeOffsets;
+    tvf.writeByte(storePositions ? (byte)1 : (byte)0);
+    tvf.writeByte(storeOffsets ? (byte)1 : (byte)0);
String lastTermText = "";
// write term ids and positions
- for (int i = 0; i < size; i++) {
- TVTerm term = (TVTerm) terms.elementAt(i);
- //tvf.writeString(term.termText);
- int start = StringHelper.stringDifference(lastTermText, term.termText);
- int length = term.termText.length() - start;
- tvf.writeVInt(start); // write shared prefix length
- tvf.writeVInt(length); // write delta length
- tvf.writeChars(term.termText, start, length); // write delta chars
- tvf.writeVInt(term.freq);
- lastTermText = term.termText;
+ //Do it this way, so we don't have to check the flags inside the loop
+    if (!storePositions && !storeOffsets)
+ {
+ for (int i = 0; i < size; i++) {
+ TVTerm term = (TVTerm) terms.elementAt(i);
+ //tvf.writeString(term.termText);
+ writeCoreTermInfo(lastTermText, term);
+ writePositions(null, 0);//store the fact that we aren't storing the info
+ writeOffsets(null, 0);
+ lastTermText = term.termText;
+ }
+ }
+    else if (storePositions && !storeOffsets)
+ {
+ for (int i = 0; i < size; i++) {
+ TVTerm term = (TVTerm) terms.elementAt(i);
+ //tvf.writeString(term.termText);
+ writeCoreTermInfo(lastTermText, term);
+ writePositions(term.positions, term.freq);
+ writeOffsets(null, 0);//store the fact that we aren't storing offsets
+ lastTermText = term.termText;
+ }
+ }
+    else if (!storePositions && storeOffsets)
+ {
+ for (int i = 0; i < size; i++) {
+ TVTerm term = (TVTerm) terms.elementAt(i);
+ //tvf.writeString(term.termText);
+ writeCoreTermInfo(lastTermText, term);
+ writePositions(null, 0);
+ writeOffsets(term.offsets, term.freq);
+ lastTermText = term.termText;
+ }
+ }
+ else
+ {
+ for (int i = 0; i < size; i++) {
+ TVTerm term = (TVTerm) terms.elementAt(i);
+ //tvf.writeString(term.termText);
+ writeCoreTermInfo(lastTermText, term);
+ writePositions(term.positions, term.freq);
+ writeOffsets(term.offsets, term.freq);
+ lastTermText = term.termText;
+ }
}
}
-
+ private void writeCoreTermInfo(String lastTermText, TVTerm term) throws IOException {
+ int start = StringHelper.stringDifference(lastTermText, term.termText);
+ int length = term.termText.length() - start;
+ tvf.writeVInt(start); // write shared prefix length
+ tvf.writeVInt(length); // write delta length
+ tvf.writeChars(term.termText, start, length); // write delta chars
+ tvf.writeVInt(term.freq);
+ }
+
+ private void writePositions(int [] positions, int freq) throws IOException
+ {
+ if (positions != null && positions.length > 0)
+ {
+ tvf.writeByte((byte)1);
+ tvf.writeVInt(freq);
+ for (int i = 0; i < freq; i++) {
+ tvf.writeVInt(positions[i]);
+ }
+ }
+ else
+ {
+ tvf.writeByte((byte)0);
+ }
+
+ }
+ private void writeOffsets(TermVectorOffsetInfo [] offsets, int freq) throws IOException
+ {
+ if (offsets != null && offsets.length > 0)
+ {
+ tvf.writeByte((byte)1);
+ tvf.writeVInt(freq);
+
+ for (int i = 0; i < freq; i++) {
+ tvf.writeVInt(offsets[i].getStartOffset());
+ tvf.writeVInt(offsets[i].getEndOffset() - offsets[i].getStartOffset()); //Save the diff between the two.
+ }
+ }
+ else
+ {
+ tvf.writeByte((byte)0);
+ }
+ }
private void writeDoc() throws IOException {
@@ -304,16 +416,20 @@
int number;
long tvfPointer = 0;
int length = 0; // number of distinct term positions
-
- TVField(int number) {
+ boolean storePositions = false;
+ boolean storeOffsets = false;
+ TVField(int number, boolean storePos, boolean storeOff) {
this.number = number;
+ storePositions = storePos;
+ storeOffsets = storeOff;
}
}
private static class TVTerm {
String termText;
int freq = 0;
- //int positions[] = null;
+ int positions[] = null;
+ TermVectorOffsetInfo [] offsets = null;
}
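
The four writeField branches above unroll what could be a single loop, trading
duplication for keeping the two flag checks out of the per-term loop. Since
writePositions and writeOffsets already treat a null array as "not stored",
the equivalent single-loop sketch is:

    for (int i = 0; i < size; i++) {
      TVTerm term = (TVTerm) terms.elementAt(i);
      writeCoreTermInfo(lastTermText, term);
      writePositions(storePositions ? term.positions : null, term.freq);
      writeOffsets(storeOffsets ? term.offsets : null, term.freq);
      lastTermText = term.termText;
    }
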
cvs server: Diffing src/java/org/apache/lucene/queryParser
cvs server: Diffing src/java/org/apache/lucene/search
cvs server: Diffing src/java/org/apache/lucene/search/spans
cvs server: Diffing src/java/org/apache/lucene/store
cvs server: Diffing src/java/org/apache/lucene/util
cvs server: Diffing src/jsp
cvs server: Diffing src/jsp/WEB-INF
cvs server: Diffing src/test
cvs server: Diffing src/test/org
cvs server: Diffing src/test/org/apache
cvs server: Diffing src/test/org/apache/lucene
Index: src/test/org/apache/lucene/SearchTestForDuplicates.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/SearchTestForDuplicates.java,v
retrieving revision 1.6
diff -u -r1.6 SearchTestForDuplicates.java
--- src/test/org/apache/lucene/SearchTestForDuplicates.java 29 Mar 2004 22:48:05 -0000 1.6
+++ src/test/org/apache/lucene/SearchTestForDuplicates.java 19 Aug 2004 11:57:26 -0000
@@ -1,96 +1,96 @@
-package org.apache.lucene;
-
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.store.*;
-import org.apache.lucene.document.*;
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.index.*;
-import org.apache.lucene.search.*;
-import org.apache.lucene.queryParser.*;
-
-class SearchTestForDuplicates {
-
- static final String PRIORITY_FIELD ="priority";
- static final String ID_FIELD ="id";
- static final String HIGH_PRIORITY ="high";
- static final String MED_PRIORITY ="medium";
- static final String LOW_PRIORITY ="low";
-
- public static void main(String[] args) {
- try {
- Directory directory = new RAMDirectory();
- Analyzer analyzer = new SimpleAnalyzer();
- IndexWriter writer = new IndexWriter(directory, analyzer, true);
-
- final int MAX_DOCS = 225;
-
- for (int j = 0; j < MAX_DOCS; j++) {
- Document d = new Document();
- d.add(Field.Text(PRIORITY_FIELD, HIGH_PRIORITY));
- d.add(Field.Text(ID_FIELD, Integer.toString(j)));
- writer.addDocument(d);
- }
- writer.close();
-
- // try a search without OR
- Searcher searcher = new IndexSearcher(directory);
- Hits hits = null;
-
- QueryParser parser = new QueryParser(PRIORITY_FIELD, analyzer);
-
- Query query = parser.parse(HIGH_PRIORITY);
- System.out.println("Query: " + query.toString(PRIORITY_FIELD));
-
- hits = searcher.search(query);
- printHits(hits);
-
- searcher.close();
-
- // try a new search with OR
- searcher = new IndexSearcher(directory);
- hits = null;
-
- parser = new QueryParser(PRIORITY_FIELD, analyzer);
-
- query = parser.parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
- System.out.println("Query: " + query.toString(PRIORITY_FIELD));
-
- hits = searcher.search(query);
- printHits(hits);
-
- searcher.close();
-
- } catch (Exception e) {
- System.out.println(" caught a " + e.getClass() +
- "\n with message: " + e.getMessage());
- }
- }
-
- private static void printHits( Hits hits ) throws IOException {
- System.out.println(hits.length() + " total results\n");
- for (int i = 0 ; i < hits.length(); i++) {
- if ( i < 10 || (i > 94 && i < 105) ) {
- Document d = hits.doc(i);
- System.out.println(i + " " + d.get(ID_FIELD));
- }
- }
- }
-
-}
+package org.apache.lucene;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.*;
+import org.apache.lucene.document.*;
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.index.*;
+import org.apache.lucene.search.*;
+import org.apache.lucene.queryParser.*;
+
+class SearchTestForDuplicates {
+
+ static final String PRIORITY_FIELD ="priority";
+ static final String ID_FIELD ="id";
+ static final String HIGH_PRIORITY ="high";
+ static final String MED_PRIORITY ="medium";
+ static final String LOW_PRIORITY ="low";
+
+ public static void main(String[] args) {
+ try {
+ Directory directory = new RAMDirectory();
+ Analyzer analyzer = new SimpleAnalyzer();
+ IndexWriter writer = new IndexWriter(directory, analyzer, true);
+
+ final int MAX_DOCS = 225;
+
+ for (int j = 0; j < MAX_DOCS; j++) {
+ Document d = new Document();
+ d.add(Field.Text(PRIORITY_FIELD, HIGH_PRIORITY));
+ d.add(Field.Text(ID_FIELD, Integer.toString(j)));
+ writer.addDocument(d);
+ }
+ writer.close();
+
+ // try a search without OR
+ Searcher searcher = new IndexSearcher(directory);
+ Hits hits = null;
+
+ QueryParser parser = new QueryParser(PRIORITY_FIELD, analyzer);
+
+ Query query = parser.parse(HIGH_PRIORITY);
+ System.out.println("Query: " + query.toString(PRIORITY_FIELD));
+
+ hits = searcher.search(query);
+ printHits(hits);
+
+ searcher.close();
+
+ // try a new search with OR
+ searcher = new IndexSearcher(directory);
+ hits = null;
+
+ parser = new QueryParser(PRIORITY_FIELD, analyzer);
+
+ query = parser.parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
+ System.out.println("Query: " + query.toString(PRIORITY_FIELD));
+
+ hits = searcher.search(query);
+ printHits(hits);
+
+ searcher.close();
+
+ } catch (Exception e) {
+ System.out.println(" caught a " + e.getClass() +
+ "\n with message: " + e.getMessage());
+ }
+ }
+
+ private static void printHits( Hits hits ) throws IOException {
+ System.out.println(hits.length() + " total results\n");
+ for (int i = 0 ; i < hits.length(); i++) {
+ if ( i < 10 || (i > 94 && i < 105) ) {
+ Document d = hits.doc(i);
+ System.out.println(i + " " + d.get(ID_FIELD));
+ }
+ }
+ }
+
+}
cvs server: Diffing src/test/org/apache/lucene/analysis
cvs server: Diffing src/test/org/apache/lucene/analysis/de
cvs server: Diffing src/test/org/apache/lucene/analysis/ru
cvs server: Diffing src/test/org/apache/lucene/document
cvs server: Diffing src/test/org/apache/lucene/index
Index: src/test/org/apache/lucene/index/DocHelper.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/DocHelper.java,v
retrieving revision 1.1
diff -u -r1.1 DocHelper.java
--- src/test/org/apache/lucene/index/DocHelper.java 20 Feb 2004 20:14:55 -0000 1.1
+++ src/test/org/apache/lucene/index/DocHelper.java 19 Aug 2004 11:57:27 -0000
@@ -1,159 +1,159 @@
-package org.apache.lucene.index;
-
-/**
- * Created by IntelliJ IDEA.
- * User: Grant Ingersoll
- * Date: Feb 2, 2004
- * Time: 6:16:12 PM
- * $Id: DocHelper.java,v 1.1 2004/02/20 20:14:55 cutting Exp $
- * Copyright 2004. Center For Natural Language Processing
- */
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.store.Directory;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Enumeration;
-
-/**
- *
- *
- **/
-class DocHelper {
- public static final String FIELD_1_TEXT = "field one text";
- public static final String TEXT_FIELD_1_KEY = "textField1";
- public static Field textField1 = Field.Text(TEXT_FIELD_1_KEY, FIELD_1_TEXT, false);
-
- public static final String FIELD_2_TEXT = "field field field two text";
- //Fields will be lexicographically sorted. So, the order is: field, text, two
- public static final int [] FIELD_2_FREQS = {3, 1, 1};
- public static final String TEXT_FIELD_2_KEY = "textField2";
- public static Field textField2 = Field.Text(TEXT_FIELD_2_KEY, FIELD_2_TEXT, true);
-
- public static final String KEYWORD_TEXT = "Keyword";
- public static final String KEYWORD_FIELD_KEY = "keyField";
- public static Field keyField = Field.Keyword(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
-
- public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
- public static final String UNINDEXED_FIELD_KEY = "unIndField";
- public static Field unIndField = Field.UnIndexed(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
-
- public static final String UNSTORED_1_FIELD_TEXT = "unstored field text";
- public static final String UNSTORED_FIELD_1_KEY = "unStoredField1";
- public static Field unStoredField1 = Field.UnStored(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, false);
-
- public static final String UNSTORED_2_FIELD_TEXT = "unstored field text";
- public static final String UNSTORED_FIELD_2_KEY = "unStoredField2";
- public static Field unStoredField2 = Field.UnStored(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, true);
-
-// public static Set fieldNamesSet = null;
-// public static Set fieldValuesSet = null;
- public static Map nameValues = null;
-
- static
- {
-
- nameValues = new HashMap();
- nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
- nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
- nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
- nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
- nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
- nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
- }
-
- /**
- * Adds the fields above to a document
- * @param doc The document to write
- */
- public static void setupDoc(Document doc) {
- doc.add(textField1);
- doc.add(textField2);
- doc.add(keyField);
- doc.add(unIndField);
- doc.add(unStoredField1);
- doc.add(unStoredField2);
- }
- /**
- * Writes the document to the directory using a segment named "test"
- * @param dir
- * @param doc
- */
- public static void writeDoc(Directory dir, Document doc)
- {
-
- writeDoc(dir, "test", doc);
- }
- /**
- * Writes the document to the directory in the given segment
- * @param dir
- * @param segment
- * @param doc
- */
- public static void writeDoc(Directory dir, String segment, Document doc)
- {
- Analyzer analyzer = new WhitespaceAnalyzer();
- Similarity similarity = Similarity.getDefault();
- writeDoc(dir, analyzer, similarity, segment, doc);
- }
- /**
- * Writes the document to the directory segment named "test" using the specified analyzer and similarity
- * @param dir
- * @param analyzer
- * @param similarity
- * @param doc
- */
- public static void writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
- {
- writeDoc(dir, analyzer, similarity, "test", doc);
- }
- /**
- * Writes the document to the directory segment using the analyzer and the similarity score
- * @param dir
- * @param analyzer
- * @param similarity
- * @param segment
- * @param doc
- */
- public static void writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, String segment, Document doc)
- {
- DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
- try {
- writer.addDocument(segment, doc);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- public static int numFields(Document doc) {
- Enumeration fields = doc.fields();
- int result = 0;
- while (fields.hasMoreElements()) {
- fields.nextElement();
- result++;
- }
- return result;
- }
-}
-/*
- fieldNamesSet = new HashSet();
- fieldNamesSet.add(TEXT_FIELD_1_KEY);
- fieldNamesSet.add(TEXT_FIELD_2_KEY);
- fieldNamesSet.add(KEYWORD_FIELD_KEY);
- fieldNamesSet.add(UNINDEXED_FIELD_KEY);
- fieldNamesSet.add(UNSTORED_FIELD_1_KEY);
- fieldNamesSet.add(UNSTORED_FIELD_2_KEY);
- fieldValuesSet = new HashSet();
- fieldValuesSet.add(FIELD_1_TEXT);
- fieldValuesSet.add(FIELD_2_TEXT);
- fieldValuesSet.add(KEYWORD_TEXT);
- fieldValuesSet.add(UNINDEXED_FIELD_TEXT);
- fieldValuesSet.add(UNSTORED_1_FIELD_TEXT);
- fieldValuesSet.add(UNSTORED_2_FIELD_TEXT);
-*/
+package org.apache.lucene.index;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: Grant Ingersoll
+ * Date: Feb 2, 2004
+ * Time: 6:16:12 PM
+ * $Id: DocHelper.java,v 1.1 2004/02/20 20:14:55 cutting Exp $
+ * Copyright 2004. Center For Natural Language Processing
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.Directory;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Enumeration;
+
+/**
+ *
+ *
+ **/
+class DocHelper {
+ public static final String FIELD_1_TEXT = "field one text";
+ public static final String TEXT_FIELD_1_KEY = "textField1";
+ public static Field textField1 = Field.Text(TEXT_FIELD_1_KEY, FIELD_1_TEXT, false);
+
+ public static final String FIELD_2_TEXT = "field field field two text";
+ //Fields will be lexicographically sorted. So, the order is: field, text, two
+ public static final int [] FIELD_2_FREQS = {3, 1, 1};
+ public static final String TEXT_FIELD_2_KEY = "textField2";
+ public static Field textField2 = Field.Text(TEXT_FIELD_2_KEY, FIELD_2_TEXT, true, true, true);
+
+ public static final String KEYWORD_TEXT = "Keyword";
+ public static final String KEYWORD_FIELD_KEY = "keyField";
+ public static Field keyField = Field.Keyword(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
+
+ public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
+ public static final String UNINDEXED_FIELD_KEY = "unIndField";
+ public static Field unIndField = Field.UnIndexed(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
+
+ public static final String UNSTORED_1_FIELD_TEXT = "unstored field text";
+ public static final String UNSTORED_FIELD_1_KEY = "unStoredField1";
+ public static Field unStoredField1 = Field.UnStored(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, false);
+
+ public static final String UNSTORED_2_FIELD_TEXT = "unstored field text";
+ public static final String UNSTORED_FIELD_2_KEY = "unStoredField2";
+ public static Field unStoredField2 = Field.UnStored(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, true);
+
+// public static Set fieldNamesSet = null;
+// public static Set fieldValuesSet = null;
+ public static Map nameValues = null;
+
+ static
+ {
+
+ nameValues = new HashMap();
+ nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
+ nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
+ nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
+ nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
+ nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
+ nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
+ }
+
+ /**
+ * Adds the fields above to a document
+ * @param doc The document to write
+ */
+ public static void setupDoc(Document doc) {
+ doc.add(textField1);
+ doc.add(textField2);
+ doc.add(keyField);
+ doc.add(unIndField);
+ doc.add(unStoredField1);
+ doc.add(unStoredField2);
+ }
+ /**
+ * Writes the document to the directory using a segment named "test"
+ * @param dir The directory to write to
+ * @param doc The document to write
+ */
+ public static void writeDoc(Directory dir, Document doc)
+ {
+
+ writeDoc(dir, "test", doc);
+ }
+ /**
+ * Writes the document to the directory in the given segment
+ * @param dir The directory to write to
+ * @param segment The name of the segment to write
+ * @param doc The document to write
+ */
+ public static void writeDoc(Directory dir, String segment, Document doc)
+ {
+ Analyzer analyzer = new WhitespaceAnalyzer();
+ Similarity similarity = Similarity.getDefault();
+ writeDoc(dir, analyzer, similarity, segment, doc);
+ }
+ /**
+ * Writes the document to the directory segment named "test" using the specified analyzer and similarity
+ * @param dir The directory to write to
+ * @param analyzer The analyzer to use
+ * @param similarity The Similarity implementation to use
+ * @param doc The document to write
+ */
+ public static void writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
+ {
+ writeDoc(dir, analyzer, similarity, "test", doc);
+ }
+ /**
+ * Writes the document to the given directory segment using the given analyzer and Similarity
+ * @param dir The directory to write to
+ * @param analyzer The analyzer to use
+ * @param similarity The Similarity implementation to use
+ * @param segment The name of the segment to write
+ * @param doc The document to write
+ */
+ public static void writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, String segment, Document doc)
+ {
+ DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
+ try {
+ writer.addDocument(segment, doc);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ public static int numFields(Document doc) {
+ Enumeration fields = doc.fields();
+ int result = 0;
+ while (fields.hasMoreElements()) {
+ fields.nextElement(); //advance the enumeration; only the count matters
+ result++;
+ }
+ return result;
+ }
+}
+/*
+ fieldNamesSet = new HashSet();
+ fieldNamesSet.add(TEXT_FIELD_1_KEY);
+ fieldNamesSet.add(TEXT_FIELD_2_KEY);
+ fieldNamesSet.add(KEYWORD_FIELD_KEY);
+ fieldNamesSet.add(UNINDEXED_FIELD_KEY);
+ fieldNamesSet.add(UNSTORED_FIELD_1_KEY);
+ fieldNamesSet.add(UNSTORED_FIELD_2_KEY);
+ fieldValuesSet = new HashSet();
+ fieldValuesSet.add(FIELD_1_TEXT);
+ fieldValuesSet.add(FIELD_2_TEXT);
+ fieldValuesSet.add(KEYWORD_TEXT);
+ fieldValuesSet.add(UNINDEXED_FIELD_TEXT);
+ fieldValuesSet.add(UNSTORED_1_FIELD_TEXT);
+ fieldValuesSet.add(UNSTORED_2_FIELD_TEXT);
+*/
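
A note for reviewers on the helper above: textField2 is the only field indexed with both positions and offsets, so it is the one the tests below downcast to TermPositionVector. A minimal round-trip sketch, assuming the package-private index classes used by the tests; the class name DocHelperSketch is made up for illustration:

package org.apache.lucene.index;

import org.apache.lucene.document.Document;
import org.apache.lucene.store.RAMDirectory;

import java.io.IOException;

//Sketch only: write one DocHelper document and confirm that the field
//stored with positions and offsets comes back as a TermPositionVector.
class DocHelperSketch {
  public static void main(String[] args) throws IOException {
    RAMDirectory dir = new RAMDirectory();
    Document doc = new Document();
    DocHelper.setupDoc(doc);      //adds textField2 with positions and offsets
    DocHelper.writeDoc(dir, doc); //writes a one-document segment named "test"
    SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir));
    TermFreqVector vector = reader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
    System.out.println(vector instanceof TermPositionVector); //expected: true
  }
}
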
Index: src/test/org/apache/lucene/index/TestDocumentWriter.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java,v
retrieving revision 1.2
diff -u -r1.2 TestDocumentWriter.java
--- src/test/org/apache/lucene/index/TestDocumentWriter.java 29 Mar 2004 22:48:06 -0000 1.2
+++ src/test/org/apache/lucene/index/TestDocumentWriter.java 19 Aug 2004 11:57:27 -0000
@@ -1,83 +1,83 @@
-package org.apache.lucene.index;
-
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import junit.framework.TestCase;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-import java.io.IOException;
-
-public class TestDocumentWriter extends TestCase {
- private RAMDirectory dir = new RAMDirectory();
- private Document testDoc = new Document();
-
-
- public TestDocumentWriter(String s) {
- super(s);
- }
-
- protected void setUp() {
- DocHelper.setupDoc(testDoc);
- }
-
- protected void tearDown() {
-
- }
-
- public void test() {
- assertTrue(dir != null);
-
- }
-
- public void testAddDocument() {
- Analyzer analyzer = new WhitespaceAnalyzer();
- Similarity similarity = Similarity.getDefault();
- DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
- assertTrue(writer != null);
- try {
- writer.addDocument("test", testDoc);
- //After adding the document, we should be able to read it back in
- SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir));
- assertTrue(reader != null);
- Document doc = reader.document(0);
- assertTrue(doc != null);
-
- //System.out.println("Document: " + doc);
- Field [] fields = doc.getFields("textField2");
- assertTrue(fields != null && fields.length == 1);
- assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT));
- assertTrue(fields[0].isTermVectorStored() == true);
-
- fields = doc.getFields("textField1");
- assertTrue(fields != null && fields.length == 1);
- assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_1_TEXT));
- assertTrue(fields[0].isTermVectorStored() == false);
-
- fields = doc.getFields("keyField");
- assertTrue(fields != null && fields.length == 1);
- assertTrue(fields[0].stringValue().equals(DocHelper.KEYWORD_TEXT));
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
- }
-}
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+import java.io.IOException;
+
+public class TestDocumentWriter extends TestCase {
+ private RAMDirectory dir = new RAMDirectory();
+ private Document testDoc = new Document();
+
+
+ public TestDocumentWriter(String s) {
+ super(s);
+ }
+
+ protected void setUp() {
+ DocHelper.setupDoc(testDoc);
+ }
+
+ protected void tearDown() {
+
+ }
+
+ public void test() {
+ assertTrue(dir != null);
+
+ }
+
+ public void testAddDocument() {
+ Analyzer analyzer = new WhitespaceAnalyzer();
+ Similarity similarity = Similarity.getDefault();
+ DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
+ assertTrue(writer != null);
+ try {
+ writer.addDocument("test", testDoc);
+ //After adding the document, we should be able to read it back in
+ SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir));
+ assertTrue(reader != null);
+ Document doc = reader.document(0);
+ assertTrue(doc != null);
+
+ //System.out.println("Document: " + doc);
+ Field [] fields = doc.getFields("textField2");
+ assertTrue(fields != null && fields.length == 1);
+ assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT));
+ assertTrue(fields[0].isTermVectorStored() == true);
+
+ fields = doc.getFields("textField1");
+ assertTrue(fields != null && fields.length == 1);
+ assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_1_TEXT));
+ assertTrue(fields[0].isTermVectorStored() == false);
+
+ fields = doc.getFields("keyField");
+ assertTrue(fields != null && fields.length == 1);
+ assertTrue(fields[0].stringValue().equals(DocHelper.KEYWORD_TEXT));
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+}
Index: src/test/org/apache/lucene/index/TestFieldInfos.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestFieldInfos.java,v
retrieving revision 1.1
diff -u -r1.1 TestFieldInfos.java
--- src/test/org/apache/lucene/index/TestFieldInfos.java 20 Feb 2004 20:14:55 -0000 1.1
+++ src/test/org/apache/lucene/index/TestFieldInfos.java 19 Aug 2004 11:57:27 -0000
@@ -1,65 +1,65 @@
-package org.apache.lucene.index;
-
-
-import junit.framework.TestCase;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.store.RAMOutputStream;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.store.OutputStream;
-
-import java.io.IOException;
-import java.util.Map;
-
-//import org.cnlp.utils.properties.ResourceBundleHelper;
-
-public class TestFieldInfos extends TestCase {
-
- private Document testDoc = new Document();
-
- public TestFieldInfos(String s) {
- super(s);
- }
-
- protected void setUp() {
- DocHelper.setupDoc(testDoc);
- }
-
- protected void tearDown() {
- }
-
- public void test() {
- //Positive test of FieldInfos
- assertTrue(testDoc != null);
- FieldInfos fieldInfos = new FieldInfos();
- fieldInfos.add(testDoc);
- //Since the complement is stored as well in the fields map
- assertTrue(fieldInfos.size() == 7); //this is 7 b/c we are using the no-arg constructor
- RAMDirectory dir = new RAMDirectory();
- String name = "testFile";
- OutputStream output = dir.createFile(name);
- assertTrue(output != null);
- //Use a RAMOutputStream
-
- try {
- fieldInfos.write(output);
- output.close();
- assertTrue(output.length() > 0);
- FieldInfos readIn = new FieldInfos(dir, name);
- assertTrue(fieldInfos.size() == readIn.size());
- FieldInfo info = readIn.fieldInfo("textField1");
- assertTrue(info != null);
- assertTrue(info.storeTermVector == false);
-
- info = readIn.fieldInfo("textField2");
- assertTrue(info != null);
- assertTrue(info.storeTermVector == true);
-
- dir.close();
-
- } catch (IOException e) {
- assertTrue(false);
- }
-
- }
-}
+package org.apache.lucene.index;
+
+
+import junit.framework.TestCase;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.store.RAMDirectory;
+
+import java.io.IOException;
+
+//import org.cnlp.utils.properties.ResourceBundleHelper;
+
+public class TestFieldInfos extends TestCase {
+
+ private Document testDoc = new Document();
+
+ public TestFieldInfos(String s) {
+ super(s);
+ }
+
+ protected void setUp() {
+ DocHelper.setupDoc(testDoc);
+ }
+
+ protected void tearDown() {
+ }
+
+ public void test() {
+ //Positive test of FieldInfos
+ assertTrue(testDoc != null);
+ FieldInfos fieldInfos = new FieldInfos();
+ fieldInfos.add(testDoc);
+ //The document has six fields, but the no-arg FieldInfos constructor
+ //pre-registers the empty default field, so the size comes to seven
+ assertTrue(fieldInfos.size() == 7);
+ RAMDirectory dir = new RAMDirectory();
+ String name = "testFile";
+ OutputStream output = dir.createFile(name);
+ assertTrue(output != null);
+ //Use a RAMOutputStream
+
+ try {
+ fieldInfos.write(output);
+ output.close();
+ assertTrue(output.length() > 0);
+ FieldInfos readIn = new FieldInfos(dir, name);
+ assertTrue(fieldInfos.size() == readIn.size());
+ FieldInfo info = readIn.fieldInfo("textField1");
+ assertTrue(info != null);
+ assertTrue(info.storeTermVector == false);
+ assertTrue(info.storePositionWithTermVector == false);
+ assertTrue(info.storeOffsetWithTermVector == false);
+
+ info = readIn.fieldInfo("textField2");
+ assertTrue(info != null);
+ assertTrue(info.storeTermVector == true);
+ assertTrue(info.storePositionWithTermVector == true);
+ assertTrue(info.storeOffsetWithTermVector == true);
+ dir.close();
+
+ } catch (IOException e) {
+ assertTrue(false);
+ }
+
+ }
+}
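
For reference, the FieldInfos round trip in miniature, assuming the five-argument add() and the per-field storePositionWithTermVector/storeOffsetWithTermVector flags this patch introduces; the field and file names here are arbitrary:

package org.apache.lucene.index;

import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.RAMDirectory;

import java.io.IOException;

//Sketch only: write a FieldInfos with the new flags and read it back.
class FieldInfosRoundTrip {
  public static void main(String[] args) throws IOException {
    FieldInfos infos = new FieldInfos();
    infos.add("body", true, true, true, false); //indexed, term vector, positions only
    RAMDirectory dir = new RAMDirectory();
    OutputStream out = dir.createFile("fields.fnm");
    infos.write(out);
    out.close();
    FieldInfos readIn = new FieldInfos(dir, "fields.fnm");
    FieldInfo info = readIn.fieldInfo("body");
    System.out.println(info.storePositionWithTermVector); //expected: true
    System.out.println(info.storeOffsetWithTermVector);   //expected: false
    dir.close();
  }
}
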
Index: src/test/org/apache/lucene/index/TestFieldsReader.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestFieldsReader.java,v
retrieving revision 1.2
diff -u -r1.2 TestFieldsReader.java
--- src/test/org/apache/lucene/index/TestFieldsReader.java 29 Mar 2004 22:48:06 -0000 1.2
+++ src/test/org/apache/lucene/index/TestFieldsReader.java 19 Aug 2004 11:57:27 -0000
@@ -1,77 +1,77 @@
-package org.apache.lucene.index;
-
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import junit.framework.TestCase;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.search.Similarity;
-
-import java.util.Map;
-import java.io.IOException;
-
-public class TestFieldsReader extends TestCase {
- private RAMDirectory dir = new RAMDirectory();
- private Document testDoc = new Document();
- private FieldInfos fieldInfos = null;
-
- public TestFieldsReader(String s) {
- super(s);
- }
-
- protected void setUp() {
- fieldInfos = new FieldInfos();
- DocHelper.setupDoc(testDoc);
- fieldInfos.add(testDoc);
- DocumentWriter writer = new DocumentWriter(dir, new WhitespaceAnalyzer(),
- Similarity.getDefault(), 50);
- assertTrue(writer != null);
- try {
- writer.addDocument("test", testDoc);
- }
- catch (IOException e)
- {
-
- }
- }
-
- protected void tearDown() {
-
- }
-
- public void test() {
- assertTrue(dir != null);
- assertTrue(fieldInfos != null);
- try {
- FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
- assertTrue(reader != null);
- assertTrue(reader.size() == 1);
- Document doc = reader.doc(0);
- assertTrue(doc != null);
- assertTrue(doc.getField("textField1") != null);
- Field field = doc.getField("textField2");
- assertTrue(field != null);
- assertTrue(field.isTermVectorStored() == true);
- reader.close();
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
- }
-}
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.search.Similarity;
+
+import java.util.Map;
+import java.io.IOException;
+
+public class TestFieldsReader extends TestCase {
+ private RAMDirectory dir = new RAMDirectory();
+ private Document testDoc = new Document();
+ private FieldInfos fieldInfos = null;
+
+ public TestFieldsReader(String s) {
+ super(s);
+ }
+
+ protected void setUp() {
+ fieldInfos = new FieldInfos();
+ DocHelper.setupDoc(testDoc);
+ fieldInfos.add(testDoc);
+ DocumentWriter writer = new DocumentWriter(dir, new WhitespaceAnalyzer(),
+ Similarity.getDefault(), 50);
+ assertTrue(writer != null);
+ try {
+ writer.addDocument("test", testDoc);
+ }
+ catch (IOException e)
+ {
+ //fail the test rather than silently swallowing the exception
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+ protected void tearDown() {
+
+ }
+
+ public void test() {
+ assertTrue(dir != null);
+ assertTrue(fieldInfos != null);
+ try {
+ FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+ assertTrue(reader != null);
+ assertTrue(reader.size() == 1);
+ Document doc = reader.doc(0);
+ assertTrue(doc != null);
+ assertTrue(doc.getField("textField1") != null);
+ Field field = doc.getField("textField2");
+ assertTrue(field != null);
+ assertTrue(field.isTermVectorStored() == true);
+ reader.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+}
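
The same fixture in compressed form, for anyone tracing the stored-field path: FieldsReader only needs the segment name and a FieldInfos that has seen the document. The class name FieldsReaderSketch is illustrative:

package org.apache.lucene.index;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.RAMDirectory;

import java.io.IOException;

//Sketch only: store a document and read its fields back with FieldsReader.
class FieldsReaderSketch {
  public static void main(String[] args) throws IOException {
    RAMDirectory dir = new RAMDirectory();
    Document doc = new Document();
    DocHelper.setupDoc(doc);
    FieldInfos infos = new FieldInfos();
    infos.add(doc);
    DocumentWriter writer = new DocumentWriter(dir, new WhitespaceAnalyzer(),
        Similarity.getDefault(), 50);
    writer.addDocument("test", doc);
    FieldsReader reader = new FieldsReader(dir, "test", infos);
    Field field = reader.doc(0).getField(DocHelper.TEXT_FIELD_2_KEY);
    System.out.println(field.isTermVectorStored()); //expected: true
    reader.close();
  }
}
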
Index: src/test/org/apache/lucene/index/TestSegmentMerger.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java,v
retrieving revision 1.4
diff -u -r1.4 TestSegmentMerger.java
--- src/test/org/apache/lucene/index/TestSegmentMerger.java 8 Aug 2004 13:05:33 -0000 1.4
+++ src/test/org/apache/lucene/index/TestSegmentMerger.java 19 Aug 2004 11:57:27 -0000
@@ -109,6 +109,7 @@
int [] freqs = vector.getTermFrequencies();
assertTrue(freqs != null);
//System.out.println("Freqs size: " + freqs.length);
+ assertTrue(vector instanceof TermPositionVector == true);
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
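
The new assertion codifies the access pattern for the richer vectors: callers get a TermFreqVector back and downcast only after an instanceof check. A hedged sketch of that pattern, as a method inside an org.apache.lucene.index test class; the document number and field name are placeholders:

//Sketch only: read positions and offsets back when they were stored.
static void dumpFirstTerm(IndexReader reader, int docNum, String field) throws IOException {
  TermFreqVector vector = reader.getTermFreqVector(docNum, field);
  if (vector instanceof TermPositionVector) {
    TermPositionVector tpv = (TermPositionVector) vector;
    int [] positions = tpv.getTermPositions(0);          //positions of the first term
    TermVectorOffsetInfo [] offsets = tpv.getOffsets(0); //offsets of the first term
    System.out.println(positions.length + " positions, "
        + (offsets == null ? 0 : offsets.length) + " offsets");
  }
}
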
Index: src/test/org/apache/lucene/index/TestSegmentReader.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestSegmentReader.java,v
retrieving revision 1.3
diff -u -r1.3 TestSegmentReader.java
--- src/test/org/apache/lucene/index/TestSegmentReader.java 6 Aug 2004 21:32:51 -0000 1.3
+++ src/test/org/apache/lucene/index/TestSegmentReader.java 19 Aug 2004 11:57:27 -0000
@@ -1,199 +1,199 @@
-package org.apache.lucene.index;
-
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import junit.framework.TestCase;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.Enumeration;
-
-public class TestSegmentReader extends TestCase {
- private RAMDirectory dir = new RAMDirectory();
- private Document testDoc = new Document();
- private SegmentReader reader = null;
-
- public TestSegmentReader(String s) {
- super(s);
- }
-
- //TODO: Setup the reader w/ multiple documents
- protected void setUp() {
-
- try {
- DocHelper.setupDoc(testDoc);
- DocHelper.writeDoc(dir, testDoc);
- reader = new SegmentReader(new SegmentInfo("test", 1, dir));
- } catch (IOException e) {
-
- }
- }
-
- protected void tearDown() {
-
- }
-
- public void test() {
- assertTrue(dir != null);
- assertTrue(reader != null);
- assertTrue(DocHelper.nameValues.size() > 0);
- assertTrue(DocHelper.numFields(testDoc) == 6);
- }
-
- public void testDocument() {
- try {
- assertTrue(reader.numDocs() == 1);
- assertTrue(reader.maxDoc() >= 1);
- Document result = reader.document(0);
- assertTrue(result != null);
- //There are 2 unstored fields on the document that are not preserved across writing
- assertTrue(DocHelper.numFields(result) == DocHelper.numFields(testDoc) - 2);
-
- Enumeration fields = result.fields();
- while (fields.hasMoreElements()) {
- Field field = (Field) fields.nextElement();
- assertTrue(field != null);
- assertTrue(DocHelper.nameValues.containsKey(field.name()));
- }
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
- }
-
- public void testDelete() {
- Document docToDelete = new Document();
- DocHelper.setupDoc(docToDelete);
- DocHelper.writeDoc(dir, "seg-to-delete", docToDelete);
- try {
- SegmentReader deleteReader = new SegmentReader(new SegmentInfo("seg-to-delete", 1, dir));
- assertTrue(deleteReader != null);
- assertTrue(deleteReader.numDocs() == 1);
- deleteReader.delete(0);
- assertTrue(deleteReader.isDeleted(0) == true);
- assertTrue(deleteReader.hasDeletions() == true);
- assertTrue(deleteReader.numDocs() == 0);
- try {
- Document test = deleteReader.document(0);
- assertTrue(false);
- } catch (IllegalArgumentException e) {
- assertTrue(true);
- }
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
- }
-
- public void testGetFieldNameVariations() {
- Collection result = reader.getFieldNames();
- assertTrue(result != null);
- assertTrue(result.size() == 7);
- for (Iterator iter = result.iterator(); iter.hasNext();) {
- String s = (String) iter.next();
- //System.out.println("Name: " + s);
- assertTrue(DocHelper.nameValues.containsKey(s) == true || s.equals(""));
- }
- result = reader.getFieldNames(true);
- assertTrue(result != null);
- assertTrue(result.size() == 5);
- for (Iterator iter = result.iterator(); iter.hasNext();) {
- String s = (String) iter.next();
- assertTrue(DocHelper.nameValues.containsKey(s) == true || s.equals(""));
- }
-
- result = reader.getFieldNames(false);
- assertTrue(result != null);
- assertTrue(result.size() == 2);
- //Get all indexed fields that are storing term vectors
- result = reader.getIndexedFieldNames(true);
- assertTrue(result != null);
- assertTrue(result.size() == 2);
-
- result = reader.getIndexedFieldNames(false);
- assertTrue(result != null);
- assertTrue(result.size() == 3);
- }
-
- public void testTerms() {
- try {
- TermEnum terms = reader.terms();
- assertTrue(terms != null);
- while (terms.next() == true)
- {
- Term term = terms.term();
- assertTrue(term != null);
- //System.out.println("Term: " + term);
- String fieldValue = (String)DocHelper.nameValues.get(term.field());
- assertTrue(fieldValue.indexOf(term.text()) != -1);
- }
-
- TermDocs termDocs = reader.termDocs();
- assertTrue(termDocs != null);
- termDocs.seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
- assertTrue(termDocs.next() == true);
-
- TermPositions positions = reader.termPositions();
- positions.seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
- assertTrue(positions != null);
- assertTrue(positions.doc() == 0);
- assertTrue(positions.nextPosition() >= 0);
-
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
- }
-
- public void testNorms() {
- //TODO: Not sure how these work/should be tested
-/*
- try {
- byte [] norms = reader.norms(DocHelper.TEXT_FIELD_1_KEY);
- System.out.println("Norms: " + norms);
- assertTrue(norms != null);
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
-*/
-
- }
-
- public void testTermVectors() {
- TermFreqVector result = reader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
- assertTrue(result != null);
- String [] terms = result.getTerms();
- int [] freqs = result.getTermFrequencies();
- assertTrue(terms != null && terms.length == 3 && freqs != null && freqs.length == 3);
- for (int i = 0; i < terms.length; i++) {
- String term = terms[i];
- int freq = freqs[i];
- assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);
- assertTrue(freq > 0);
- }
-
- TermFreqVector [] results = reader.getTermFreqVectors(0);
- assertTrue(results != null);
- assertTrue(results.length == 2);
- }
-
-}
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Enumeration;
+
+public class TestSegmentReader extends TestCase {
+ private RAMDirectory dir = new RAMDirectory();
+ private Document testDoc = new Document();
+ private SegmentReader reader = null;
+
+ public TestSegmentReader(String s) {
+ super(s);
+ }
+
+ //TODO: Setup the reader w/ multiple documents
+ protected void setUp() {
+
+ try {
+ DocHelper.setupDoc(testDoc);
+ DocHelper.writeDoc(dir, testDoc);
+ reader = new SegmentReader(new SegmentInfo("test", 1, dir));
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false); //fail fast if the segment could not be written
+ }
+ }
+
+ protected void tearDown() {
+
+ }
+
+ public void test() {
+ assertTrue(dir != null);
+ assertTrue(reader != null);
+ assertTrue(DocHelper.nameValues.size() > 0);
+ assertTrue(DocHelper.numFields(testDoc) == 6);
+ }
+
+ public void testDocument() {
+ try {
+ assertTrue(reader.numDocs() == 1);
+ assertTrue(reader.maxDoc() >= 1);
+ Document result = reader.document(0);
+ assertTrue(result != null);
+ //There are 2 unstored fields on the document that are not preserved across writing
+ assertTrue(DocHelper.numFields(result) == DocHelper.numFields(testDoc) - 2);
+
+ Enumeration fields = result.fields();
+ while (fields.hasMoreElements()) {
+ Field field = (Field) fields.nextElement();
+ assertTrue(field != null);
+ assertTrue(DocHelper.nameValues.containsKey(field.name()));
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+ public void testDelete() {
+ Document docToDelete = new Document();
+ DocHelper.setupDoc(docToDelete);
+ DocHelper.writeDoc(dir, "seg-to-delete", docToDelete);
+ try {
+ SegmentReader deleteReader = new SegmentReader(new SegmentInfo("seg-to-delete", 1, dir));
+ assertTrue(deleteReader != null);
+ assertTrue(deleteReader.numDocs() == 1);
+ deleteReader.delete(0);
+ assertTrue(deleteReader.isDeleted(0) == true);
+ assertTrue(deleteReader.hasDeletions() == true);
+ assertTrue(deleteReader.numDocs() == 0);
+ try {
+ Document test = deleteReader.document(0);
+ assertTrue(false);
+ } catch (IllegalArgumentException e) {
+ assertTrue(true);
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+ public void testGetFieldNameVariations() {
+ Collection result = reader.getFieldNames();
+ assertTrue(result != null);
+ assertTrue(result.size() == 7);
+ for (Iterator iter = result.iterator(); iter.hasNext();) {
+ String s = (String) iter.next();
+ //System.out.println("Name: " + s);
+ assertTrue(DocHelper.nameValues.containsKey(s) == true || s.equals(""));
+ }
+ result = reader.getFieldNames(true);
+ assertTrue(result != null);
+ assertTrue(result.size() == 5);
+ for (Iterator iter = result.iterator(); iter.hasNext();) {
+ String s = (String) iter.next();
+ assertTrue(DocHelper.nameValues.containsKey(s) == true || s.equals(""));
+ }
+
+ result = reader.getFieldNames(false);
+ assertTrue(result != null);
+ assertTrue(result.size() == 2);
+ //Get all indexed fields that are storing term vectors
+ result = reader.getIndexedFieldNames(true);
+ assertTrue(result != null);
+ assertTrue(result.size() == 2);
+
+ result = reader.getIndexedFieldNames(false);
+ assertTrue(result != null);
+ assertTrue(result.size() == 3);
+ }
+
+ public void testTerms() {
+ try {
+ TermEnum terms = reader.terms();
+ assertTrue(terms != null);
+ while (terms.next() == true)
+ {
+ Term term = terms.term();
+ assertTrue(term != null);
+ //System.out.println("Term: " + term);
+ String fieldValue = (String)DocHelper.nameValues.get(term.field());
+ assertTrue(fieldValue.indexOf(term.text()) != -1);
+ }
+
+ TermDocs termDocs = reader.termDocs();
+ assertTrue(termDocs != null);
+ termDocs.seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
+ assertTrue(termDocs.next() == true);
+
+ TermPositions positions = reader.termPositions();
+ positions.seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
+ assertTrue(positions != null);
+ assertTrue(positions.doc() == 0);
+ assertTrue(positions.nextPosition() >= 0);
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+ public void testNorms() {
+ //TODO: Not sure how these work/should be tested
+/*
+ try {
+ byte [] norms = reader.norms(DocHelper.TEXT_FIELD_1_KEY);
+ System.out.println("Norms: " + norms);
+ assertTrue(norms != null);
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+*/
+
+ }
+
+ public void testTermVectors() {
+ TermFreqVector result = reader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
+ assertTrue(result != null);
+ String [] terms = result.getTerms();
+ int [] freqs = result.getTermFrequencies();
+ assertTrue(terms != null && terms.length == 3 && freqs != null && freqs.length == 3);
+ for (int i = 0; i < terms.length; i++) {
+ String term = terms[i];
+ int freq = freqs[i];
+ assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);
+ assertTrue(freq > 0);
+ }
+
+ TermFreqVector [] results = reader.getTermFreqVectors(0);
+ assertTrue(results != null);
+ assertTrue(results.length == 2);
+ }
+
+}
Index: src/test/org/apache/lucene/index/TestTermVectorsReader.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java,v
retrieving revision 1.1
diff -u -r1.1 TestTermVectorsReader.java
--- src/test/org/apache/lucene/index/TestTermVectorsReader.java 20 Feb 2004 20:14:55 -0000 1.1
+++ src/test/org/apache/lucene/index/TestTermVectorsReader.java 19 Aug 2004 11:57:27 -0000
@@ -1,106 +1,218 @@
-package org.apache.lucene.index;
-
-
-import junit.framework.TestCase;
-import org.apache.lucene.store.RAMDirectory;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-public class TestTermVectorsReader extends TestCase {
- private TermVectorsWriter writer = null;
- //Must be lexicographically sorted, will do in setup, versus trying to maintain here
- private String [] testFields = {"f1", "f2", "f3"};
- private String [] testTerms = {"this", "is", "a", "test"};
- private RAMDirectory dir = new RAMDirectory();
- private String seg = "testSegment";
- private FieldInfos fieldInfos = new FieldInfos();
-
- public TestTermVectorsReader(String s) {
- super(s);
- }
-
- protected void setUp() {
- for (int i = 0; i < testFields.length; i++) {
- fieldInfos.add(testFields[i], true, true);
- }
-
- try {
- Arrays.sort(testTerms);
- for (int j = 0; j < 5; j++) {
- writer = new TermVectorsWriter(dir, seg, fieldInfos);
- writer.openDocument();
-
- for (int k = 0; k < testFields.length; k++) {
- writer.openField(testFields[k]);
- for (int i = 0; i < testTerms.length; i++) {
- writer.addTerm(testTerms[i], i);
- }
- writer.closeField();
- }
- writer.closeDocument();
- writer.close();
- }
-
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
- }
-
- protected void tearDown() {
-
- }
-
- public void test() {
- //Check to see the files were created properly in setup
- assertTrue(writer.isDocumentOpen() == false);
- assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
- assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
- }
-
- public void testReader() {
- try {
- TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
- assertTrue(reader != null);
- TermFreqVector vector = reader.get(0, testFields[0]);
- assertTrue(vector != null);
- String [] terms = vector.getTerms();
- assertTrue(terms != null);
- assertTrue(terms.length == testTerms.length);
- for (int i = 0; i < terms.length; i++) {
- String term = terms[i];
- //System.out.println("Term: " + term);
- assertTrue(term.equals(testTerms[i]));
- }
-
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
- }
-
- /**
- * Make sure exceptions and bad params are handled appropriately
- */
- public void testBadParams() {
- try {
- TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
- assertTrue(reader != null);
- //Bad document number, good field number
- TermFreqVector vector = reader.get(50, testFields[0]);
- assertTrue(vector == null);
- } catch (Exception e) {
- assertTrue(false);
- }
- try {
- TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
- assertTrue(reader != null);
- //good document number, bad field number
- TermFreqVector vector = reader.get(0, "f50");
- assertTrue(vector == null);
- } catch (Exception e) {
- assertTrue(false);
- }
- }
-}
+package org.apache.lucene.index;
+
+
+import junit.framework.TestCase;
+import org.apache.lucene.store.RAMDirectory;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+public class TestTermVectorsReader extends TestCase {
+ private TermVectorsWriter writer = null;
+ //Terms must be lexicographically sorted; we sort in setUp() rather than maintaining the order here
+ private String [] testFields = {"f1", "f2", "f3"};
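+ //Note: only the first testFields.length entries of the two flag arrays below are used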
+ private boolean [] testFieldsStorePos = {true, false, true, false};
+ private boolean [] testFieldsStoreOff = {true, false, false, true};
+ private String [] testTerms = {"this", "is", "a", "test"};
+ private int [][] positions = new int[testTerms.length][];
+ private TermVectorOffsetInfo [][] offsets = new TermVectorOffsetInfo[testTerms.length][];
+ private RAMDirectory dir = new RAMDirectory();
+ private String seg = "testSegment";
+ private FieldInfos fieldInfos = new FieldInfos();
+
+ public TestTermVectorsReader(String s) {
+ super(s);
+ }
+
+ protected void setUp() {
+ for (int i = 0; i < testFields.length; i++) {
+ fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
+ }
+
+ for (int i = 0; i < testTerms.length; i++)
+ {
+ positions[i] = new int[3];
+ for (int j = 0; j < positions[i].length; j++) {
+ positions[i][j] = (int)(Math.random() * 1000);
+ }
+ offsets[i] = new TermVectorOffsetInfo[3];
+ for (int j = 0; j < offsets[i].length; j++){
+ offsets[i][j] = new TermVectorOffsetInfo(0, testTerms[i].length());
+ }
+ }
+ try {
+ Arrays.sort(testTerms);
+ for (int j = 0; j < 5; j++) {
+ writer = new TermVectorsWriter(dir, seg, fieldInfos);
+ writer.openDocument();
+
+ for (int k = 0; k < testFields.length; k++) {
+ writer.openField(testFields[k]);
+ for (int i = 0; i < testTerms.length; i++) {
+ writer.addTerm(testTerms[i], 3, positions[i], offsets[i]);
+ }
+ writer.closeField();
+ }
+ writer.closeDocument();
+ writer.close();
+ }
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+ protected void tearDown() {
+
+ }
+
+ public void test() {
+ //Check to see the files were created properly in setup
+ assertTrue(writer.isDocumentOpen() == false);
+ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
+ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
+ }
+
+ public void testReader() {
+ try {
+ TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
+ assertTrue(reader != null);
+ TermFreqVector vector = reader.get(0, testFields[0]);
+ assertTrue(vector != null);
+ String [] terms = vector.getTerms();
+ assertTrue(terms != null);
+ assertTrue(terms.length == testTerms.length);
+ for (int i = 0; i < terms.length; i++) {
+ String term = terms[i];
+ //System.out.println("Term: " + term);
+ assertTrue(term.equals(testTerms[i]));
+ }
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+ public void testPositionReader() {
+ try {
+ TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
+ assertTrue(reader != null);
+ TermPositionVector vector;
+ String [] terms;
+ vector = (TermPositionVector)reader.get(0, testFields[0]);
+ assertTrue(vector != null);
+ terms = vector.getTerms();
+ assertTrue(terms != null);
+ assertTrue(terms.length == testTerms.length);
+ for (int i = 0; i < terms.length; i++) {
+ String term = terms[i];
+ //System.out.println("Term: " + term);
+ assertTrue(term.equals(testTerms[i]));
+ int [] positions = vector.getTermPositions(i);
+ assertTrue(positions != null);
+ assertTrue(positions.length == this.positions[i].length);
+ for (int j = 0; j < positions.length; j++) {
+ int position = positions[j];
+ assertTrue(position == this.positions[i][j]);
+ }
+ TermVectorOffsetInfo [] offset = vector.getOffsets(i);
+ assertTrue(offset != null);
+ assertTrue(offset.length == this.offsets[i].length);
+ for (int j = 0; j < offset.length; j++) {
+ TermVectorOffsetInfo termVectorOffsetInfo = offset[j];
+ assertTrue(termVectorOffsetInfo.equals(offsets[i][j]));
+ }
+ }
+
+ TermFreqVector freqVector = (TermFreqVector)reader.get(0, testFields[1]); //no pos, no offset
+ assertTrue(freqVector != null);
+ assertTrue(freqVector instanceof TermPositionVector == false);
+ terms = freqVector.getTerms();
+ assertTrue(terms != null);
+ assertTrue(terms.length == testTerms.length);
+ for (int i = 0; i < terms.length; i++) {
+ String term = terms[i];
+ //System.out.println("Term: " + term);
+ assertTrue(term.equals(testTerms[i]));
+ }
+
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ catch (ClassCastException cce)
+ {
+ cce.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+ public void testOffsetReader() {
+ try {
+ TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
+ assertTrue(reader != null);
+ TermPositionVector vector = (TermPositionVector)reader.get(0, testFields[0]);
+ assertTrue(vector != null);
+ String [] terms = vector.getTerms();
+ assertTrue(terms != null);
+ assertTrue(terms.length == testTerms.length);
+ for (int i = 0; i < terms.length; i++) {
+ String term = terms[i];
+ //System.out.println("Term: " + term);
+ assertTrue(term.equals(testTerms[i]));
+ int [] positions = vector.getTermPositions(i);
+ assertTrue(positions != null);
+ assertTrue(positions.length == this.positions[i].length);
+ for (int j = 0; j < positions.length; j++) {
+ int position = positions[j];
+ assertTrue(position == this.positions[i][j]);
+ }
+ TermVectorOffsetInfo [] offset = vector.getOffsets(i);
+ assertTrue(offset != null);
+ assertTrue(offset.length == this.offsets[i].length);
+ for (int j = 0; j < offset.length; j++) {
+ TermVectorOffsetInfo termVectorOffsetInfo = offset[j];
+ assertTrue(termVectorOffsetInfo.equals(offsets[i][j]));
+ }
+ }
+
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ catch (ClassCastException cce)
+ {
+ cce.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+
+ /**
+ * Make sure exceptions and bad params are handled appropriately
+ */
+ public void testBadParams() {
+ try {
+ TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
+ assertTrue(reader != null);
+ //Bad document number, good field number
+ TermFreqVector vector = reader.get(50, testFields[0]);
+ assertTrue(vector == null);
+ } catch (Exception e) {
+ assertTrue(false);
+ }
+ try {
+ TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
+ assertTrue(reader != null);
+ //good document number, bad field number
+ TermFreqVector vector = reader.get(0, "f50");
+ assertTrue(vector == null);
+ } catch (Exception e) {
+ assertTrue(false);
+ }
+ }
+}
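
In case it helps review, the writer/reader cycle these tests drive, in miniature; the segment name and term data are placeholders, and the frequency argument matches the three positions/offsets supplied per term:

package org.apache.lucene.index;

import org.apache.lucene.store.RAMDirectory;

import java.io.IOException;

//Sketch only: write one term with positions and offsets, then read it back.
class TermVectorRoundTrip {
  public static void main(String[] args) throws IOException {
    RAMDirectory dir = new RAMDirectory();
    FieldInfos infos = new FieldInfos();
    infos.add("f1", true, true, true, true); //store both positions and offsets
    TermVectorsWriter writer = new TermVectorsWriter(dir, "seg", infos);
    writer.openDocument();
    writer.openField("f1");
    int [] positions = {0, 5, 9};
    TermVectorOffsetInfo [] offsets = {
        new TermVectorOffsetInfo(0, 4),
        new TermVectorOffsetInfo(5, 9),
        new TermVectorOffsetInfo(10, 14)
    };
    writer.addTerm("test", 3, positions, offsets);
    writer.closeField();
    writer.closeDocument();
    writer.close();

    TermVectorsReader reader = new TermVectorsReader(dir, "seg", infos);
    TermPositionVector vector = (TermPositionVector) reader.get(0, "f1");
    System.out.println(vector.getTermPositions(0)[0]); //expected: 0
  }
}
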
Index: src/test/org/apache/lucene/index/TestTermVectorsWriter.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java,v
retrieving revision 1.2
diff -u -r1.2 TestTermVectorsWriter.java
--- src/test/org/apache/lucene/index/TestTermVectorsWriter.java 29 Mar 2004 22:48:06 -0000 1.2
+++ src/test/org/apache/lucene/index/TestTermVectorsWriter.java 19 Aug 2004 11:57:27 -0000
@@ -1,202 +1,284 @@
-package org.apache.lucene.index;
-
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import junit.framework.TestCase;
-import org.apache.lucene.store.RAMDirectory;
-
-import java.io.IOException;
-
-public class TestTermVectorsWriter extends TestCase {
-
- private String[] testTerms = {"this", "is", "a", "test"};
- private String [] testFields = {"f1", "f2", "f3"};
- private int[][] positions = new int[testTerms.length][];
- private RAMDirectory dir = new RAMDirectory();
- private String seg = "testSegment";
- private FieldInfos fieldInfos = new FieldInfos();
-
- public TestTermVectorsWriter(String s) {
- super(s);
- }
-
- protected void setUp() {
-
- for (int i = 0; i < testFields.length; i++) {
- fieldInfos.add(testFields[i], true, true);
- }
-
-
- for (int i = 0; i < testTerms.length; i++) {
- positions[i] = new int[5];
- for (int j = 0; j < positions[i].length; j++) {
- positions[i][j] = i * 100;
- }
- }
- }
-
- protected void tearDown() {
- }
-
- public void test() {
- assertTrue(dir != null);
- assertTrue(positions != null);
- }
-
- /*public void testWriteNoPositions() {
- try {
- TermVectorsWriter writer = new TermVectorsWriter(dir, seg, 50);
- writer.openDocument();
- assertTrue(writer.isDocumentOpen() == true);
- writer.openField(0);
- assertTrue(writer.isFieldOpen() == true);
- for (int i = 0; i < testTerms.length; i++) {
- writer.addTerm(testTerms[i], i);
- }
- writer.closeField();
-
- writer.closeDocument();
- writer.close();
- assertTrue(writer.isDocumentOpen() == false);
- //Check to see the files were created
- assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
- assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
- //Now read it back in
- TermVectorsReader reader = new TermVectorsReader(dir, seg);
- assertTrue(reader != null);
- checkTermVector(reader, 0, 0);
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
- } */
-
- public void testWriter() {
- try {
- TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
- writer.openDocument();
- assertTrue(writer.isDocumentOpen() == true);
- writeField(writer, testFields[0]);
- writer.closeDocument();
- writer.close();
- assertTrue(writer.isDocumentOpen() == false);
- //Check to see the files were created
- assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
- assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
- //Now read it back in
- TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
- assertTrue(reader != null);
- checkTermVector(reader, 0, testFields[0]);
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
- }
- private void checkTermVector(TermVectorsReader reader, int docNum, String field) throws IOException {
- TermFreqVector vector = reader.get(docNum, field);
- assertTrue(vector != null);
- String[] terms = vector.getTerms();
- assertTrue(terms != null);
- assertTrue(terms.length == testTerms.length);
- for (int i = 0; i < terms.length; i++) {
- String term = terms[i];
- assertTrue(term.equals(testTerms[i]));
- }
- }
-
- /**
- * Test one document, multiple fields
- */
- public void testMultipleFields() {
- try {
- TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
- writeDocument(writer, testFields.length);
-
- writer.close();
-
- assertTrue(writer.isDocumentOpen() == false);
- //Check to see the files were created
- assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
- assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
- //Now read it back in
- TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
- assertTrue(reader != null);
-
- for (int j = 0; j < testFields.length; j++) {
- checkTermVector(reader, 0, testFields[j]);
- }
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
- }
-
- private void writeDocument(TermVectorsWriter writer, int numFields) throws IOException {
- writer.openDocument();
- assertTrue(writer.isDocumentOpen() == true);
-
- for (int j = 0; j < numFields; j++) {
- writeField(writer, testFields[j]);
- }
- writer.closeDocument();
- assertTrue(writer.isDocumentOpen() == false);
- }
-
- /**
- *
- * @param writer The writer to write to
- * @param j The field number
- * @throws IOException
- */
- private void writeField(TermVectorsWriter writer, String f) throws IOException {
- writer.openField(f);
- assertTrue(writer.isFieldOpen() == true);
- for (int i = 0; i < testTerms.length; i++) {
- writer.addTerm(testTerms[i], i);
- }
- writer.closeField();
- }
-
-
- public void testMultipleDocuments() {
-
- try {
- TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
- assertTrue(writer != null);
- for (int i = 0; i < 10; i++) {
- writeDocument(writer, testFields.length);
- }
- writer.close();
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
- //Do some arbitrary tests
- try {
- TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
- for (int i = 0; i < 10; i++) {
- assertTrue(reader != null);
- checkTermVector(reader, 5, testFields[0]);
- checkTermVector(reader, 2, testFields[2]);
- }
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
- }
-
-}
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.store.RAMDirectory;
+
+import java.io.IOException;
+
+public class TestTermVectorsWriter extends TestCase {
+
+ private String[] testTerms = {"this", "is", "a", "test"};
+ private String [] testFields = {"f1", "f2", "f3", "f4"};
+ private boolean [] testFieldsStorePos = {true, false, true, false};
+ private boolean [] testFieldsStoreOff = {true, false, false, true};
+ private int [][] positions = new int[testTerms.length][];
+ private TermVectorOffsetInfo [][] offsets = new TermVectorOffsetInfo[testTerms.length][];
+
+ private RAMDirectory dir = new RAMDirectory();
+ private String seg = "testSegment";
+ private FieldInfos fieldInfos = new FieldInfos();
+
+ public TestTermVectorsWriter(String s) {
+ super(s);
+ }
+
+ protected void setUp() {
+
+ for (int i = 0; i < testFields.length; i++) {
+ fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
+ }
+ for (int i = 0; i < testTerms.length; i++)
+ {
+ positions[i] = new int[3];
+ for (int j = 0; j < positions[i].length; j++) {
+ positions[i][j] = (int)(Math.random() * 1000);
+ }
+ offsets[i] = new TermVectorOffsetInfo[3];
+ for (int j = 0; j < offsets[i].length; j++){
+ offsets[i][j] = new TermVectorOffsetInfo(0, testTerms[i].length());
+ }
+ }
+ }
+
+ protected void tearDown() {
+ }
+
+ public void test() {
+ assertTrue(dir != null);
+ assertTrue(positions != null);
+ assertTrue(offsets != null);
+ }
+
+ public void testWriter() {
+ try {
+ TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
+ writer.openDocument();
+ assertTrue(writer.isDocumentOpen() == true);
+ writeField(writer, testFields[0], testFieldsStorePos[0], testFieldsStoreOff[0]);
+ writer.closeDocument();
+ writer.close();
+ assertTrue(writer.isDocumentOpen() == false);
+ //Check to see the files were created
+ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
+ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
+ //Now read it back in
+ TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
+ assertTrue(reader != null);
+ checkTermVector(reader, 0, testFields[0]);
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+ public void testPositionOffsetWriter() {
+ try {
+ TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
+ writer.openDocument();
+ assertTrue(writer.isDocumentOpen() == true);
+ writeField(writer, testFields[0], testFieldsStorePos[0], testFieldsStoreOff[0]);
+ writer.closeDocument();
+ writer.close();
+ assertTrue(writer.isDocumentOpen() == false);
+ //Check to see the files were created
+ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
+ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
+ //Now read it back in
+ TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
+ assertTrue(reader != null);
+ checkPositionVector(reader, 0, testFields[0], testFieldsStorePos[0], testFieldsStoreOff[0]);
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+ public void testPositionWriter() {
+ try {
+ //Only write positions
+ TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
+ writer.openDocument();
+ assertTrue(writer.isDocumentOpen() == true);
+ writeField(writer, testFields[0], true, false);
+ writer.closeDocument();
+ writer.close();
+ assertTrue(writer.isDocumentOpen() == false);
+ //Check to see the files were created
+ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
+ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
+ //Now read it back in
+ TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
+ assertTrue(reader != null);
+ checkPositionVector(reader, 0, testFields[0], true, false);
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+ public void testOffsetWriter() {
+ try {
+ //Only write offsets
+ TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
+ writer.openDocument();
+ assertTrue(writer.isDocumentOpen() == true);
+ writeField(writer, testFields[0], false, true);
+ writer.closeDocument();
+ writer.close();
+ assertTrue(writer.isDocumentOpen() == false);
+ //Check to see the files were created
+ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
+ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
+ //Now read it back in
+ TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
+ assertTrue(reader != null);
+ checkPositionVector(reader, 0, testFields[0], false, true);
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+ private void checkTermVector(TermVectorsReader reader, int docNum, String field) throws IOException {
+ TermFreqVector vector = reader.get(docNum, field);
+ assertTrue(vector != null);
+ String[] terms = vector.getTerms();
+ assertTrue(terms != null);
+ assertTrue(terms.length == testTerms.length);
+ for (int i = 0; i < terms.length; i++) {
+ String term = terms[i];
+ assertTrue(term.equals(testTerms[i]));
+ }
+ }
+
+ private void checkPositionVector(TermVectorsReader reader, int docNum, String field, boolean checkPos, boolean checkOff) throws IOException {
+ TermPositionVector vector = (TermPositionVector)reader.get(docNum, field);
+ assertTrue(vector != null);
+ String[] terms = vector.getTerms();
+ assertTrue(terms != null);
+ assertTrue(terms.length == testTerms.length);
+ for (int i = 0; i < terms.length; i++) {
+ String term = terms[i];
+ assertTrue(term.equals(testTerms[i]));
+ if (checkPos == true) {
+ int [] pos = vector.getTermPositions(i);
+ assertTrue(pos != null);
+ assertTrue(pos.length == positions[i].length);
+ }
+ if (checkOff == true) {
+ TermVectorOffsetInfo [] offs = vector.getOffsets(i);
+ assertTrue(offs != null);
+ assertTrue(offs.length == offsets[i].length);
+ }
+ }
+ }
+
+ /**
+ * Test one document, multiple fields
+ */
+ public void testMultipleFields() {
+ try {
+ TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
+ writeDocument(writer, testFields.length);
+
+ writer.close();
+
+ assertTrue(writer.isDocumentOpen() == false);
+ //Check to see the files were created
+ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
+ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
+ //Now read it back in
+ TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
+ assertTrue(reader != null);
+
+ for (int j = 0; j < testFields.length; j++) {
+ checkTermVector(reader, 0, testFields[j]);
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+ private void writeDocument(TermVectorsWriter writer, int numFields) throws IOException {
+ writer.openDocument();
+ assertTrue(writer.isDocumentOpen() == true);
+
+ for (int j = 0; j < numFields; j++) {
+ writeField(writer, testFields[j], testFieldsStorePos[j], testFieldsStoreOff[j]);
+ }
+ writer.closeDocument();
+ assertTrue(writer.isDocumentOpen() == false);
+ }
+
+ /**
+ * Writes all test terms to the given field, storing positions and/or offsets as requested
+ * @param writer The writer to write to
+ * @param f The field name
+ * @param storePos Whether to store positions with each term
+ * @param storeOff Whether to store offsets with each term
+ * @throws IOException
+ */
+ private void writeField(TermVectorsWriter writer, String f, boolean storePos, boolean storeOff) throws IOException {
+ writer.openField(f);
+ assertTrue(writer.isFieldOpen() == true);
+ for (int i = 0; i < testTerms.length; i++) {
+ if (storePos == false && storeOff == false) {
+ writer.addTerm(testTerms[i], 3);
+ }
+ else if (storePos == true && storeOff == false) {
+ writer.addTerm(testTerms[i], 3, positions[i], null);
+ }
+ else if (storePos == false && storeOff == true) {
+ writer.addTerm(testTerms[i], 3, null, offsets[i]);
+ }
+ else if (storePos == true && storeOff == true) {
+ writer.addTerm(testTerms[i], 3, positions[i], offsets[i]);
+ }
+ }
+ writer.closeField();
+ }
+
+
+ public void testMultipleDocuments() {
+
+ try {
+ TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
+ assertTrue(writer != null);
+ for (int i = 0; i < 10; i++) {
+ writeDocument(writer, testFields.length);
+ }
+ writer.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ //Do some arbitrary tests
+ try {
+ TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
+ for (int i = 0; i < 10; i++) {
+ assertTrue(reader != null);
+ checkTermVector(reader, 5, testFields[0]);
+ checkTermVector(reader, 2, testFields[2]);
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ assertTrue(false);
+ }
+ }
+
+}
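
One last note on writeField above: the four flag combinations collapse onto the two addTerm overloads, with null standing in for whichever array is not being stored. Condensed, for reference (the same calls as the if/else chain):

writer.addTerm(term, freq);                     //no positions, no offsets
writer.addTerm(term, freq, positions, null);    //positions only
writer.addTerm(term, freq, null, offsets);      //offsets only
writer.addTerm(term, freq, positions, offsets); //positions and offsets
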
cvs server: Diffing src/test/org/apache/lucene/index/store
cvs server: Diffing src/test/org/apache/lucene/queryParser
cvs server: Diffing src/test/org/apache/lucene/search
Index: src/test/org/apache/lucene/search/TestTermVectors.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/search/TestTermVectors.java,v
retrieving revision 1.3
diff -u -r1.3 TestTermVectors.java
--- src/test/org/apache/lucene/search/TestTermVectors.java 13 Aug 2004 18:38:44 -0000 1.3
+++ src/test/org/apache/lucene/search/TestTermVectors.java 19 Aug 2004 11:57:27 -0000
@@ -1,220 +1,283 @@
-package org.apache.lucene.search;
-
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import junit.framework.TestCase;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.*;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.English;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-public class TestTermVectors extends TestCase {
- private IndexSearcher searcher;
- private RAMDirectory directory = new RAMDirectory();
- public TestTermVectors(String s) {
- super(s);
- }
-
- public void setUp() throws Exception {
- IndexWriter writer
- = new IndexWriter(directory, new SimpleAnalyzer(), true);
- //writer.setUseCompoundFile(true);
- //writer.infoStream = System.out;
- for (int i = 0; i < 1000; i++) {
- Document doc = new Document();
- doc.add(Field.Text("field", English.intToEnglish(i), true));
- writer.addDocument(doc);
- }
- writer.close();
- searcher = new IndexSearcher(directory);
- }
-
- protected void tearDown() {
-
- }
-
- public void test() {
- assertTrue(searcher != null);
- }
-
- public void testTermVectors() {
- Query query = new TermQuery(new Term("field", "seventy"));
- try {
- Hits hits = searcher.search(query);
- assertEquals(100, hits.length());
-
- for (int i = 0; i < hits.length(); i++)
- {
- TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
- assertTrue(vector != null);
- assertTrue(vector.length == 1);
- //assertTrue();
- }
- TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(50));
- //System.out.println("Explain: " + searcher.explain(query, hits.id(50)));
- //System.out.println("Vector: " + vector[0].toString());
- } catch (IOException e) {
- assertTrue(false);
- }
- }
-
- public void testTermPositionVectors() {
- Query query = new TermQuery(new Term("field", "fifty"));
- try {
- Hits hits = searcher.search(query);
- assertEquals(100, hits.length());
-
- for (int i = 0; i < hits.length(); i++)
- {
- TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
- assertTrue(vector != null);
- assertTrue(vector.length == 1);
- //assertTrue();
- }
- } catch (IOException e) {
- assertTrue(false);
- }
- }
-
- public void testKnownSetOfDocuments() {
- String test1 = "eating chocolate in a computer lab"; //6 terms
- String test2 = "computer in a computer lab"; //5 terms
- String test3 = "a chocolate lab grows old"; //5 terms
- String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
- Map test4Map = new HashMap();
- test4Map.put("chocolate", new Integer(3));
- test4Map.put("lab", new Integer(2));
- test4Map.put("eating", new Integer(1));
- test4Map.put("computer", new Integer(1));
- test4Map.put("with", new Integer(1));
- test4Map.put("a", new Integer(1));
- test4Map.put("colored", new Integer(1));
- test4Map.put("in", new Integer(1));
- test4Map.put("an", new Integer(1));
- test4Map.put("computer", new Integer(1));
- test4Map.put("old", new Integer(1));
-
- Document testDoc1 = new Document();
- setupDoc(testDoc1, test1);
- Document testDoc2 = new Document();
- setupDoc(testDoc2, test2);
- Document testDoc3 = new Document();
- setupDoc(testDoc3, test3);
- Document testDoc4 = new Document();
- setupDoc(testDoc4, test4);
-
- Directory dir = new RAMDirectory();
-
- try {
- IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
- assertTrue(writer != null);
- writer.addDocument(testDoc1);
- writer.addDocument(testDoc2);
- writer.addDocument(testDoc3);
- writer.addDocument(testDoc4);
- writer.close();
- IndexSearcher knownSearcher = new IndexSearcher(dir);
- TermEnum termEnum = knownSearcher.reader.terms();
- TermDocs termDocs = knownSearcher.reader.termDocs();
- //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length);
-
- Similarity sim = knownSearcher.getSimilarity();
- while (termEnum.next() == true)
- {
- Term term = termEnum.term();
- //System.out.println("Term: " + term);
- termDocs.seek(term);
- while (termDocs.next())
- {
- int docId = termDocs.doc();
- int freq = termDocs.freq();
- //System.out.println("Doc Id: " + docId + " freq " + freq);
- TermFreqVector vector = knownSearcher.reader.getTermFreqVector(docId, "field");
- float tf = sim.tf(freq);
- float idf = sim.idf(term, knownSearcher);
- //float qNorm = sim.queryNorm()
- //This is fine since we don't have stop words
- float lNorm = sim.lengthNorm("field", vector.getTerms().length);
- //float coord = sim.coord()
- //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
- assertTrue(vector != null);
- String[] vTerms = vector.getTerms();
- int [] freqs = vector.getTermFrequencies();
- for (int i = 0; i < vTerms.length; i++)
- {
- if (term.text().equals(vTerms[i]) == true)
- {
- assertTrue(freqs[i] == freq);
- }
- }
-
- }
- //System.out.println("--------");
- }
- Query query = new TermQuery(new Term("field", "chocolate"));
- Hits hits = knownSearcher.search(query);
- //doc 3 should be the first hit b/c it is the shortest match
- assertTrue(hits.length() == 3);
- float score = hits.score(0);
- /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString());
- System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0)));
- System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString());
- System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
- System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString());
- System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
- assertTrue(testDoc3.toString().equals(hits.doc(0).toString()));
- assertTrue(testDoc4.toString().equals(hits.doc(1).toString()));
- assertTrue(testDoc1.toString().equals(hits.doc(2).toString()));
- TermFreqVector vector = knownSearcher.reader.getTermFreqVector(hits.id(1), "field");
- assertTrue(vector != null);
- //System.out.println("Vector: " + vector);
- String[] terms = vector.getTerms();
- int [] freqs = vector.getTermFrequencies();
- assertTrue(terms != null && terms.length == 10);
- for (int i = 0; i < terms.length; i++) {
- String term = terms[i];
- //System.out.println("Term: " + term);
- int freq = freqs[i];
- assertTrue(test4.indexOf(term) != -1);
- Integer freqInt = (Integer)test4Map.get(term);
- assertTrue(freqInt != null);
- assertTrue(freqInt.intValue() == freq);
- }
- knownSearcher.close();
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
- }
-
-
- }
-
- private void setupDoc(Document doc, String text)
- {
- doc.add(Field.Text("field", text, true));
- //System.out.println("Document: " + doc);
- }
-
-
-}
+package org.apache.lucene.search;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.*;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.English;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+public class TestTermVectors extends TestCase {
+ private IndexSearcher searcher;
+ private RAMDirectory directory = new RAMDirectory();
+ public TestTermVectors(String s) {
+ super(s);
+ }
+
+ public void setUp() throws Exception {
+ IndexWriter writer
+ = new IndexWriter(directory, new SimpleAnalyzer(), true);
+ //writer.setUseCompoundFile(true);
+ //writer.infoStream = System.out;
+ for (int i = 0; i < 1000; i++) {
+ Document doc = new Document();
+ boolean storePos = (i % 2) == 0; //Every other doc will store position info
+ boolean storeOff = (i % 3) == 0; //Every third doc will store offset info
+ doc.add(Field.Text("field", English.intToEnglish(i), true, storePos, storeOff));
+ writer.addDocument(doc);
+ }
+ writer.close();
+ searcher = new IndexSearcher(directory);
+ }
+
+ protected void tearDown() {
+
+ }
+
+ public void test() {
+ assertNotNull(searcher);
+ }
+
+ public void testTermVectors() {
+ Query query = new TermQuery(new Term("field", "seventy"));
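+ // intToEnglish renders 70-79 and x70-x79 of every hundred with "seventy", so 100 of the 1000 docs should match.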
+ try {
+ Hits hits = searcher.search(query);
+ assertEquals(100, hits.length());
+
+ for (int i = 0; i < hits.length(); i++)
+ {
+ TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
+ assertNotNull(vector);
+ assertEquals(1, vector.length);
+ }
+ TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(50));
+ //System.out.println("Explain: " + searcher.explain(query, hits.id(50)));
+ //System.out.println("Vector: " + vector[0].toString());
+ } catch (IOException e) {
+ fail(e.toString());
+ }
+ }
+
+ public void testTermPositionVectors() {
+ Query query = new TermQuery(new Term("field", "zero"));
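+ // Only intToEnglish(0) contains "zero", so the single hit is doc 0, which stores both positions (even id) and offsets (id divisible by 3).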
+ try {
+ Hits hits = searcher.search(query);
+ assertEquals(1, hits.length());
+
+ for (int i = 0; i < hits.length(); i++)
+ {
+ TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
+ assertNotNull(vector);
+ assertEquals(1, vector.length);
+ boolean shouldBePosVector = hits.id(i) % 2 == 0;
+ assertTrue(!shouldBePosVector || vector[0] instanceof TermPositionVector);
+ if (shouldBePosVector)
+ {
+ TermPositionVector posVec = (TermPositionVector)vector[0];
+ String [] terms = posVec.getTerms();
+ assertTrue(terms != null && terms.length > 0);
+ for (int j = 0; j < terms.length; j++) {
+ int [] positions = posVec.getTermPositions(j);
+ assertTrue(positions != null);
+ assertTrue(positions.length > 0);
+ }
+ }
+ boolean shouldBeOffVector = hits.id(i) % 3 == 0;
+ if (shouldBeOffVector)
+ {
+ TermPositionVector posVec = (TermPositionVector)vector[0];
+ String [] terms = posVec.getTerms();
+ assertTrue(terms != null && terms.length > 0);
+ for (int j = 0; j < terms.length; j++) {
+ TermVectorOffsetInfo [] offsets = posVec.getOffsets(j);
+ assertTrue(offsets != null);
+ assertTrue(offsets.length > 0);
+ }
+ }
+ boolean shouldBeBothVector = hits.id(i) % 6 == 0;
+ //System.out.println("Hit Id: " + hits.id(i));
+ if (shouldBeBothVector)
+ {
+ TermPositionVector posVec = (TermPositionVector)vector[0];
+ String [] terms = posVec.getTerms();
+ assertTrue(terms != null && terms.length > 0);
+ for (int j = 0; j < terms.length; j++) {
+ TermVectorOffsetInfo [] offsets = posVec.getOffsets(j);
+ assertTrue(offsets != null);
+ assertTrue(offsets.length > 0);
+ int [] positions = posVec.getTermPositions(j);
+ assertTrue(positions != null);
+ assertTrue(positions.length > 0);
+ }
+ }
+ }
+ } catch (IOException e) {
+ fail(e.toString());
+ }
+ }
+
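+ /** "fifty" matches 100 docs (50-59 and x50-x59 in each hundred); this only sanity-checks that each hit has a stored vector. */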
+ public void testTermOffsetVectors() {
+ Query query = new TermQuery(new Term("field", "fifty"));
+ try {
+ Hits hits = searcher.search(query);
+ assertEquals(100, hits.length());
+
+ for (int i = 0; i < hits.length(); i++)
+ {
+ TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
+ assertNotNull(vector);
+ assertEquals(1, vector.length);
+
+ }
+ } catch (IOException e) {
+ fail(e.toString());
+ }
+ }
+
+ public void testKnownSetOfDocuments() {
+ String test1 = "eating chocolate in a computer lab"; //6 terms
+ String test2 = "computer in a computer lab"; //5 terms
+ String test3 = "a chocolate lab grows old"; //5 terms
+ String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
+ Map test4Map = new HashMap();
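+ //Expected frequency of each of the ten unique terms in test4.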
+ test4Map.put("chocolate", new Integer(3));
+ test4Map.put("lab", new Integer(2));
+ test4Map.put("eating", new Integer(1));
+ test4Map.put("computer", new Integer(1));
+ test4Map.put("with", new Integer(1));
+ test4Map.put("a", new Integer(1));
+ test4Map.put("colored", new Integer(1));
+ test4Map.put("in", new Integer(1));
+ test4Map.put("an", new Integer(1));
+ test4Map.put("computer", new Integer(1));
+ test4Map.put("old", new Integer(1));
+
+ Document testDoc1 = new Document();
+ setupDoc(testDoc1, test1);
+ Document testDoc2 = new Document();
+ setupDoc(testDoc2, test2);
+ Document testDoc3 = new Document();
+ setupDoc(testDoc3, test3);
+ Document testDoc4 = new Document();
+ setupDoc(testDoc4, test4);
+
+ Directory dir = new RAMDirectory();
+
+ try {
+ IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
+ assertNotNull(writer);
+ writer.addDocument(testDoc1);
+ writer.addDocument(testDoc2);
+ writer.addDocument(testDoc3);
+ writer.addDocument(testDoc4);
+ writer.close();
+ IndexSearcher knownSearcher = new IndexSearcher(dir);
+ TermEnum termEnum = knownSearcher.reader.terms();
+ TermDocs termDocs = knownSearcher.reader.termDocs();
+ //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length);
+
+ Similarity sim = knownSearcher.getSimilarity();
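+ //Walk every term/doc posting and cross-check its frequency against the document's stored term vector.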
+ while (termEnum.next())
+ {
+ Term term = termEnum.term();
+ //System.out.println("Term: " + term);
+ termDocs.seek(term);
+ while (termDocs.next())
+ {
+ int docId = termDocs.doc();
+ int freq = termDocs.freq();
+ //System.out.println("Doc Id: " + docId + " freq " + freq);
+ TermFreqVector vector = knownSearcher.reader.getTermFreqVector(docId, "field");
+ assertNotNull(vector); //must hold before vector.getTerms() is dereferenced below
+ float tf = sim.tf(freq);
+ float idf = sim.idf(term, knownSearcher);
+ //float qNorm = sim.queryNorm()
+ //This is fine since we don't have stop words
+ float lNorm = sim.lengthNorm("field", vector.getTerms().length);
+ //float coord = sim.coord()
+ //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
+ String[] vTerms = vector.getTerms();
+ int [] freqs = vector.getTermFrequencies();
+ for (int i = 0; i < vTerms.length; i++)
+ {
+ if (term.text().equals(vTerms[i]))
+ {
+ assertTrue(freqs[i] == freq);
+ }
+ }
+
+ }
+ //System.out.println("--------");
+ }
+ Query query = new TermQuery(new Term("field", "chocolate"));
+ Hits hits = knownSearcher.search(query);
+ //doc 3 should be the first hit b/c it is the shortest match
+ assertEquals(3, hits.length());
+ float score = hits.score(0);
+ /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString());
+ System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0)));
+ System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString());
+ System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
+ System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString());
+ System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
+ assertTrue(testDoc3.toString().equals(hits.doc(0).toString()));
+ assertTrue(testDoc4.toString().equals(hits.doc(1).toString()));
+ assertTrue(testDoc1.toString().equals(hits.doc(2).toString()));
+ TermFreqVector vector = knownSearcher.reader.getTermFreqVector(hits.id(1), "field");
+ assertNotNull(vector);
+ //System.out.println("Vector: " + vector);
+ String[] terms = vector.getTerms();
+ int [] freqs = vector.getTermFrequencies();
+ assertNotNull(terms);
+ assertEquals(10, terms.length);
+ for (int i = 0; i < terms.length; i++) {
+ String term = terms[i];
+ //System.out.println("Term: " + term);
+ int freq = freqs[i];
+ assertTrue(test4.indexOf(term) != -1);
+ Integer freqInt = (Integer)test4Map.get(term);
+ assertNotNull(freqInt);
+ assertEquals(freq, freqInt.intValue());
+ }
+ knownSearcher.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ fail(e.toString());
+ }
+
+
+ }
+
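+ /** Adds the text to "field" with a plain term vector (no positions or offsets). */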
+ private void setupDoc(Document doc, String text)
+ {
+ doc.add(Field.Text("field", text, true));
+ //System.out.println("Document: " + doc);
+ }
+
+
+}
cvs server: Diffing src/test/org/apache/lucene/search/spans
cvs server: Diffing src/test/org/apache/lucene/store
cvs server: Diffing src/test/org/apache/lucene/util
cvs server: Diffing xdocs
cvs server: Diffing xdocs/images
cvs server: Diffing xdocs/lucene-sandbox
cvs server: Diffing xdocs/lucene-sandbox/larm
cvs server: Diffing xdocs/stylesheets