Index: BinaryField.java =================================================================== --- BinaryField.java (revision 0) +++ BinaryField.java (revision 0) @@ -0,0 +1,37 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.EnumSet; + +public final class BinaryField extends Field { + + public static FieldType DEFAULT_TYPE = new FieldType(EnumSet.of(FieldType.Property.INDEXED + , FieldType.Property.OMIT_NORMS + , FieldType.Property.OMIT_POSITIONS_AND_TERM_FREQS + )); + + public BinaryField(String name, byte[] value) { + super(name, BinaryField.DEFAULT_TYPE, value); + this.isBinary = true; + } + + public boolean isNumeric() { + return false; + } +} Index: Field.java =================================================================== --- Field.java (revision 1134546) +++ Field.java (working copy) @@ -20,546 +20,335 @@ import java.io.Reader; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.document.NumericField.DataType; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.StringHelper; /** - A field is a section of a Document. 
Each field has two parts, a name and a - value. Values may be free text, provided as a String or as a Reader, or they - may be atomic keywords, which are not further processed. Such keywords may - be used to represent dates, urls, etc. Fields are optionally stored in the - index, so that they may be returned with hits on the document. - */ + * A field is a section of a Document. Each field has two parts, a name and a + * value. Values may be free text, provided as a String or as a Reader, or they + * may be atomic keywords, which are not further processed. Such keywords may be + * used to represent dates, urls, etc. Fields are optionally stored in the + * index, so that they may be returned with hits on the document. + */ -public final class Field extends AbstractField implements Fieldable { +public class Field implements IndexableField { - /** Specifies whether and how a field should be stored. */ - public static enum Store { + protected FieldType type; + protected String name = "body"; + // the data object for all different kind of field values + protected Object fieldsData = null; + // pre-analyzed tokenStream for indexed fields + protected TokenStream tokenStream; + protected boolean isBinary = false; + // length/offset for all primitive types + protected int binaryLength; + protected int binaryOffset; + + protected float boost = 1.0f; - /** Store the original field value in the index. This is useful for short texts - * like a document's title which should be displayed with the results. The - * value is stored in its original form, i.e. no analyzer is used before it is - * stored. - */ - YES { - @Override - public boolean isStored() { return true; } - }, - - /** Do not store the field value in the index. */ - NO { - @Override - public boolean isStored() { return false; } - }; - - public abstract boolean isStored(); + public Field(String name, FieldType type) { + } - - /** Specifies whether and how a field should be indexed. 
*/ - public static enum Index { - - /** Do not index the field value. This field can thus not be searched, - * but one can still access its contents provided it is - * {@link Field.Store stored}. */ - NO { - @Override - public boolean isIndexed() { return false; } - @Override - public boolean isAnalyzed() { return false; } - @Override - public boolean omitNorms() { return true; } - }, - - /** Index the tokens produced by running the field's - * value through an Analyzer. This is useful for - * common text. */ - ANALYZED { - @Override - public boolean isIndexed() { return true; } - @Override - public boolean isAnalyzed() { return true; } - @Override - public boolean omitNorms() { return false; } - }, - - /** Index the field's value without using an Analyzer, so it can be searched. - * As no analyzer is used the value will be stored as a single term. This is - * useful for unique Ids like product numbers. - */ - NOT_ANALYZED { - @Override - public boolean isIndexed() { return true; } - @Override - public boolean isAnalyzed() { return false; } - @Override - public boolean omitNorms() { return false; } - }, - - /** Expert: Index the field's value without an Analyzer, - * and also disable the indexing of norms. Note that you - * can also separately enable/disable norms by calling - * {@link Field#setOmitNorms}. No norms means that - * index-time field and document boosting and field - * length normalization are disabled. The benefit is - * less memory usage as norms take up one byte of RAM - * per indexed field for every document in the index, - * during searching. Note that once you index a given - * field with norms disabled, enabling norms will - * have no effect. In other words, for this to have the - * above described effect on a field, one instance of - * that field must be indexed with NOT_ANALYZED_NO_NORMS - * at some point. 
*/ - NOT_ANALYZED_NO_NORMS { - @Override - public boolean isIndexed() { return true; } - @Override - public boolean isAnalyzed() { return false; } - @Override - public boolean omitNorms() { return true; } - }, - - /** Expert: Index the tokens produced by running the - * field's value through an Analyzer, and also - * separately disable the storing of norms. See - * {@link #NOT_ANALYZED_NO_NORMS} for what norms are - * and why you may want to disable them. */ - ANALYZED_NO_NORMS { - @Override - public boolean isIndexed() { return true; } - @Override - public boolean isAnalyzed() { return true; } - @Override - public boolean omitNorms() { return true; } - }; - - /** Get the best representation of the index given the flags. */ - public static Index toIndex(boolean indexed, boolean analyzed) { - return toIndex(indexed, analyzed, false); - } - - /** Expert: Get the best representation of the index given the flags. */ - public static Index toIndex(boolean indexed, boolean analyzed, boolean omitNorms) { - - // If it is not indexed nothing else matters - if (!indexed) { - return Index.NO; - } - - // typical, non-expert - if (!omitNorms) { - if (analyzed) { - return Index.ANALYZED; - } - return Index.NOT_ANALYZED; - } - - // Expert: Norms omitted - if (analyzed) { - return Index.ANALYZED_NO_NORMS; - } - return Index.NOT_ANALYZED_NO_NORMS; - } - - public abstract boolean isIndexed(); - public abstract boolean isAnalyzed(); - public abstract boolean omitNorms(); + + public Field(String name, FieldType type, Reader reader) { + if (name == null) + throw new NullPointerException("name cannot be null"); + if (reader == null) + throw new NullPointerException("reader cannot be null"); + + this.name = StringHelper.intern(name); // field names are interned + this.fieldsData = reader; } - - /** Specifies whether and how a field should have term vectors. 
*/ - public static enum TermVector { + + public Field(String name, FieldType type, TokenStream tokenStream) { + if (name == null) + throw new NullPointerException("name cannot be null"); + if (tokenStream == null) + throw new NullPointerException("tokenStream cannot be null"); - /** Do not store term vectors. - */ - NO { - @Override - public boolean isStored() { return false; } - @Override - public boolean withPositions() { return false; } - @Override - public boolean withOffsets() { return false; } - }, + this.name = StringHelper.intern(name); // field names are interned + this.fieldsData = null; + this.tokenStream = tokenStream; + } + + public Field(String name, FieldType type, byte[] value) { + this(name, type, value, 0, value.length); + } + + public Field(String name, FieldType type, byte[] value, int offset, int length) { + this.isBinary = true; + this.fieldsData = value; + this.name = StringHelper.intern(name); + } + + public Field(String name, FieldType type, String value) { + this(name, true, type, value); + } + + public Field(String name, boolean internName, FieldType type, String value) { + if (name == null) + throw new IllegalArgumentException("name cannot be null"); + if (value == null) + throw new IllegalArgumentException("value cannot be null"); - /** Store the term vectors of each document. A term vector is a list - * of the document's terms and their number of occurrences in that document. 
*/ - YES { - @Override - public boolean isStored() { return true; } - @Override - public boolean withPositions() { return false; } - @Override - public boolean withOffsets() { return false; } - }, + this.type = type; + this.name = name; + this.fieldsData = value; - /** - * Store the term vector + token position information - * - * @see #YES - */ - WITH_POSITIONS { - @Override - public boolean isStored() { return true; } - @Override - public boolean withPositions() { return true; } - @Override - public boolean withOffsets() { return false; } - }, - - /** - * Store the term vector + Token offset information - * - * @see #YES - */ - WITH_OFFSETS { - @Override - public boolean isStored() { return true; } - @Override - public boolean withPositions() { return false; } - @Override - public boolean withOffsets() { return true; } - }, - - /** - * Store the term vector + Token position and offset information - * - * @see #YES - * @see #WITH_POSITIONS - * @see #WITH_OFFSETS - */ - WITH_POSITIONS_OFFSETS { - @Override - public boolean isStored() { return true; } - @Override - public boolean withPositions() { return true; } - @Override - public boolean withOffsets() { return true; } - }; - - /** Get the best representation of a TermVector given the flags. */ - public static TermVector toTermVector(boolean stored, boolean withOffsets, boolean withPositions) { - - // If it is not stored, nothing else matters. - if (!stored) { - return TermVector.NO; - } - - if (withOffsets) { - if (withPositions) { - return Field.TermVector.WITH_POSITIONS_OFFSETS; - } - return Field.TermVector.WITH_OFFSETS; - } - - if (withPositions) { - return Field.TermVector.WITH_POSITIONS; - } - return Field.TermVector.YES; - } - - public abstract boolean isStored(); - public abstract boolean withPositions(); - public abstract boolean withOffsets(); + if (internName) // field names are optionally interned + name = StringHelper.intern(name); } + /** + * The value of the field as a String, or null. 
If null, the Reader value or + * binary value is used. Exactly one of stringValue(), readerValue(), and + * getBinaryValue() must be set. + */ + public String stringValue() { + return fieldsData instanceof String ? (String) fieldsData : null; + } - /** The value of the field as a String, or null. If null, the Reader value or - * binary value is used. Exactly one of stringValue(), - * readerValue(), and getBinaryValue() must be set. */ - public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; } + /** + * The value of the field as a Reader, or null. If null, the String value or + * binary value is used. Exactly one of stringValue(), readerValue(), and + * getBinaryValue() must be set. + */ + public Reader readerValue() { + return fieldsData instanceof Reader ? (Reader) fieldsData : null; + } - /** The value of the field as a Reader, or null. If null, the String value or - * binary value is used. Exactly one of stringValue(), - * readerValue(), and getBinaryValue() must be set. */ - public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; } - - /** The TokesStream for this field to be used when indexing, or null. If null, the Reader value - * or String value is analyzed to produce the indexed tokens. */ - public TokenStream tokenStreamValue() { return tokenStream; } - + /** + * The TokesStream for this field to be used when indexing, or null. If null, + * the Reader value or String value is analyzed to produce the indexed tokens. + */ + public TokenStream tokenStreamValue() { + return tokenStream; + } + public Number getNumericValue() { return null; } - + public NumericField.DataType getDataType() { return null; } - - /**
Expert: change the value of this field. This can - * be used during indexing to re-use a single Field - * instance to improve indexing speed by avoiding GC cost - * of new'ing and reclaiming Field instances. Typically - * a single {@link Document} instance is re-used as - * well. This helps most on small documents.
+ + /** + *+ * Expert: change the value of this field. This can be used during indexing to + * re-use a single Field instance to improve indexing speed by avoiding GC + * cost of new'ing and reclaiming Field instances. Typically a single + * {@link Document} instance is re-used as well. This helps most on small + * documents. + *
* - *Each Field instance should only be used once - * within a single {@link Document} instance. See ImproveIndexingSpeed - * for details.
*/ + *+ * Each Field instance should only be used once within a single + * {@link Document} instance. See ImproveIndexingSpeed for details. + *
+ */ public void setValue(String value) { if (isBinary) { - throw new IllegalArgumentException("cannot set a String value on a binary field"); + throw new IllegalArgumentException( + "cannot set a String value on a binary field"); } fieldsData = value; } - - /** Expert: change the value of this field. See setValue(String). */ + + /** + * Expert: change the value of this field. See setValue(String). + */ public void setValue(Reader value) { if (isBinary) { - throw new IllegalArgumentException("cannot set a Reader value on a binary field"); + throw new IllegalArgumentException( + "cannot set a Reader value on a binary field"); } - if (isStored) { - throw new IllegalArgumentException("cannot set a Reader value on a stored field"); + if (stored()) { + throw new IllegalArgumentException( + "cannot set a Reader value on a stored field"); } fieldsData = value; } - - /** Expert: change the value of this field. See setValue(String). */ + + /** + * Expert: change the value of this field. See setValue(String). + */ public void setValue(byte[] value) { if (!isBinary) { - throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field"); + throw new IllegalArgumentException( + "cannot set a byte[] value on a non-binary field"); } fieldsData = value; binaryLength = value.length; binaryOffset = 0; } - - /** Expert: change the value of this field. See setValue(String). */ + + /** + * Expert: change the value of this field. See setValue(String). + */ public void setValue(byte[] value, int offset, int length) { if (!isBinary) { - throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field"); + throw new IllegalArgumentException( + "cannot set a byte[] value on a non-binary field"); } fieldsData = value; binaryLength = length; binaryOffset = offset; } - /** Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true. 
- * May be combined with stored values from stringValue() or getBinaryValue() */ + /** + * Expert: sets the token stream to be used for indexing and causes + * isIndexed() and isTokenized() to return true. May be combined with stored + * values from stringValue() or getBinaryValue() + */ public void setTokenStream(TokenStream tokenStream) { - this.isIndexed = true; - this.isTokenized = true; + if (!indexed() || !tokenized()) { + throw new IllegalArgumentException( + "cannot set token stream on non indexed and tokenized field"); + } this.tokenStream = tokenStream; } - - /** - * Create a field by specifying its name, value and how it will - * be saved in the index. Term vectors will not be stored in the index. - * - * @param name The name of the field - * @param value The string to process - * @param store Whethervalue should be stored in the index
- * @param index Whether the field should be indexed, and if so, if it should
- * be tokenized before indexing
- * @throws NullPointerException if name or value is null
- * @throws IllegalArgumentException if the field is neither stored nor indexed
- */
- public Field(String name, String value, Store store, Index index) {
- this(name, value, store, index, TermVector.NO);
+
+ public String name() {
+ return name;
}
- /**
- * Create a field by specifying its name, value and how it will
- * be saved in the index.
- *
- * @param name The name of the field
- * @param value The string to process
- * @param store Whether value should be stored in the index
- * @param index Whether the field should be indexed, and if so, if it should
- * be tokenized before indexing
- * @param termVector Whether term vector should be stored
- * @throws NullPointerException if name or value is null
- * @throws IllegalArgumentException in any of the following situations:
- * TermVector.YESvalue should be stored in the index
- * @param index Whether the field should be indexed, and if so, if it should
- * be tokenized before indexing
- * @param termVector Whether term vector should be stored
- * @throws NullPointerException if name or value is null
- * @throws IllegalArgumentException in any of the following situations:
- * TermVector.YES
+ * The boost is multiplied by
+ * {@link org.apache.lucene.document.Document#getBoost()} of the document
+ * containing this field. If a document has multiple fields with the same
+ * name, all such values are multiplied together. This product is then used to
+ * compute the norm factor for the field. By default, in the
+ * {@link org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)}
+ * method, the boost value is multiplied by the length normalization factor
+ * and then rounded by
+ * {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before
+ * it is stored in the index. One should attempt to ensure that this product
+ * does not overflow the range of that encoding.
*
- * @param name The name of the field
- * @param reader The reader with the content
- * @throws NullPointerException if name or reader is This class provides a {@link Field} that enables indexing
- * of numeric values for efficient range filtering and
- * sorting. Here's an example usage, adding an int value:
+ *
+ * This class provides a {@link Field} that enables indexing of numeric values
+ * for efficient range filtering and sorting. Here's an example usage, adding an
+ * int value:
+ *
* The java native types To perform range querying or filtering against a
- * By default, a You may add the same field name as a A Within Lucene, each numeric value is indexed as a
- * trie structure, where each term is logically
- * assigned to larger and larger pre-defined brackets (which
- * are simply lower-precision representations of the value).
- * The step size between each successive bracket is called the
- * For more information on the internals of numeric trie
- * indexing, including the If you only need to sort by numeric value, and never
- * run range querying/filtering, you can index using a
- * More advanced users can instead use {@link
- * NumericTokenStream} directly, when indexing numbers. This
- * class is a wrapper around this token stream type for
- * easier, more intuitive usage.
+ * The java native types
+ * To perform range querying or filtering against a
+ * By default, a
+ * You may add the same field name as a
+ * A
+ * Within Lucene, each numeric value is indexed as a trie structure,
+ * where each term is logically assigned to larger and larger pre-defined
+ * brackets (which are simply lower-precision representations of the value). The
+ * step size between each successive bracket is called the
+ *
+ * For more information on the internals of numeric trie indexing, including the
+ *
+ *
+ * If you only need to sort by numeric value, and never run range
+ * querying/filtering, you can index using a
+ * More advanced users can instead use {@link NumericTokenStream} directly, when
+ * indexing numbers. This class is a wrapper around this token stream type for
+ * easier, more intuitive usage.
+ * null
+ * @see org.apache.lucene.document.Document#setBoost(float)
+ * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
+ * @see org.apache.lucene.search.Similarity#encodeNormValue(float)
*/
- public Field(String name, Reader reader) {
- this(name, reader, TermVector.NO);
+ public void setBoost(float boost) {
+ this.boost = boost;
}
+
+ private byte[] getBinaryValue(byte[] result /* unused */) {
+ if (isBinary || fieldsData instanceof byte[]) return (byte[]) fieldsData;
+ else return null;
+ }
+
+ public boolean numeric() {
+ return false;
+ }
- /**
- * Create a tokenized and indexed field that is not stored, optionally with
- * storing term vectors. The Reader is read only when the Document is added to the index,
- * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Iterable)}
- * has been called.
- *
- * @param name The name of the field
- * @param reader The reader with the content
- * @param termVector Whether term vector should be stored
- * @throws NullPointerException if name or reader is null
- */
- public Field(String name, Reader reader, TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- if (reader == null)
- throw new NullPointerException("reader cannot be null");
-
- this.name = StringHelper.intern(name); // field names are interned
- this.fieldsData = reader;
-
- this.isStored = false;
-
- this.isIndexed = true;
- this.isTokenized = true;
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
+ public Number numericValue() {
+ return null;
}
- /**
- * Create a tokenized and indexed field that is not stored. Term vectors will
- * not be stored. This is useful for pre-analyzed fields.
- * The TokenStream is read only when the Document is added to the index,
- * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Iterable)}
- * has been called.
- *
- * @param name The name of the field
- * @param tokenStream The TokenStream with the content
- * @throws NullPointerException if name or tokenStream is null
- */
- public Field(String name, TokenStream tokenStream) {
- this(name, tokenStream, TermVector.NO);
+ public DataType numericDataType() {
+ return null;
}
- /**
- * Create a tokenized and indexed field that is not stored, optionally with
- * storing term vectors. This is useful for pre-analyzed fields.
- * The TokenStream is read only when the Document is added to the index,
- * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Iterable)}
- * has been called.
- *
- * @param name The name of the field
- * @param tokenStream The TokenStream with the content
- * @param termVector Whether term vector should be stored
- * @throws NullPointerException if name or tokenStream is null
- */
- public Field(String name, TokenStream tokenStream, TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- if (tokenStream == null)
- throw new NullPointerException("tokenStream cannot be null");
-
- this.name = StringHelper.intern(name); // field names are interned
- this.fieldsData = null;
- this.tokenStream = tokenStream;
-
- this.isStored = false;
-
- this.isIndexed = true;
- this.isTokenized = true;
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
+ private byte[] getBinaryValue() {
+ return getBinaryValue(null);
}
-
+ public BytesRef binaryValue(BytesRef reuse) {
+ final byte[] bytes = getBinaryValue();
+ if (bytes != null) {
+ if (reuse == null) {
+ return new BytesRef(bytes, getBinaryOffset(), getBinaryLength());
+ } else {
+ reuse.bytes = bytes;
+ reuse.offset = getBinaryOffset();
+ reuse.length = getBinaryLength();
+ return reuse;
+ }
+ } else {
+ return null;
+ }
+ }
+
/**
- * Create a stored field with binary value. Optionally the value may be compressed.
+ * Returns length of byte[] segment that is used as value. If the Field is
+ * not binary, the returned value is undefined.
*
- * @param name The name of the field
- * @param value The binary value
+ * @return length of byte[] segment that represents this Field value
*/
- public Field(String name, byte[] value) {
- this(name, value, 0, value.length);
+ private int getBinaryLength() {
+ if (isBinary) {
+ return binaryLength;
+ } else if (fieldsData instanceof byte[]) return ((byte[]) fieldsData).length;
+ else return 0;
}
-
+
/**
- * Create a stored field with binary value. Optionally the value may be compressed.
+ * Returns offset into byte[] segment that is used as value. If the Field is
+ * not binary, the returned value is undefined.
*
- * @param name The name of the field
- * @param value The binary value
- * @param offset Starting offset in value where this Field's bytes are
- * @param length Number of bytes to use for this Field, starting at offset
+ * @return index of the first byte in byte[] segment that represents this
+ * Field value
*/
- public Field(String name, byte[] value, int offset, int length) {
-
- if (name == null)
- throw new IllegalArgumentException("name cannot be null");
- if (value == null)
- throw new IllegalArgumentException("value cannot be null");
-
- this.name = StringHelper.intern(name); // field names are interned
- fieldsData = value;
-
- isStored = true;
- isIndexed = false;
- isTokenized = false;
- omitTermFreqAndPositions = false;
- omitNorms = true;
-
- isBinary = true;
- binaryLength = length;
- binaryOffset = offset;
-
- setStoreTermVector(TermVector.NO);
+ public int getBinaryOffset() {
+ return binaryOffset;
}
+
+ public boolean isBinary() {
+ return isBinary;
+ }
+
+ // Methods below delegate to the inner FieldType.
+
+ public boolean stored() {
+ return type.stored();
+ }
+
+ public boolean indexed() {
+ return type.indexed();
+ }
+
+ public boolean tokenized() {
+ return type.tokenized();
+ }
+
+ public boolean omitNorms() {
+ return type.omitNorms();
+ }
+
+ public boolean omitTermFreqAndPositions() {
+ return type.omitTermFreqAndPositions();
+ }
+
+ public boolean storeTermVectors() {
+ return type.storeTermVectors();
+ }
+
+ public boolean storeTermVectorOffsets() {
+ return type.storeTermVectorOffsets();
+ }
+
+ public boolean storeTermVectorPositions() {
+ return type.storeTermVectorPositions();
+ }
+
+ public boolean lazy() {
+ return type.lazy();
+ }
}
Index: FieldType.java
===================================================================
--- FieldType.java (revision 0)
+++ FieldType.java (revision 0)
@@ -0,0 +1,157 @@
+package org.apache.lucene.document;
+
+import java.util.EnumSet;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class FieldType {
+
+ public enum Property {
+ INDEXED
+ , STORED
+ , TOKENIZED
+ , STORE_TERM_VECTORS
+ , STORE_TERM_VECTORS_OFFSETS
+ , STORE_TERM_VECTORS_POSITIONS
+ , OMIT_NORMS
+ , OMIT_POSITIONS_AND_TERM_FREQS
+ , LAZY
+ }
+
+ private final EnumSet
- * document.add(new NumericField(name).setIntValue(value));
+ * document.add(new NumericField(name).setIntValue(value));
*
- *
- * For optimal performance, re-use the
- * NumericField and {@link Document} instance for more than
- * one document:
- *
+ *
+ * For optimal performance, re-use the NumericField and
+ * {@link Document} instance for more than one document:
+ *
*
* NumericField field = new NumericField(name);
* Document document = new Document();
* document.add(field);
- *
+ *
* for(all documents) {
* ...
* field.setIntValue(value)
@@ -51,172 +53,205 @@
* ...
* }
*
- *
- * int, long,
- * float and double are
- * directly supported. However, any value that can be
- * converted into these native types can also be indexed.
- * For example, date/time values represented by a
- * {@link java.util.Date} can be translated into a long
- * value using the {@link java.util.Date#getTime} method. If you
- * don't need millisecond precision, you can quantize the
- * value, either by dividing the result of
- * {@link java.util.Date#getTime} or using the separate getters
- * (for year, month, etc.) to construct an int or
- * long value.NumericField, use {@link NumericRangeQuery} or {@link
- * NumericRangeFilter}. To sort according to a
- * NumericField, use the normal numeric sort types, eg
- * {@link SortField#INT}. NumericField values
- * can also be loaded directly from {@link FieldCache}.NumericField's value is not stored but
- * is indexed for range filtering and sorting. You can use
- * the {@link #NumericField(String,Field.Store,boolean)}
- * constructor if you need to change these defaults.NumericField to
- * the same document more than once. Range querying and
- * filtering will be the logical OR of all values; so a range query
- * will hit all documents that have at least one value in
- * the range. However sort behavior is not defined. If you need to sort,
- * you should separately index a single-valued NumericField.NumericField will consume somewhat more disk space
- * in the index than an ordinary single-valued field.
- * However, for a typical index that includes substantial
- * textual content per document, this increase will likely
- * be in the noise. precisionStep, measured in bits. Smaller
- * precisionStep values result in larger number
- * of brackets, which consumes more disk space in the index
- * but may result in faster range search performance. The
- * default value, 4, was selected for a reasonable tradeoff
- * of disk space consumption versus performance. You can
- * use the expert constructor {@link
- * #NumericField(String,int,Field.Store,boolean)} if you'd
- * like to change the value. Note that you must also
- * specify a congruent value when creating {@link
- * NumericRangeQuery} or {@link NumericRangeFilter}.
- * For low cardinality fields larger precision steps are good.
- * If the cardinality is < 100, it is fair
- * to use {@link Integer#MAX_VALUE}, which produces one
- * term per value.
- *
- * precisionStep
- * configuration, see {@link NumericRangeQuery}. The format of
- * indexed values is described in {@link NumericUtils}.
- *
- * precisionStep of {@link Integer#MAX_VALUE}.
- * This will minimize disk space consumed. int, long, float
+ * and double are directly supported. However, any value that can
+ * be converted into these native types can also be indexed. For example,
+ * date/time values represented by a {@link java.util.Date} can be translated
+ * into a long value using the {@link java.util.Date#getTime} method. If you
+ * don't need millisecond precision, you can quantize the value, either by
+ * dividing the result of {@link java.util.Date#getTime} or using the separate
+ * getters (for year, month, etc.) to construct an int or
+ * long value.
+ * NumericField,
+ * use {@link NumericRangeQuery} or {@link NumericRangeFilter}. To sort
+ * according to a NumericField, use the normal numeric sort types,
+ * eg {@link SortField#INT}. NumericField values can also be loaded
+ * directly from {@link FieldCache}.
+ * NumericField's value is not stored but is indexed
+ * for range filtering and sorting. You can use the
+ * {@link #NumericField(String,FieldType)} constructor if you need to
+ * change these defaults.
+ * NumericField to the same
+ * document more than once. Range querying and filtering will be the logical OR
+ * of all values; so a range query will hit all documents that have at least one
+ * value in the range. However sort behavior is not defined. If you need to
+ * sort, you should separately index a single-valued NumericField.
+ * NumericField will consume somewhat more disk space in the
+ * index than an ordinary single-valued field. However, for a typical index that
+ * includes substantial textual content per document, this increase will likely
+ * be in the noise.
+ * precisionStep, measured in bits. Smaller
+ * precisionStep values result in larger number of brackets, which
+ * consumes more disk space in the index but may result in faster range search
+ * performance. The default value, 4, was selected for a reasonable tradeoff of
+ * disk space consumption versus performance. You can use the expert constructor
+ * {@link #NumericField(String,int,FieldType)} if you'd like to change
+ * the value. Note that you must also specify a congruent value when creating
+ * {@link NumericRangeQuery} or {@link NumericRangeFilter}. For low cardinality
+ * fields larger precision steps are good. If the cardinality is < 100, it is
+ * fair to use {@link Integer#MAX_VALUE}, which produces one term per value.
+ *
+ * precisionStep configuration, see {@link NumericRangeQuery}.
+ * The format of indexed values is described in {@link NumericUtils}.
+ *
+ * precisionStep of
+ * {@link Integer#MAX_VALUE}. This will minimize disk space consumed.
+ * precisionStep
- * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
- * a numeric value, before indexing a document containing this field,
- * set a value using the various set???Value() methods.
- * This constructor creates an indexed, but not stored field.
- * @param name the field name
+ * Creates a field for numeric values using the default
+ * precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ * The instance is not yet initialized with a numeric value; before indexing a
+ * document containing this field, set a value using the various
+ * set???Value() methods. This constructor creates an indexed, but not
+ * stored field.
+ *
+ * @param name
+ * the field name
*/
public NumericField(String name) {
- this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true);
+ this(name, NumericUtils.PRECISION_STEP_DEFAULT, NumericField.DEFAULT_TYPE);
}
/**
- * Creates a field for numeric values using the default precisionStep
- * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
- * a numeric value, before indexing a document containing this field,
- * set a value using the various set???Value() methods.
- * @param name the field name
- * @param store if the field should be stored, {@link Document#getFieldable}
- * then returns {@code NumericField} instances on search results.
- * @param index if the field should be indexed using {@link NumericTokenStream}
+ * Creates a field for numeric values using the default
+ * precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ * The instance is not yet initialized with a numeric value, before indexing a
+ * document containing this field, set a value using the various set
+ * ???Value() methods.
+ *
+ * @param name
+ * the field name
+   * @param type
+   *          the {@link FieldType} that configures this field, replacing the
+   *          former {@code store} and {@code index} arguments; when the
+   *          field is indexed, a {@link NumericTokenStream} is used (see
+   *          {@link #tokenStreamValue}).
*/
- public NumericField(String name, Field.Store store, boolean index) {
- this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index);
+ public NumericField(String name, FieldType type) {
+ this(name, NumericUtils.PRECISION_STEP_DEFAULT, type);
}
/**
* Creates a field for numeric values with the specified
- * precisionStep. The instance is not yet initialized with
- * a numeric value, before indexing a document containing this field,
- * set a value using the various set???Value() methods.
- * This constructor creates an indexed, but not stored field.
- * @param name the field name
- * @param precisionStep the used precision step
+ * precisionStep. The instance is not yet initialized with a
+ * numeric value, before indexing a document containing this field, set a
+ * value using the various set???Value() methods. This constructor
+ * creates an indexed, but not stored field.
+ *
+ * @param name
+ * the field name
+ * @param precisionStep
+ * the used precision step
*/
public NumericField(String name, int precisionStep) {
- this(name, precisionStep, Field.Store.NO, true);
+ this(name, precisionStep, NumericField.DEFAULT_TYPE);
}
-
+
/**
* Creates a field for numeric values with the specified
- * precisionStep. The instance is not yet initialized with
- * a numeric value, before indexing a document containing this field,
- * set a value using the various set???Value() methods.
- * @param name the field name
- * @param precisionStep the used precision step
- * @param store if the field should be stored, {@link Document#getFieldable}
- * then returns {@code NumericField} instances on search results.
- * @param index if the field should be indexed using {@link NumericTokenStream}
+ * precisionStep. The instance is not yet initialized with a
+ * numeric value, before indexing a document containing this field, set a
+ * value using the various set???Value() methods.
+ *
+ * @param name
+ * the field name
+ * @param precisionStep
+ * the used precision step
+   * @param type
+   *          the {@link FieldType} that configures this field, replacing the
+   *          former {@code store} and {@code index} arguments; when the
+   *          field is indexed, a {@link NumericTokenStream} is used (see
+   *          {@link #tokenStreamValue}).
*/
- public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
- super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
+ public NumericField(String name, int precisionStep, FieldType type) {
+ super(name, type);
this.precisionStep = precisionStep;
- setOmitTermFreqAndPositions(true);
}
-
+
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */
- public TokenStream tokenStreamValue() {
- if (!isIndexed())
- return null;
+ public TokenStream tokenStreamValue() {
+ if (!indexed()) return null;
if (numericTS == null) {
- // lazy init the TokenStream as it is heavy to instantiate (attributes,...),
+ // lazy init the TokenStream as it is heavy to instantiate
+ // (attributes,...),
// if not needed (stored field loading)
numericTS = new NumericTokenStream(precisionStep);
// initialize value in TokenStream
if (fieldsData != null) {
- assert type != null;
+ assert dataType != null;
final Number val = (Number) fieldsData;
- switch (type) {
+ switch (dataType) {
case INT:
- numericTS.setIntValue(val.intValue()); break;
+ numericTS.setIntValue(val.intValue());
+ break;
case LONG:
- numericTS.setLongValue(val.longValue()); break;
+ numericTS.setLongValue(val.longValue());
+ break;
case FLOAT:
- numericTS.setFloatValue(val.floatValue()); break;
+ numericTS.setFloatValue(val.floatValue());
+ break;
case DOUBLE:
- numericTS.setDoubleValue(val.doubleValue()); break;
+ numericTS.setDoubleValue(val.doubleValue());
+ break;
default:
assert false : "Should never get here";
}
@@ -229,16 +264,22 @@
public Reader readerValue() {
return null;
}
-
- /** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)}
- * on search results. It is recommended to use {@link Document#getFieldable} instead
- * that returns {@code NumericField} instances. You can then use {@link #getNumericValue}
- * to return the stored value. */
- public String stringValue() {
+
+ /**
+ * Returns the numeric value as a string. This format is also returned if you
+ * call {@link Document#get(String)} on search results. It is recommended to
+ * use {@link Document#getFieldable} instead that returns {@code NumericField}
+ * instances. You can then use {@link #getNumericValue} to return the stored
+ * value.
+ */
+ public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
}
- /** Returns the current numeric value as a subclass of {@link Number}, null if not yet initialized. */
+ /**
+ * Returns the current numeric value as a subclass of {@link Number},
+ * null if not yet initialized.
+ */
public Number getNumericValue() {
return (Number) fieldsData;
}
@@ -248,67 +289,77 @@
return precisionStep;
}
- /** Returns the data type of the current value, {@code null} if not yet set.
+ /**
+ * Returns the data type of the current value, {@code null} if not yet set.
+ *
* @since 3.2
*/
public DataType getDataType() {
- return type;
+ return dataType;
}
-
+
public boolean isNumeric() {
return true;
}
/**
* Initializes the field with the supplied long value.
- * @param value the numeric value
+ *
+ * @param value
+ * the numeric value
* @return this instance, because of this you can use it the following way:
- * document.add(new NumericField(name, precisionStep).setLongValue(value))
+ * document.add(new NumericField(name, precisionStep).setLongValue(value))
*/
public NumericField setLongValue(final long value) {
if (numericTS != null) numericTS.setLongValue(value);
fieldsData = Long.valueOf(value);
- type = DataType.LONG;
+ dataType = DataType.LONG;
return this;
}
/**
* Initializes the field with the supplied int value.
- * @param value the numeric value
+ *
+ * @param value
+ * the numeric value
* @return this instance, because of this you can use it the following way:
- * document.add(new NumericField(name, precisionStep).setIntValue(value))
+ * document.add(new NumericField(name, precisionStep).setIntValue(value))
*/
public NumericField setIntValue(final int value) {
if (numericTS != null) numericTS.setIntValue(value);
fieldsData = Integer.valueOf(value);
- type = DataType.INT;
+ dataType = DataType.INT;
return this;
}
/**
* Initializes the field with the supplied double value.
- * @param value the numeric value
+ *
+ * @param value
+ * the numeric value
* @return this instance, because of this you can use it the following way:
- * document.add(new NumericField(name, precisionStep).setDoubleValue(value))
+ * document.add(new NumericField(name, precisionStep).setDoubleValue(value))
*/
public NumericField setDoubleValue(final double value) {
if (numericTS != null) numericTS.setDoubleValue(value);
fieldsData = Double.valueOf(value);
- type = DataType.DOUBLE;
+ dataType = DataType.DOUBLE;
return this;
}
/**
* Initializes the field with the supplied float value.
- * @param value the numeric value
+ *
+ * @param value
+ * the numeric value
* @return this instance, because of this you can use it the following way:
- * document.add(new NumericField(name, precisionStep).setFloatValue(value))
+ * document.add(new NumericField(name, precisionStep).setFloatValue(value))
*/
public NumericField setFloatValue(final float value) {
if (numericTS != null) numericTS.setFloatValue(value);
fieldsData = Float.valueOf(value);
- type = DataType.FLOAT;
+ dataType = DataType.FLOAT;
return this;
}
-
+
}
Index: StringField.java
===================================================================
--- StringField.java (revision 0)
+++ StringField.java (revision 0)
@@ -0,0 +1,83 @@
+package org.apache.lucene.document;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.EnumSet;
+
+/**
+ * A {@link Field} carrying a single {@link String} value that always uses
+ * {@link #DEFAULT_TYPE}.
+ */
+public final class StringField extends Field {
+
+  /**
+   * Type applied to every {@code StringField}: {@code INDEXED},
+   * {@code OMIT_NORMS} and {@code OMIT_POSITIONS_AND_TERM_FREQS}. Declared
+   * {@code final} so the shared reference cannot be reassigned.
+   */
+  public static final FieldType DEFAULT_TYPE = new FieldType(EnumSet.of(
+      FieldType.Property.INDEXED,
+      FieldType.Property.OMIT_NORMS,
+      FieldType.Property.OMIT_POSITIONS_AND_TERM_FREQS));
+
+  /**
+   * Creates a string field.
+   *
+   * @param name
+   *          the field name
+   * @param internName
+   *          currently ignored: it is not forwarded to the {@link Field}
+   *          constructor
+   * @param value
+   *          the string value of the field
+   */
+  public StringField(String name, boolean internName, String value) {
+    super(name, StringField.DEFAULT_TYPE, value);
+  }
+
+  /**
+   * Creates a string field; equivalent to
+   * {@link #StringField(String, boolean, String) StringField(name, true, value)}.
+   *
+   * @param name
+   *          the field name
+   * @param value
+   *          the string value of the field
+   */
+  public StringField(String name, String value) {
+    this(name, true, value);
+  }
+
+  /**
+   * Returns the field value as a string.
+   *
+   * @return {@code fieldsData.toString()}, or {@code null} when {@code fieldsData} is {@code null}
+   */
+  public String stringValue() {
+    return (fieldsData == null) ? null : fieldsData.toString();
+  }
+
+  /**
+   * Reports that this field is not numeric.
+   *
+   * @return always {@code false}
+   */
+  public boolean isNumeric() {
+    return false;
+  }
+}
Index: TextField.java
===================================================================
--- TextField.java (revision 0)
+++ TextField.java (revision 0)
@@ -0,0 +1,57 @@
+package org.apache.lucene.document;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.util.EnumSet;
+
+/**
+ * A {@link Field} whose value is supplied as a {@link Reader} and that always
+ * uses {@link #DEFAULT_TYPE}.
+ */
+public final class TextField extends Field {
+
+  /**
+   * Type applied to every {@code TextField}: {@code INDEXED} and
+   * {@code TOKENIZED}. Declared {@code final} so the shared reference cannot
+   * be reassigned.
+   */
+  public static final FieldType DEFAULT_TYPE = new FieldType(EnumSet.of(
+      FieldType.Property.INDEXED,
+      FieldType.Property.TOKENIZED));
+
+  /**
+   * Creates a text field from a {@link Reader}.
+   *
+   * @param name
+   *          the field name
+   * @param reader
+   *          the reader handed to the {@link Field} constructor as the value
+   */
+  public TextField(String name, Reader reader) {
+    super(name, TextField.DEFAULT_TYPE, reader);
+  }
+
+  /**
+   * Reports that this field is not numeric.
+   *
+   * @return always {@code false}
+   */
+  public boolean isNumeric() {
+    return false;
+  }
+}