Index: BinaryField.java =================================================================== --- BinaryField.java (revision 0) +++ BinaryField.java (revision 0) @@ -0,0 +1,37 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.EnumSet; + +public final class BinaryField extends Field { + + public static FieldType DEFAULT_TYPE = new FieldType(EnumSet.of(FieldType.Property.INDEXED + , FieldType.Property.OMIT_NORMS + , FieldType.Property.OMIT_POSITIONS_AND_TERM_FREQS + )); + + public BinaryField(String name, byte[] value) { + super(name, BinaryField.DEFAULT_TYPE, value); + this.isBinary = true; + } + + public boolean isNumeric() { + return false; + } +} Index: Field.java =================================================================== --- Field.java (revision 1134546) +++ Field.java (working copy) @@ -20,546 +20,335 @@ import java.io.Reader; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.document.NumericField.DataType; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.StringHelper; /** - A field is a section of a Document. 
Each field has two parts, a name and a - value. Values may be free text, provided as a String or as a Reader, or they - may be atomic keywords, which are not further processed. Such keywords may - be used to represent dates, urls, etc. Fields are optionally stored in the - index, so that they may be returned with hits on the document. - */ + * A field is a section of a Document. Each field has two parts, a name and a + * value. Values may be free text, provided as a String or as a Reader, or they + * may be atomic keywords, which are not further processed. Such keywords may be + * used to represent dates, urls, etc. Fields are optionally stored in the + * index, so that they may be returned with hits on the document. + */ -public final class Field extends AbstractField implements Fieldable { +public class Field implements IndexableField { - /** Specifies whether and how a field should be stored. */ - public static enum Store { + protected FieldType type; + protected String name = "body"; + // the data object for all different kind of field values + protected Object fieldsData = null; + // pre-analyzed tokenStream for indexed fields + protected TokenStream tokenStream; + protected boolean isBinary = false; + // length/offset for all primitive types + protected int binaryLength; + protected int binaryOffset; + + protected float boost = 1.0f; - /** Store the original field value in the index. This is useful for short texts - * like a document's title which should be displayed with the results. The - * value is stored in its original form, i.e. no analyzer is used before it is - * stored. - */ - YES { - @Override - public boolean isStored() { return true; } - }, - - /** Do not store the field value in the index. */ - NO { - @Override - public boolean isStored() { return false; } - }; - - public abstract boolean isStored(); + public Field(String name, FieldType type) { + } - - /** Specifies whether and how a field should be indexed. 
*/ - public static enum Index { - - /** Do not index the field value. This field can thus not be searched, - * but one can still access its contents provided it is - * {@link Field.Store stored}. */ - NO { - @Override - public boolean isIndexed() { return false; } - @Override - public boolean isAnalyzed() { return false; } - @Override - public boolean omitNorms() { return true; } - }, - - /** Index the tokens produced by running the field's - * value through an Analyzer. This is useful for - * common text. */ - ANALYZED { - @Override - public boolean isIndexed() { return true; } - @Override - public boolean isAnalyzed() { return true; } - @Override - public boolean omitNorms() { return false; } - }, - - /** Index the field's value without using an Analyzer, so it can be searched. - * As no analyzer is used the value will be stored as a single term. This is - * useful for unique Ids like product numbers. - */ - NOT_ANALYZED { - @Override - public boolean isIndexed() { return true; } - @Override - public boolean isAnalyzed() { return false; } - @Override - public boolean omitNorms() { return false; } - }, - - /** Expert: Index the field's value without an Analyzer, - * and also disable the indexing of norms. Note that you - * can also separately enable/disable norms by calling - * {@link Field#setOmitNorms}. No norms means that - * index-time field and document boosting and field - * length normalization are disabled. The benefit is - * less memory usage as norms take up one byte of RAM - * per indexed field for every document in the index, - * during searching. Note that once you index a given - * field with norms disabled, enabling norms will - * have no effect. In other words, for this to have the - * above described effect on a field, one instance of - * that field must be indexed with NOT_ANALYZED_NO_NORMS - * at some point. 
*/ - NOT_ANALYZED_NO_NORMS { - @Override - public boolean isIndexed() { return true; } - @Override - public boolean isAnalyzed() { return false; } - @Override - public boolean omitNorms() { return true; } - }, - - /** Expert: Index the tokens produced by running the - * field's value through an Analyzer, and also - * separately disable the storing of norms. See - * {@link #NOT_ANALYZED_NO_NORMS} for what norms are - * and why you may want to disable them. */ - ANALYZED_NO_NORMS { - @Override - public boolean isIndexed() { return true; } - @Override - public boolean isAnalyzed() { return true; } - @Override - public boolean omitNorms() { return true; } - }; - - /** Get the best representation of the index given the flags. */ - public static Index toIndex(boolean indexed, boolean analyzed) { - return toIndex(indexed, analyzed, false); - } - - /** Expert: Get the best representation of the index given the flags. */ - public static Index toIndex(boolean indexed, boolean analyzed, boolean omitNorms) { - - // If it is not indexed nothing else matters - if (!indexed) { - return Index.NO; - } - - // typical, non-expert - if (!omitNorms) { - if (analyzed) { - return Index.ANALYZED; - } - return Index.NOT_ANALYZED; - } - - // Expert: Norms omitted - if (analyzed) { - return Index.ANALYZED_NO_NORMS; - } - return Index.NOT_ANALYZED_NO_NORMS; - } - - public abstract boolean isIndexed(); - public abstract boolean isAnalyzed(); - public abstract boolean omitNorms(); + + public Field(String name, FieldType type, Reader reader) { + if (name == null) + throw new NullPointerException("name cannot be null"); + if (reader == null) + throw new NullPointerException("reader cannot be null"); + + this.name = StringHelper.intern(name); // field names are interned + this.fieldsData = reader; } - - /** Specifies whether and how a field should have term vectors. 
*/ - public static enum TermVector { + + public Field(String name, FieldType type, TokenStream tokenStream) { + if (name == null) + throw new NullPointerException("name cannot be null"); + if (tokenStream == null) + throw new NullPointerException("tokenStream cannot be null"); - /** Do not store term vectors. - */ - NO { - @Override - public boolean isStored() { return false; } - @Override - public boolean withPositions() { return false; } - @Override - public boolean withOffsets() { return false; } - }, + this.name = StringHelper.intern(name); // field names are interned + this.fieldsData = null; + this.tokenStream = tokenStream; + } + + public Field(String name, FieldType type, byte[] value) { + this(name, type, value, 0, value.length); + } + + public Field(String name, FieldType type, byte[] value, int offset, int length) { + this.isBinary = true; + this.fieldsData = value; + this.name = StringHelper.intern(name); + } + + public Field(String name, FieldType type, String value) { + this(name, true, type, value); + } + + public Field(String name, boolean internName, FieldType type, String value) { + if (name == null) + throw new IllegalArgumentException("name cannot be null"); + if (value == null) + throw new IllegalArgumentException("value cannot be null"); - /** Store the term vectors of each document. A term vector is a list - * of the document's terms and their number of occurrences in that document. 
*/ - YES { - @Override - public boolean isStored() { return true; } - @Override - public boolean withPositions() { return false; } - @Override - public boolean withOffsets() { return false; } - }, + this.type = type; + this.name = name; + this.fieldsData = value; - /** - * Store the term vector + token position information - * - * @see #YES - */ - WITH_POSITIONS { - @Override - public boolean isStored() { return true; } - @Override - public boolean withPositions() { return true; } - @Override - public boolean withOffsets() { return false; } - }, - - /** - * Store the term vector + Token offset information - * - * @see #YES - */ - WITH_OFFSETS { - @Override - public boolean isStored() { return true; } - @Override - public boolean withPositions() { return false; } - @Override - public boolean withOffsets() { return true; } - }, - - /** - * Store the term vector + Token position and offset information - * - * @see #YES - * @see #WITH_POSITIONS - * @see #WITH_OFFSETS - */ - WITH_POSITIONS_OFFSETS { - @Override - public boolean isStored() { return true; } - @Override - public boolean withPositions() { return true; } - @Override - public boolean withOffsets() { return true; } - }; - - /** Get the best representation of a TermVector given the flags. */ - public static TermVector toTermVector(boolean stored, boolean withOffsets, boolean withPositions) { - - // If it is not stored, nothing else matters. - if (!stored) { - return TermVector.NO; - } - - if (withOffsets) { - if (withPositions) { - return Field.TermVector.WITH_POSITIONS_OFFSETS; - } - return Field.TermVector.WITH_OFFSETS; - } - - if (withPositions) { - return Field.TermVector.WITH_POSITIONS; - } - return Field.TermVector.YES; - } - - public abstract boolean isStored(); - public abstract boolean withPositions(); - public abstract boolean withOffsets(); + if (internName) // field names are optionally interned + name = StringHelper.intern(name); } + /** + * The value of the field as a String, or null. 
If null, the Reader value or + * binary value is used. Exactly one of stringValue(), readerValue(), and + * getBinaryValue() must be set. + */ + public String stringValue() { + return fieldsData instanceof String ? (String) fieldsData : null; + } - /** The value of the field as a String, or null. If null, the Reader value or - * binary value is used. Exactly one of stringValue(), - * readerValue(), and getBinaryValue() must be set. */ - public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; } + /** + * The value of the field as a Reader, or null. If null, the String value or + * binary value is used. Exactly one of stringValue(), readerValue(), and + * getBinaryValue() must be set. + */ + public Reader readerValue() { + return fieldsData instanceof Reader ? (Reader) fieldsData : null; + } - /** The value of the field as a Reader, or null. If null, the String value or - * binary value is used. Exactly one of stringValue(), - * readerValue(), and getBinaryValue() must be set. */ - public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; } - - /** The TokesStream for this field to be used when indexing, or null. If null, the Reader value - * or String value is analyzed to produce the indexed tokens. */ - public TokenStream tokenStreamValue() { return tokenStream; } - + /** + * The TokenStream for this field to be used when indexing, or null. If null, + * the Reader value or String value is analyzed to produce the indexed tokens. + */ + public TokenStream tokenStreamValue() { + return tokenStream; + } + public Number getNumericValue() { return null; } - + public NumericField.DataType getDataType() { return null; } - - /**

Expert: change the value of this field. This can - * be used during indexing to re-use a single Field - * instance to improve indexing speed by avoiding GC cost - * of new'ing and reclaiming Field instances. Typically - * a single {@link Document} instance is re-used as - * well. This helps most on small documents.

+ + /** + *

+ * Expert: change the value of this field. This can be used during indexing to + * re-use a single Field instance to improve indexing speed by avoiding GC + * cost of new'ing and reclaiming Field instances. Typically a single + * {@link Document} instance is re-used as well. This helps most on small + * documents. + *

* - *

Each Field instance should only be used once - * within a single {@link Document} instance. See ImproveIndexingSpeed - * for details.

*/ + *

+ * Each Field instance should only be used once within a single + * {@link Document} instance. See ImproveIndexingSpeed for details. + *

+ */ public void setValue(String value) { if (isBinary) { - throw new IllegalArgumentException("cannot set a String value on a binary field"); + throw new IllegalArgumentException( + "cannot set a String value on a binary field"); } fieldsData = value; } - - /** Expert: change the value of this field. See setValue(String). */ + + /** + * Expert: change the value of this field. See setValue(String). + */ public void setValue(Reader value) { if (isBinary) { - throw new IllegalArgumentException("cannot set a Reader value on a binary field"); + throw new IllegalArgumentException( + "cannot set a Reader value on a binary field"); } - if (isStored) { - throw new IllegalArgumentException("cannot set a Reader value on a stored field"); + if (stored()) { + throw new IllegalArgumentException( + "cannot set a Reader value on a stored field"); } fieldsData = value; } - - /** Expert: change the value of this field. See setValue(String). */ + + /** + * Expert: change the value of this field. See setValue(String). + */ public void setValue(byte[] value) { if (!isBinary) { - throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field"); + throw new IllegalArgumentException( + "cannot set a byte[] value on a non-binary field"); } fieldsData = value; binaryLength = value.length; binaryOffset = 0; } - - /** Expert: change the value of this field. See setValue(String). */ + + /** + * Expert: change the value of this field. See setValue(String). + */ public void setValue(byte[] value, int offset, int length) { if (!isBinary) { - throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field"); + throw new IllegalArgumentException( + "cannot set a byte[] value on a non-binary field"); } fieldsData = value; binaryLength = length; binaryOffset = offset; } - /** Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true. 
- * May be combined with stored values from stringValue() or getBinaryValue() */ + /** + * Expert: sets the token stream to be used for indexing. Throws + * IllegalArgumentException unless this field is both indexed and tokenized. May be combined with stored + * values from stringValue() or getBinaryValue() + */ public void setTokenStream(TokenStream tokenStream) { - this.isIndexed = true; - this.isTokenized = true; + if (!indexed() || !tokenized()) { + throw new IllegalArgumentException( + "cannot set token stream on non indexed and tokenized field"); + } this.tokenStream = tokenStream; } - - /** - * Create a field by specifying its name, value and how it will - * be saved in the index. Term vectors will not be stored in the index. - * - * @param name The name of the field - * @param value The string to process - * @param store Whether value should be stored in the index - * @param index Whether the field should be indexed, and if so, if it should - * be tokenized before indexing - * @throws NullPointerException if name or value is null - * @throws IllegalArgumentException if the field is neither stored nor indexed - */ - public Field(String name, String value, Store store, Index index) { - this(name, value, store, index, TermVector.NO); + + public String name() { + return name; } /** - * Create a field by specifying its name, value and how it will - * be saved in the index. 
- * - * @param name The name of the field - * @param value The string to process - * @param store Whether value should be stored in the index - * @param index Whether the field should be indexed, and if so, if it should - * be tokenized before indexing - * @param termVector Whether term vector should be stored - * @throws NullPointerException if name or value is null - * @throws IllegalArgumentException in any of the following situations: - * - */ - public Field(String name, String value, Store store, Index index, TermVector termVector) { - this(name, true, value, store, index, termVector); + public float boost() { + return boost; } /** - * Create a field by specifying its name, value and how it will - * be saved in the index. + * Sets the boost factor for hits on this field. This value will be multiplied + * into the score of all hits on this field of this document. * - * @param name The name of the field - * @param internName Whether to .intern() name or not - * @param value The string to process - * @param store Whether value should be stored in the index - * @param index Whether the field should be indexed, and if so, if it should - * be tokenized before indexing - * @param termVector Whether term vector should be stored - * @throws NullPointerException if name or value is null - * @throws IllegalArgumentException in any of the following situations: - * - */ - public Field(String name, boolean internName, String value, Store store, Index index, TermVector termVector) { - if (name == null) - throw new NullPointerException("name cannot be null"); - if (value == null) - throw new NullPointerException("value cannot be null"); - if (name.length() == 0 && value.length() == 0) - throw new IllegalArgumentException("name and value cannot both be empty"); - if (index == Index.NO && store == Store.NO) - throw new IllegalArgumentException("it doesn't make sense to have a field that " - + "is neither indexed nor stored"); - if (index == Index.NO && termVector != 
TermVector.NO) - throw new IllegalArgumentException("cannot store term vector information " - + "for a field that is not indexed"); - - if (internName) // field names are optionally interned - name = StringHelper.intern(name); - - this.name = name; - - this.fieldsData = value; - - this.isStored = store.isStored(); - - this.isIndexed = index.isIndexed(); - this.isTokenized = index.isAnalyzed(); - this.omitNorms = index.omitNorms(); - if (index == Index.NO) { - this.omitTermFreqAndPositions = false; - } - - this.isBinary = false; - - setStoreTermVector(termVector); - } - - /** - * Create a tokenized and indexed field that is not stored. Term vectors will - * not be stored. The Reader is read only when the Document is added to the index, - * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Iterable)} - * has been called. + *

+ * The boost is multiplied by + * {@link org.apache.lucene.document.Document#getBoost()} of the document + * containing this field. If a document has multiple fields with the same + * name, all such values are multiplied together. This product is then used to + * compute the norm factor for the field. By default, in the + * {@link org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} + * method, the boost value is multiplied by the length normalization factor + * and then rounded by + * {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before + * it is stored in the index. One should attempt to ensure that this product + * does not overflow the range of that encoding. * - * @param name The name of the field - * @param reader The reader with the content - * @throws NullPointerException if name or reader is null + * @see org.apache.lucene.document.Document#setBoost(float) + * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState) + * @see org.apache.lucene.search.Similarity#encodeNormValue(float) */ - public Field(String name, Reader reader) { - this(name, reader, TermVector.NO); + public void setBoost(float boost) { + this.boost = boost; } + + private byte[] getBinaryValue(byte[] result /* unused */) { + if (isBinary || fieldsData instanceof byte[]) return (byte[]) fieldsData; + else return null; + } + + public boolean numeric() { + return false; + } - /** - * Create a tokenized and indexed field that is not stored, optionally with - * storing term vectors. The Reader is read only when the Document is added to the index, - * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Iterable)} - * has been called. 
- * - * @param name The name of the field - * @param reader The reader with the content - * @param termVector Whether term vector should be stored - * @throws NullPointerException if name or reader is null - */ - public Field(String name, Reader reader, TermVector termVector) { - if (name == null) - throw new NullPointerException("name cannot be null"); - if (reader == null) - throw new NullPointerException("reader cannot be null"); - - this.name = StringHelper.intern(name); // field names are interned - this.fieldsData = reader; - - this.isStored = false; - - this.isIndexed = true; - this.isTokenized = true; - - this.isBinary = false; - - setStoreTermVector(termVector); + public Number numericValue() { + return null; } - /** - * Create a tokenized and indexed field that is not stored. Term vectors will - * not be stored. This is useful for pre-analyzed fields. - * The TokenStream is read only when the Document is added to the index, - * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Iterable)} - * has been called. - * - * @param name The name of the field - * @param tokenStream The TokenStream with the content - * @throws NullPointerException if name or tokenStream is null - */ - public Field(String name, TokenStream tokenStream) { - this(name, tokenStream, TermVector.NO); + public DataType numericDataType() { + return null; } - /** - * Create a tokenized and indexed field that is not stored, optionally with - * storing term vectors. This is useful for pre-analyzed fields. - * The TokenStream is read only when the Document is added to the index, - * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Iterable)} - * has been called. 
- * - * @param name The name of the field - * @param tokenStream The TokenStream with the content - * @param termVector Whether term vector should be stored - * @throws NullPointerException if name or tokenStream is null - */ - public Field(String name, TokenStream tokenStream, TermVector termVector) { - if (name == null) - throw new NullPointerException("name cannot be null"); - if (tokenStream == null) - throw new NullPointerException("tokenStream cannot be null"); - - this.name = StringHelper.intern(name); // field names are interned - this.fieldsData = null; - this.tokenStream = tokenStream; - - this.isStored = false; - - this.isIndexed = true; - this.isTokenized = true; - - this.isBinary = false; - - setStoreTermVector(termVector); + private byte[] getBinaryValue() { + return getBinaryValue(null); } - + public BytesRef binaryValue(BytesRef reuse) { + final byte[] bytes = getBinaryValue(); + if (bytes != null) { + if (reuse == null) { + return new BytesRef(bytes, getBinaryOffset(), getBinaryLength()); + } else { + reuse.bytes = bytes; + reuse.offset = getBinaryOffset(); + reuse.length = getBinaryLength(); + return reuse; + } + } else { + return null; + } + } + /** - * Create a stored field with binary value. Optionally the value may be compressed. + * Returns length of byte[] segment that is used as value, if Field is not + * binary returned value is undefined * - * @param name The name of the field - * @param value The binary value + * @return length of byte[] segment that represents this Field value */ - public Field(String name, byte[] value) { - this(name, value, 0, value.length); + private int getBinaryLength() { + if (isBinary) { + return binaryLength; + } else if (fieldsData instanceof byte[]) return ((byte[]) fieldsData).length; + else return 0; } - + /** - * Create a stored field with binary value. Optionally the value may be compressed. 
+ * Returns offset into byte[] segment that is used as value, if Field is not + * binary returned value is undefined * - * @param name The name of the field - * @param value The binary value - * @param offset Starting offset in value where this Field's bytes are - * @param length Number of bytes to use for this Field, starting at offset + * @return index of the first character in byte[] segment that represents this + * Field value */ - public Field(String name, byte[] value, int offset, int length) { - - if (name == null) - throw new IllegalArgumentException("name cannot be null"); - if (value == null) - throw new IllegalArgumentException("value cannot be null"); - - this.name = StringHelper.intern(name); // field names are interned - fieldsData = value; - - isStored = true; - isIndexed = false; - isTokenized = false; - omitTermFreqAndPositions = false; - omitNorms = true; - - isBinary = true; - binaryLength = length; - binaryOffset = offset; - - setStoreTermVector(TermVector.NO); + public int getBinaryOffset() { + return binaryOffset; } + + public boolean isBinary() { + return isBinary; + } + + /** methods from inner FieldType */ + + public boolean stored() { + return type.stored(); + } + + public boolean indexed() { + return type.indexed(); + } + + public boolean tokenized() { + return type.tokenized(); + } + + public boolean omitNorms() { + return type.omitNorms(); + } + + public boolean omitTermFreqAndPositions() { + return type.omitTermFreqAndPositions(); + } + + public boolean storeTermVectors() { + return type.storeTermVectors(); + } + + public boolean storeTermVectorOffsets() { + return type.storeTermVectorOffsets(); + } + + public boolean storeTermVectorPositions() { + return type.storeTermVectorPositions(); + } + + public boolean lazy() { + return type.lazy(); + } } Index: FieldType.java =================================================================== --- FieldType.java (revision 0) +++ FieldType.java (revision 0) @@ -0,0 +1,157 @@ +package 
org.apache.lucene.document; + +import java.util.EnumSet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +public class FieldType { + + public enum Property { + INDEXED + , STORED + , TOKENIZED + , STORE_TERM_VECTORS + , STORE_TERM_VECTORS_OFFSETS + , STORE_TERM_VECTORS_POSITIONS + , OMIT_NORMS + , OMIT_POSITIONS_AND_TERM_FREQS + , LAZY + } + + private final EnumSet properties; + + public FieldType(EnumSet properties) { + this.properties = properties; + } + + public boolean indexed() { + return properties.contains(Property.INDEXED); + } + + public void setIndexed() { + properties.add(Property.INDEXED); + } + + public boolean stored() { + return properties.contains(Property.STORED); + } + + public void setStored() { + properties.add(Property.STORED); + } + + public boolean tokenized() { + return properties.contains(Property.TOKENIZED); + } + + public void setTokenized() { + properties.add(Property.TOKENIZED); + } + + public boolean storeTermVectors() { + return properties.contains(Property.STORE_TERM_VECTORS); + } + + public void setStoreTermVectors() { + properties.add(Property.STORE_TERM_VECTORS); + } + + public boolean storeTermVectorOffsets() { + return properties.contains(Property.STORE_TERM_VECTORS_OFFSETS); + 
} + + public void setStoreTermVectorOffsets() { + properties.add(Property.STORE_TERM_VECTORS_OFFSETS); + } + + public boolean storeTermVectorPositions() { + return properties.contains(Property.STORE_TERM_VECTORS_POSITIONS); + } + + public void setStoreTermVectorPositions() { + properties.add(Property.STORE_TERM_VECTORS_POSITIONS); + } + + public boolean omitNorms() { + return properties.contains(Property.OMIT_NORMS); + } + + public void setOmitNorms() { + properties.add(Property.OMIT_NORMS); + } + + public boolean omitTermFreqAndPositions() { + return properties.contains(Property.OMIT_POSITIONS_AND_TERM_FREQS); + } + + public void setOmitTermFreqAndPositions() { + properties.add(Property.OMIT_POSITIONS_AND_TERM_FREQS); + } + + public boolean lazy() { + return properties.contains(Property.LAZY); + } + + public void setLazy() { + properties.add(Property.LAZY); + } + + /** Prints a Field for human consumption. */ + @Override + public final String toString() { + StringBuilder result = new StringBuilder(); + if (stored()) { + result.append("stored"); + } + if (indexed()) { + if (result.length() > 0) + result.append(","); + result.append("indexed"); + } + if (tokenized()) { + if (result.length() > 0) + result.append(","); + result.append("tokenized"); + } + if (storeTermVectors()) { + if (result.length() > 0) + result.append(","); + result.append("termVector"); + } + if (storeTermVectorOffsets()) { + if (result.length() > 0) + result.append(","); + result.append("termVectorOffsets"); + } + if (storeTermVectorPositions()) { + if (result.length() > 0) + result.append(","); + result.append("termVectorPosition"); + } + if (omitNorms()) { + result.append(",omitNorms"); + } + if (omitTermFreqAndPositions()) { + result.append(",omitTermFreqAndPositions"); + } + if (lazy()){ + result.append(",lazy"); + } + + return result.toString(); + } +} Index: NumericField.java =================================================================== --- NumericField.java (revision 1134546) +++ 
NumericField.java (working copy) @@ -18,6 +18,7 @@ */ import java.io.Reader; +import java.util.EnumSet; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.NumericTokenStream; @@ -28,22 +29,23 @@ import org.apache.lucene.search.FieldCache; // javadocs /** - *

This class provides a {@link Field} that enables indexing - * of numeric values for efficient range filtering and - * sorting. Here's an example usage, adding an int value: + *

+ * This class provides a {@link Field} that enables indexing of numeric values + * for efficient range filtering and sorting. Here's an example usage, adding an + * int value: + * *

- *  document.add(new NumericField(name).setIntValue(value));
+ * document.add(new NumericField(name).setIntValue(value));
  * 
- * - * For optimal performance, re-use the - * NumericField and {@link Document} instance for more than - * one document: - * + * + * For optimal performance, re-use the NumericField and + * {@link Document} instance for more than one document: + * *
  *  NumericField field = new NumericField(name);
  *  Document document = new Document();
  *  document.add(field);
- *
+ * 
  *  for(all documents) {
  *    ...
  *    field.setIntValue(value)
@@ -51,172 +53,205 @@
  *    ...
  *  }
  * 
- * - *

The java native types int, long, - * float and double are - * directly supported. However, any value that can be - * converted into these native types can also be indexed. - * For example, date/time values represented by a - * {@link java.util.Date} can be translated into a long - * value using the {@link java.util.Date#getTime} method. If you - * don't need millisecond precision, you can quantize the - * value, either by dividing the result of - * {@link java.util.Date#getTime} or using the separate getters - * (for year, month, etc.) to construct an int or - * long value.

- * - *

To perform range querying or filtering against a - * NumericField, use {@link NumericRangeQuery} or {@link - * NumericRangeFilter}. To sort according to a - * NumericField, use the normal numeric sort types, eg - * {@link SortField#INT}. NumericField values - * can also be loaded directly from {@link FieldCache}.

- * - *

By default, a NumericField's value is not stored but - * is indexed for range filtering and sorting. You can use - * the {@link #NumericField(String,Field.Store,boolean)} - * constructor if you need to change these defaults.

- * - *

You may add the same field name as a NumericField to - * the same document more than once. Range querying and - * filtering will be the logical OR of all values; so a range query - * will hit all documents that have at least one value in - * the range. However sort behavior is not defined. If you need to sort, - * you should separately index a single-valued NumericField.

- * - *

A NumericField will consume somewhat more disk space - * in the index than an ordinary single-valued field. - * However, for a typical index that includes substantial - * textual content per document, this increase will likely - * be in the noise.

- * - *

Within Lucene, each numeric value is indexed as a - * trie structure, where each term is logically - * assigned to larger and larger pre-defined brackets (which - * are simply lower-precision representations of the value). - * The step size between each successive bracket is called the - * precisionStep, measured in bits. Smaller - * precisionStep values result in larger number - * of brackets, which consumes more disk space in the index - * but may result in faster range search performance. The - * default value, 4, was selected for a reasonable tradeoff - * of disk space consumption versus performance. You can - * use the expert constructor {@link - * #NumericField(String,int,Field.Store,boolean)} if you'd - * like to change the value. Note that you must also - * specify a congruent value when creating {@link - * NumericRangeQuery} or {@link NumericRangeFilter}. - * For low cardinality fields larger precision steps are good. - * If the cardinality is < 100, it is fair - * to use {@link Integer#MAX_VALUE}, which produces one - * term per value. - * - *

For more information on the internals of numeric trie - * indexing, including the precisionStep - * configuration, see {@link NumericRangeQuery}. The format of - * indexed values is described in {@link NumericUtils}. - * - *

If you only need to sort by numeric value, and never - * run range querying/filtering, you can index using a - * precisionStep of {@link Integer#MAX_VALUE}. - * This will minimize disk space consumed.

- * - *

More advanced users can instead use {@link - * NumericTokenStream} directly, when indexing numbers. This - * class is a wrapper around this token stream type for - * easier, more intuitive usage.

- * + * + *

+ * The java native types int, long, float + * and double are directly supported. However, any value that can + * be converted into these native types can also be indexed. For example, + * date/time values represented by a {@link java.util.Date} can be translated + * into a long value using the {@link java.util.Date#getTime} method. If you + * don't need millisecond precision, you can quantize the value, either by + * dividing the result of {@link java.util.Date#getTime} or using the separate + * getters (for year, month, etc.) to construct an int or + * long value. + *

+ * + *

+ * To perform range querying or filtering against a NumericField, + * use {@link NumericRangeQuery} or {@link NumericRangeFilter}. To sort + * according to a NumericField, use the normal numeric sort types, + * eg {@link SortField#INT}. NumericField values can also be loaded + * directly from {@link FieldCache}. + *

+ * + *

+ * By default, a NumericField's value is not stored but is indexed + * for range filtering and sorting. You can use the + * {@link #NumericField(String,FieldType)} constructor if you need to + * change these defaults. + *

+ * + *

+ * You may add the same field name as a NumericField to the same + * document more than once. Range querying and filtering will be the logical OR + * of all values; so a range query will hit all documents that have at least one + * value in the range. However sort behavior is not defined. If you need to + * sort, you should separately index a single-valued NumericField. + *

+ * + *

+ * A NumericField will consume somewhat more disk space in the + * index than an ordinary single-valued field. However, for a typical index that + * includes substantial textual content per document, this increase will likely + * be in the noise. + *

+ * + *

+ * Within Lucene, each numeric value is indexed as a trie structure, + * where each term is logically assigned to larger and larger pre-defined + * brackets (which are simply lower-precision representations of the value). The + * step size between each successive bracket is called the + * precisionStep, measured in bits. Smaller + * precisionStep values result in larger number of brackets, which + * consumes more disk space in the index but may result in faster range search + * performance. The default value, 4, was selected for a reasonable tradeoff of + * disk space consumption versus performance. You can use the expert constructor + * {@link #NumericField(String,int,FieldType)} if you'd like to change + * the value. Note that you must also specify a congruent value when creating + * {@link NumericRangeQuery} or {@link NumericRangeFilter}. For low cardinality + * fields larger precision steps are good. If the cardinality is < 100, it is + * fair to use {@link Integer#MAX_VALUE}, which produces one term per value. + * + *

+ * For more information on the internals of numeric trie indexing, including the + * + * precisionStep configuration, see {@link NumericRangeQuery}. + * The format of indexed values is described in {@link NumericUtils}. + * + *

+ * If you only need to sort by numeric value, and never run range + * querying/filtering, you can index using a precisionStep of + * {@link Integer#MAX_VALUE}. This will minimize disk space consumed. + *

+ * + *

+ * More advanced users can instead use {@link NumericTokenStream} directly, when + * indexing numbers. This class is a wrapper around this token stream type for + * easier, more intuitive usage. + *

+ * * @since 2.9 */ -public final class NumericField extends AbstractField { - - /** Data type of the value in {@link NumericField}. +public final class NumericField extends Field { + + /** + * Data type of the value in {@link NumericField}. + * * @since 3.2 */ - public static enum DataType { INT, LONG, FLOAT, DOUBLE } - + public static enum DataType { + INT, LONG, FLOAT, DOUBLE + } + + // default field type is only indexed; + public static FieldType DEFAULT_TYPE = new FieldType(EnumSet.of( + FieldType.Property.INDEXED, FieldType.Property.OMIT_NORMS, + FieldType.Property.OMIT_POSITIONS_AND_TERM_FREQS)); + + private DataType dataType; private transient NumericTokenStream numericTS; - private DataType type; private final int precisionStep; - + /** - * Creates a field for numeric values using the default precisionStep - * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with - * a numeric value, before indexing a document containing this field, - * set a value using the various set???Value() methods. - * This constructor creates an indexed, but not stored field. - * @param name the field name + * Creates a field for numeric values using the default + * precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * The instance is not yet initialized with a numeric value, before indexing a + * document containing this field, set a value using the various set + * ???Value() methods. This constructor creates an indexed, but not + * stored field. + * + * @param name + * the field name */ public NumericField(String name) { - this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true); + this(name, NumericUtils.PRECISION_STEP_DEFAULT, NumericField.DEFAULT_TYPE); } /** - * Creates a field for numeric values using the default precisionStep - * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). 
The instance is not yet initialized with - * a numeric value, before indexing a document containing this field, - * set a value using the various set???Value() methods. - * @param name the field name - * @param store if the field should be stored, {@link Document#getFieldable} - * then returns {@code NumericField} instances on search results. - * @param index if the field should be indexed using {@link NumericTokenStream} + * Creates a field for numeric values using the default + * precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * The instance is not yet initialized with a numeric value, before indexing a + * document containing this field, set a value using the various set + * ???Value() methods. + * + * @param name + * the field name + * @param type + * the {@link FieldType} to use for this field; if the type is stored, + * {@link Document#getFieldable} then returns {@code NumericField} + * instances on search results, and if it is indexed, the value is + * indexed using {@link NumericTokenStream} */ - public NumericField(String name, Field.Store store, boolean index) { - this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index); + public NumericField(String name, FieldType type) { + this(name, NumericUtils.PRECISION_STEP_DEFAULT, type); } /** * Creates a field for numeric values with the specified - * precisionStep. The instance is not yet initialized with - * a numeric value, before indexing a document containing this field, - * set a value using the various set???Value() methods. - * This constructor creates an indexed, but not stored field. - * @param name the field name - * @param precisionStep the used precision step + * precisionStep. The instance is not yet initialized with a + * numeric value, before indexing a document containing this field, set a + * value using the various set???Value() methods. This constructor + * creates an indexed, but not stored field.
+ * + * @param name + * the field name + * @param precisionStep + * the used precision step */ public NumericField(String name, int precisionStep) { - this(name, precisionStep, Field.Store.NO, true); + this(name, precisionStep, NumericField.DEFAULT_TYPE); } - + /** * Creates a field for numeric values with the specified - * precisionStep. The instance is not yet initialized with - * a numeric value, before indexing a document containing this field, - * set a value using the various set???Value() methods. - * @param name the field name - * @param precisionStep the used precision step - * @param store if the field should be stored, {@link Document#getFieldable} - * then returns {@code NumericField} instances on search results. - * @param index if the field should be indexed using {@link NumericTokenStream} + * precisionStep. The instance is not yet initialized with a + * numeric value, before indexing a document containing this field, set a + * value using the various set???Value() methods. + * + * @param name + * the field name + * @param precisionStep + * the used precision step + * @param type + * the {@link FieldType} to use for this field; if the type is stored, + * {@link Document#getFieldable} then returns {@code NumericField} + * instances on search results, and if it is indexed, the value is + * indexed using {@link NumericTokenStream} */ - public NumericField(String name, int precisionStep, Field.Store store, boolean index) { - super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO); + public NumericField(String name, int precisionStep, FieldType type) { + super(name, type); this.precisionStep = precisionStep; - setOmitTermFreqAndPositions(true); } - + /** Returns a {@link NumericTokenStream} for indexing the numeric value.
*/ - public TokenStream tokenStreamValue() { - if (!isIndexed()) - return null; + public TokenStream tokenStreamValue() { + if (!indexed()) return null; if (numericTS == null) { - // lazy init the TokenStream as it is heavy to instantiate (attributes,...), + // lazy init the TokenStream as it is heavy to instantiate + // (attributes,...), // if not needed (stored field loading) numericTS = new NumericTokenStream(precisionStep); // initialize value in TokenStream if (fieldsData != null) { - assert type != null; + assert dataType != null; final Number val = (Number) fieldsData; - switch (type) { + switch (dataType) { case INT: - numericTS.setIntValue(val.intValue()); break; + numericTS.setIntValue(val.intValue()); + break; case LONG: - numericTS.setLongValue(val.longValue()); break; + numericTS.setLongValue(val.longValue()); + break; case FLOAT: - numericTS.setFloatValue(val.floatValue()); break; + numericTS.setFloatValue(val.floatValue()); + break; case DOUBLE: - numericTS.setDoubleValue(val.doubleValue()); break; + numericTS.setDoubleValue(val.doubleValue()); + break; default: assert false : "Should never get here"; } @@ -229,16 +264,22 @@ public Reader readerValue() { return null; } - - /** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)} - * on search results. It is recommended to use {@link Document#getFieldable} instead - * that returns {@code NumericField} instances. You can then use {@link #getNumericValue} - * to return the stored value. */ - public String stringValue() { + + /** + * Returns the numeric value as a string. This format is also returned if you + * call {@link Document#get(String)} on search results. It is recommended to + * use {@link Document#getFieldable} instead that returns {@code NumericField} + * instances. You can then use {@link #getNumericValue} to return the stored + * value. + */ + public String stringValue() { return (fieldsData == null) ? 
null : fieldsData.toString(); } - /** Returns the current numeric value as a subclass of {@link Number}, null if not yet initialized. */ + /** + * Returns the current numeric value as a subclass of {@link Number}, + * null if not yet initialized. + */ public Number getNumericValue() { return (Number) fieldsData; } @@ -248,67 +289,77 @@ return precisionStep; } - /** Returns the data type of the current value, {@code null} if not yet set. + /** + * Returns the data type of the current value, {@code null} if not yet set. + * * @since 3.2 */ public DataType getDataType() { - return type; + return dataType; } - + public boolean isNumeric() { return true; } /** * Initializes the field with the supplied long value. - * @param value the numeric value + * + * @param value + * the numeric value * @return this instance, because of this you can use it the following way: - * document.add(new NumericField(name, precisionStep).setLongValue(value)) + * document.add(new NumericField(name, precisionStep).setLongValue(value)) */ public NumericField setLongValue(final long value) { if (numericTS != null) numericTS.setLongValue(value); fieldsData = Long.valueOf(value); - type = DataType.LONG; + dataType = DataType.LONG; return this; } /** * Initializes the field with the supplied int value. - * @param value the numeric value + * + * @param value + * the numeric value * @return this instance, because of this you can use it the following way: - * document.add(new NumericField(name, precisionStep).setIntValue(value)) + * document.add(new NumericField(name, precisionStep).setIntValue(value)) */ public NumericField setIntValue(final int value) { if (numericTS != null) numericTS.setIntValue(value); fieldsData = Integer.valueOf(value); - type = DataType.INT; + dataType = DataType.INT; return this; } /** * Initializes the field with the supplied double value. 
- * @param value the numeric value + * + * @param value + * the numeric value * @return this instance, because of this you can use it the following way: - * document.add(new NumericField(name, precisionStep).setDoubleValue(value)) + * document.add(new NumericField(name, precisionStep).setDoubleValue(value)) */ public NumericField setDoubleValue(final double value) { if (numericTS != null) numericTS.setDoubleValue(value); fieldsData = Double.valueOf(value); - type = DataType.DOUBLE; + dataType = DataType.DOUBLE; return this; } /** * Initializes the field with the supplied float value. - * @param value the numeric value + * + * @param value + * the numeric value * @return this instance, because of this you can use it the following way: - * document.add(new NumericField(name, precisionStep).setFloatValue(value)) + * document.add(new NumericField(name, precisionStep).setFloatValue(value)) */ public NumericField setFloatValue(final float value) { if (numericTS != null) numericTS.setFloatValue(value); fieldsData = Float.valueOf(value); - type = DataType.FLOAT; + dataType = DataType.FLOAT; return this; } - + } Index: StringField.java =================================================================== --- StringField.java (revision 0) +++ StringField.java (revision 0) @@ -0,0 +1,44 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.EnumSet;
+/** A {@link Field} built from a String value using {@link #DEFAULT_TYPE}. */
+public final class StringField extends Field {
+  // Properties set: INDEXED, OMIT_NORMS, OMIT_POSITIONS_AND_TERM_FREQS (TOKENIZED is not among them).
+  public static FieldType DEFAULT_TYPE = new FieldType(EnumSet.of(FieldType.Property.INDEXED
+      , FieldType.Property.OMIT_NORMS
+      , FieldType.Property.OMIT_POSITIONS_AND_TERM_FREQS
+      ));
+  // NOTE(review): internName is accepted but never read in this constructor -- confirm whether name interning is still intended.
+  public StringField(String name, boolean internName, String value) {
+    super(name, StringField.DEFAULT_TYPE, value);
+  }
+
+  public StringField(String name, String value) {
+    this(name, true, value);
+  }
+
+  public String stringValue() {
+    return (fieldsData == null) ? null : fieldsData.toString();
+  }
+
+  public boolean isNumeric() {
+    return false;
+  }
+}
Index: TextField.java
===================================================================
--- TextField.java (revision 0)
+++ TextField.java (revision 0)
@@ -0,0 +1,35 @@
+package org.apache.lucene.document;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.util.EnumSet;
+/** A {@link Field} whose value is a {@link Reader}, created with {@link #DEFAULT_TYPE}. */
+public final class TextField extends Field {
+  // Properties set: INDEXED and TOKENIZED.
+  public static FieldType DEFAULT_TYPE = new FieldType(EnumSet.of(FieldType.Property.INDEXED
+      , FieldType.Property.TOKENIZED));
+
+  public TextField(String name, Reader reader) {
+    super(name, TextField.DEFAULT_TYPE, reader);
+  }
+
+  public boolean isNumeric() {
+    return false;
+  }
+}