Index: lucene/src/java/org/apache/lucene/document2/BinaryField.java
===================================================================
--- lucene/src/java/org/apache/lucene/document2/BinaryField.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/BinaryField.java (revision 0)
@@ -0,0 +1,43 @@
+package org.apache.lucene.document2;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A stored {@link Field} whose value is a byte array.
+ */
+public final class BinaryField extends Field {
+
+  /** Type shared by all instances: only the stored flag is switched on. */
+  public static final FieldType DEFAULT_TYPE = new FieldType().setStored(true);
+
+  /**
+   * Creates a binary field over the whole of the given array.
+   *
+   * @param name the field name
+   * @param value the bytes of the field value
+   */
+  public BinaryField(String name, byte[] value) {
+    super(name, DEFAULT_TYPE, value);
+    isBinary = true;
+  }
+
+  /** Always false: a binary field is never numeric. */
+  public boolean isNumeric() {
+    return false;
+  }
+}
Index: lucene/src/java/org/apache/lucene/document2/Document.java
===================================================================
--- lucene/src/java/org/apache/lucene/document2/Document.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/Document.java (revision 0)
@@ -0,0 +1,346 @@
+package org.apache.lucene.document2;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.*;
+
+import org.apache.lucene.index.IndexReader; // for javadoc
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.search.IndexSearcher; // for javadoc
+import org.apache.lucene.search.ScoreDoc; // for javadoc
+
+/** Documents are the unit of indexing and search.
+ *
+ * A Document is a set of fields. Each field has a name and a textual value.
+ * A field may be {@link Fieldable#isStored() stored} with the document, in which
+ * case it is returned with search hits on the document. Thus each document
+ * should typically contain one or more stored fields which uniquely identify
+ * it.
+ *
+ * <p>Note that fields which are not {@link Fieldable#isStored() stored} are
+ * not available in documents retrieved from the index, e.g. with {@link
+ * ScoreDoc#doc} or {@link IndexReader#document(int)}.
+ */
+
+public final class Document implements Iterable<IndexableField> {
+
+  List<IndexableField> fields = new ArrayList<IndexableField>();
+  private float boost = 1.0f;
+
+  /** Constructs a new document with no fields. */
+  public Document() {}
+
+  // @Override not until Java 1.6
+  public Iterator<IndexableField> iterator() {
+    // nocommit this shim code is temporary!! only here as an
+    // example... we will fix it "properly" for LUCENE-2308
+
+    // nocommit -- must multiply in docBoost to each
+    // provided field
+
+    return new Iterator<IndexableField>() {
+      private int fieldUpto = 0;
+
+      public boolean hasNext() {
+        return fieldUpto < fields.size();
+      }
+
+      public void remove() {
+        throw new UnsupportedOperationException();
+      }
+
+      public IndexableField next() {
+        return fields.get(fieldUpto++);
+      }
+    };
+  }
+
+  /** Sets a boost factor for hits on any field of this document. This value
+   * will be multiplied into the score of all hits on this document.
+   *
+   * <p>The default value is 1.0.
+   *
+   * <p>Values are multiplied into the value of {@link IndexableField#getBoost()} of
+   * each field in this document. Thus, this method in effect sets a default
+   * boost for the fields of this document.
+   *
+   * @see IndexableField#setBoost(float)
+   */
+  public void setBoost(float boost) {
+    this.boost = boost;
+  }
+
+  /** Returns, at indexing time, the boost factor as set by {@link #setBoost(float)}.
+   *
+   * <p>Note that once a document is indexed this value is no longer available
+   * from the index. At search time, for retrieved documents, this method always
+   * returns 1. This however does not mean that the boost value set at indexing
+   * time was ignored - it was just combined with other indexing time factors and
+   * stored elsewhere, for better indexing and search performance. (For more
+   * information see the "norm(t,d)" part of the scoring formula in
+   * {@link org.apache.lucene.search.Similarity Similarity}.)
+   *
+   * @see #setBoost(float)
+   */
+  // @Override not until Java 1.6
+  public float getBoost() {
+    return boost;
+  }
+
+  /**
+   * <p>Adds a field to a document. Several fields may be added with
+   * the same name. In this case, if the fields are indexed, their text is
+   * treated as though appended for the purposes of search.</p>
+   * <p>Note that add like the removeField(s) methods only makes sense
+   * prior to adding a document to an index. These methods cannot
+   * be used to change the content of an existing index! In order to achieve this,
+   * a document has to be deleted from an index and a new changed version of that
+   * document has to be added.</p>
+   */
+  public final void add(IndexableField field) {
+    fields.add(field);
+  }
+
+  /**
+   * <p>Removes field with the specified name from the document.
+   * If multiple fields exist with this name, this method removes the first field that has been added.
+   * If there is no field with the specified name, the document remains unchanged.</p>
+   * <p>Note that the removeField(s) methods like the add method only make sense
+   * prior to adding a document to an index. These methods cannot
+   * be used to change the content of an existing index! In order to achieve this,
+   * a document has to be deleted from an index and a new changed version of that
+   * document has to be added.</p>
+   */
+  public final void removeField(String name) {
+    Iterator<IndexableField> it = fields.iterator();
+    while (it.hasNext()) {
+      IndexableField field = it.next();
+      if (field.name().equals(name)) {
+        it.remove();
+        return;
+      }
+    }
+  }
+
+  /**
+   * <p>Removes all fields with the given name from the document.
+   * If there is no field with the specified name, the document remains unchanged.</p>
+   * <p>Note that the removeField(s) methods like the add method only make sense
+   * prior to adding a document to an index. These methods cannot
+   * be used to change the content of an existing index! In order to achieve this,
+   * a document has to be deleted from an index and a new changed version of that
+   * document has to be added.</p>
+   */
+  public final void removeFields(String name) {
+    Iterator<IndexableField> it = fields.iterator();
+    while (it.hasNext()) {
+      IndexableField field = it.next();
+      if (field.name().equals(name)) {
+        it.remove();
+      }
+    }
+  }
+
+  /** Returns a field with the given name if any exist in this document, or
+   * null. If multiple fields exists with this name, this method returns the
+   * first value added.
+   * Do not use this method with lazy loaded fields or {@link NumericField}.
+   * @deprecated use {@link #getIndexableField} instead and cast depending on
+   * data type.
+   * @throws ClassCastException if you try to retrieve a numerical or
+   * lazy loaded field.
+  @Deprecated
+  public final Field getField(String name) {
+    return (Field) getIndexableField(name);
+  }
+   */
+
+
+  /** Returns a field with the given name if any exist in this document, or
+   * null. If multiple fields exists with this name, this method returns the
+   * first value added.
+  public IndexableField getIndexableField(String name) {
+    for (IndexableField field : fields) {
+      if (field.name().equals(name))
+        return field;
+    }
+    return null;
+  }
+   */
+
+  /** Returns the string value of the field with the given name if any exist in
+   * this document, or null. If multiple fields exist with this name, this
+   * method returns the first value added. If only binary fields with this name
+   * exist, returns null.
+   * For {@link NumericField} it returns the string value of the number. If you want
+   * the actual {@code NumericField} instance back, use {@link #getIndexableField}.
+   */
+  public final String get(String name) {
+    for (IndexableField field : fields) {
+      if (field.name().equals(name) && !((Field) field).isBinary())
+        return field.stringValue();
+    }
+    return null;
+  }
+
+  /** Returns a List of all the fields in a document.
+   * <p>Note that fields which are not {@link IndexableField#isStored() stored} are
+   * not available in documents retrieved from the
+   * index, e.g. {@link IndexSearcher#doc(int)} or {@link
+   * IndexReader#document(int)}.
+  public final List<IndexableField> getFields() {
+    return fields;
+  }
+   */
+
+  private final static Field[] NO_FIELDS = new Field[0];
+
+  /**
+   * Returns an array of {@link Field}s with the given name.
+   * This method returns an empty array when there are no
+   * matching fields. It never returns null.
+   * Do not use this method with lazy loaded fields or {@link NumericField}.
+   *
+   * @param name the name of the field
+   * @return a Field[] array
+   * @deprecated use {@link #getIndexableField} instead and cast depending on
+   * data type.
+   * @throws ClassCastException if you try to retrieve a numerical or
+   * lazy loaded field.
+  @Deprecated
+  public final Field[] getFields(String name) {
+    List<Field> result = new ArrayList<Field>();
+    for (IndexableField field : fields) {
+      if (field.name().equals(name)) {
+        result.add((Field) field);
+      }
+    }
+
+    if (result.size() == 0)
+      return NO_FIELDS;
+
+    return result.toArray(new Field[result.size()]);
+  }
+   */
+
+
+  private final static IndexableField[] NO_INDEXABLEFIELDS = new IndexableField[0];
+
+  /**
+   * Returns an array of {@link IndexableField}s with the given name.
+   * This method returns an empty array when there are no
+   * matching fields. It never returns null.
+   *
+   * @param name the name of the field
+   * @return a IndexableField[] array
+  public IndexableField[] getIndexableFields(String name) {
+    List<IndexableField> result = new ArrayList<IndexableField>();
+    for (IndexableField field : fields) {
+      if (field.name().equals(name)) {
+        result.add(field);
+      }
+    }
+
+    if (result.size() == 0)
+      return NO_INDEXABLEFIELDS;
+
+    return result.toArray(new IndexableField[result.size()]);
+  }
+   */
+
+
+  private final static String[] NO_STRINGS = new String[0];
+
+  /**
+   * Returns an array of values of the field specified as the method parameter.
+   * This method returns an empty array when there are no
+   * matching fields. It never returns null.
+   * For {@link NumericField}s it returns the string value of the number. If you want
+   * the actual {@code NumericField} instances back, use {@link #getIndexableFields}.
+   * @param name the name of the field
+   * @return a String[] of field values
+   */
+  public final String[] getValues(String name) {
+    List<String> result = new ArrayList<String>();
+    for (IndexableField field : fields) {
+      if (field.name().equals(name) && !((Field) field).isBinary())
+        result.add(field.stringValue());
+    }
+
+    if (result.size() == 0)
+      return NO_STRINGS;
+
+    return result.toArray(new String[result.size()]);
+  }
+
+  private final static byte[][] NO_BYTES = new byte[0][];
+
+  /**
+   * Returns an array of byte arrays for the fields that have the name specified
+   * as the method parameter. This method returns an empty
+   * array when there are no matching fields. It never
+   * returns null.
+   *
+   * @param name the name of the field
+   * @return a byte[][] of binary field values
+   */
+  public final byte[][] getBinaryValues(String name) {
+    List<byte[]> result = new ArrayList<byte[]>();
+    for (IndexableField field : fields) {
+      if (field.name().equals(name) && ((Field) field).isBinary())
+        result.add(field.binaryValue(null).bytes);
+    }
+
+    if (result.size() == 0)
+      return NO_BYTES;
+
+    return result.toArray(new byte[result.size()][]);
+  }
+
+  /**
+   * Returns an array of bytes for the first (or only) field that has the name
+   * specified as the method parameter. This method will return null
+   * if no binary fields with the specified name are available.
+   * There may be non-binary fields with the same name.
+   *
+   * @param name the name of the field.
+   * @return a byte[] containing the binary field value or null
+   */
+  public final byte[] getBinaryValue(String name) {
+    for (IndexableField field : fields) {
+      if (field.name().equals(name) && ((Field) field).isBinary())
+        return field.binaryValue(null).bytes;
+    }
+    return null;
+  }
+
+  /** Prints the fields of a document for human consumption. */
+  @Override
+  public final String toString() {
+    StringBuilder buffer = new StringBuilder();
+    buffer.append("Document<");
+    for (int i = 0; i < fields.size(); i++) {
+      IndexableField field = fields.get(i);
+      buffer.append(field.toString());
+      if (i != fields.size()-1)
+        buffer.append(" ");
+    }
+    buffer.append(">");
+    return buffer.toString();
+  }
+}
Index: lucene/src/java/org/apache/lucene/document2/Field.java
===================================================================
--- lucene/src/java/org/apache/lucene/document2/Field.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/Field.java (revision 0)
@@ -0,0 +1,451 @@
+package org.apache.lucene.document2;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.NumericField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
+
+/**
+ * A field is a section of a Document. Each field has two parts, a name and a
+ * value. Values may be free text, provided as a String or as a Reader, or they
+ * may be atomic keywords, which are not further processed. Such keywords may be
+ * used to represent dates, urls, etc. Fields are optionally stored in the
+ * index, so that they may be returned with hits on the document.
+ */
+
+public class Field implements IndexableField {
+
+  protected FieldType type;
+  protected String name = "body";
+  // the data object for all different kind of field values
+  protected Object fieldsData = null;
+  // pre-analyzed tokenStream for indexed fields
+  protected TokenStream tokenStream;
+  protected boolean isBinary = false;
+  // length/offset for all primitive types
+  protected int binaryLength;
+  protected int binaryOffset;
+
+  protected float boost = 1.0f;
+
+  /**
+   * Creates a field that has a name and a type but no value yet.
+   *
+   * @param name the field name; it is interned
+   * @param type the options that the flag getters of this field delegate to
+   * @throws NullPointerException if name is null
+   */
+  public Field(String name, FieldType type) {
+    if (name == null)
+      throw new NullPointerException("name cannot be null");
+
+    this.type = type;
+    this.name = StringHelper.intern(name); // field names are interned
+  }
+
+  /**
+   * Creates a field whose value is supplied by a Reader.
+   *
+   * @param name the field name; it is interned
+   * @param type the options that the flag getters of this field delegate to
+   * @param reader the value, returned by {@link #readerValue()}
+   * @throws NullPointerException if name or reader is null
+   */
+  public Field(String name, FieldType type, Reader reader) {
+    if (name == null)
+      throw new NullPointerException("name cannot be null");
+    if (reader == null)
+      throw new NullPointerException("reader cannot be null");
+
+    this.type = type; // the flag getters dereference type, so it must be set
+    this.name = StringHelper.intern(name); // field names are interned
+    this.fieldsData = reader;
+  }
+
+  /**
+   * Creates a field that carries a pre-analyzed TokenStream and no other value.
+   *
+   * @param name the field name; it is interned
+   * @param type the options that the flag getters of this field delegate to
+   * @param tokenStream the stream returned by {@link #tokenStreamValue()}
+   * @throws NullPointerException if name or tokenStream is null
+   */
+  public Field(String name, FieldType type, TokenStream tokenStream) {
+    if (name == null)
+      throw new NullPointerException("name cannot be null");
+    if (tokenStream == null)
+      throw new NullPointerException("tokenStream cannot be null");
+
+    this.type = type; // the flag getters dereference type, so it must be set
+    this.name = StringHelper.intern(name); // field names are interned
+    this.fieldsData = null;
+    this.tokenStream = tokenStream;
+  }
+
+  /**
+   * Creates a binary field that uses the whole array as its value.
+   *
+   * @param name the field name; it is interned
+   * @param type the options that the flag getters of this field delegate to
+   * @param value the bytes of the value; value.length is read, so it must not be null
+   */
+  public Field(String name, FieldType type, byte[] value) {
+    this(name, type, value, 0, value.length);
+  }
+
+  /**
+   * Creates a binary field that uses a slice of an array as its value.
+   *
+   * @param name the field name; it is interned
+   * @param type the options that the flag getters of this field delegate to
+   * @param value the array holding the value; the reference is kept, not copied
+   * @param offset index of the first byte of the value
+   * @param length number of bytes in the value
+   */
+  public Field(String name, FieldType type, byte[] value, int offset, int length) {
+    this.isBinary = true;
+    this.fieldsData = value;
+    this.type = type;
+    this.binaryOffset = offset;
+    this.binaryLength = length;
+    this.name = StringHelper.intern(name);
+  }
+
+  /**
+   * Creates a field with a String value and an interned name.
+   *
+   * @param name the field name
+   * @param type the options that the flag getters of this field delegate to
+   * @param value the text of the field
+   * @throws IllegalArgumentException if name or value is null
+   */
+  public Field(String name, FieldType type, String value) {
+    this(name, true, type, value);
+  }
+
+  /**
+   * Creates a field with a String value.
+   *
+   * @param name the field name
+   * @param internName whether the name is interned before it is kept
+   * @param type the options that the flag getters of this field delegate to
+   * @param value the text of the field
+   * @throws IllegalArgumentException if name or value is null
+   */
+  public Field(String name, boolean internName, FieldType type, String value) {
+    if (name == null)
+      throw new IllegalArgumentException("name cannot be null");
+    if (value == null)
+      throw new IllegalArgumentException("value cannot be null");
+
+    this.type = type;
+    // field names are optionally interned; the result must land in the field, not the parameter
+    this.name = internName ? StringHelper.intern(name) : name;
+    this.fieldsData = value;
+  }
+
+  /**
+   * The value of the field as a String, or null. If null, the Reader value or
+   * binary value is used. Exactly one of stringValue(), readerValue(), and
+   * binaryValue() must be set.
+   */
+  public String stringValue() {
+    return fieldsData instanceof String ? (String) fieldsData : null;
+  }
+
+  /**
+   * The value of the field as a Reader, or null. If null, the String value or
+   * binary value is used. Exactly one of stringValue(), readerValue(), and
+   * binaryValue() must be set.
+   */
+  public Reader readerValue() {
+    return fieldsData instanceof Reader ? (Reader) fieldsData : null;
+  }
+
+  /**
+   * The TokenStream for this field to be used when indexing, or null. If null,
+   * the Reader value or String value is analyzed to produce the indexed tokens.
+   */
+  public TokenStream tokenStreamValue() {
+    return tokenStream;
+  }
+
+  /** Always null in this class; see also {@link #numericValue()}. */
+  public Number getNumericValue() {
+    return null;
+  }
+
+  /**
+   * <p>
+   * Expert: change the value of this field. This can be used during indexing to
+   * re-use a single Field instance to improve indexing speed by avoiding GC
+   * cost of new'ing and reclaiming Field instances. Typically a single
+   * {@link Document} instance is re-used as well. This helps most on small
+   * documents.
+   * </p>
+   *
+   * <p>
+   * Each Field instance should only be used once within a single
+   * {@link Document} instance. See ImproveIndexingSpeed for details.
+   * </p>
+   *
+   * @throws IllegalArgumentException if this is a binary field
+   */
+  public void setValue(String value) {
+    if (isBinary) {
+      throw new IllegalArgumentException(
+          "cannot set a String value on a binary field");
+    }
+    fieldsData = value;
+  }
+
+  /**
+   * Expert: change the value of this field. See setValue(String).
+   *
+   * @throws IllegalArgumentException if this is a binary field or a stored field
+   */
+  public void setValue(Reader value) {
+    if (isBinary) {
+      throw new IllegalArgumentException(
+          "cannot set a Reader value on a binary field");
+    }
+    if (stored()) {
+      throw new IllegalArgumentException(
+          "cannot set a Reader value on a stored field");
+    }
+    fieldsData = value;
+  }
+
+  /**
+   * Expert: change the value of this field. See setValue(String).
+   *
+   * @throws IllegalArgumentException if this is not a binary field
+   */
+  public void setValue(byte[] value) {
+    if (!isBinary) {
+      throw new IllegalArgumentException(
+          "cannot set a byte[] value on a non-binary field");
+    }
+    fieldsData = value;
+    binaryLength = value.length;
+    binaryOffset = 0;
+  }
+
+  /**
+   * Expert: change the value of this field. See setValue(String).
+   *
+   * @throws IllegalArgumentException if this is not a binary field
+   */
+  public void setValue(byte[] value, int offset, int length) {
+    if (!isBinary) {
+      throw new IllegalArgumentException(
+          "cannot set a byte[] value on a non-binary field");
+    }
+    fieldsData = value;
+    binaryLength = length;
+    binaryOffset = offset;
+  }
+
+  /**
+   * Expert: sets the token stream to be used for indexing. The field must
+   * already be indexed and tokenized. May be combined with stored
+   * values from stringValue() or binaryValue()
+   *
+   * @throws IllegalArgumentException if the field is not both indexed and tokenized
+   */
+  public void setTokenStream(TokenStream tokenStream) {
+    if (!indexed() || !tokenized()) {
+      throw new IllegalArgumentException(
+          "cannot set token stream on non indexed and tokenized field");
+    }
+    this.tokenStream = tokenStream;
+  }
+
+  /** Returns the name of this field. */
+  public String name() {
+    return name;
+  }
+
+  /** Returns the boost of this field; 1.0 unless changed by {@link #setBoost(float)}. */
+  public float boost() {
+    return boost;
+  }
+
+  /**
+   * Sets the boost factor for hits on this field. This value will be multiplied
+   * into the score of all hits on this field of this document.
+   *
+   * <p>
+   * The boost is multiplied by
+   * {@link org.apache.lucene.document.Document#getBoost()} of the document
+   * containing this field. If a document has multiple fields with the same
+   * name, all such values are multiplied together. This product is then used to
+   * compute the norm factor for the field. By default, in the
+   * {@link org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)}
+   * method, the boost value is multiplied by the length normalization factor
+   * and then rounded by
+   * {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before
+   * it is stored in the index. One should attempt to ensure that this product
+   * does not overflow the range of that encoding.
+   *
+   * @see org.apache.lucene.document.Document#setBoost(float)
+   * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
+   * @see org.apache.lucene.search.Similarity#encodeNormValue(float)
+   */
+  public void setBoost(float boost) {
+    this.boost = boost;
+  }
+
+  // returns the backing array of a binary value, or null when there is none
+  private byte[] getBinaryValue(byte[] result /* unused */) {
+    if (isBinary || fieldsData instanceof byte[]) return (byte[]) fieldsData;
+    else return null;
+  }
+
+  /** Always false in this class. */
+  public boolean numeric() {
+    return false;
+  }
+
+  /** Always null in this class. */
+  public Number numericValue() {
+    return null;
+  }
+
+  /** Always null in this class. */
+  public NumericField.DataType numericDataType() {
+    return null;
+  }
+
+  private byte[] getBinaryValue() {
+    return getBinaryValue(null);
+  }
+
+  /**
+   * Returns the binary value as a BytesRef over the backing array (no copy is
+   * made), or null when this field has no binary value.
+   *
+   * @param reuse if non-null, this instance is filled in and returned instead
+   *        of a newly allocated BytesRef
+   */
+  public BytesRef binaryValue(BytesRef reuse) {
+    final byte[] bytes = getBinaryValue();
+    if (bytes != null) {
+      if (reuse == null) {
+        return new BytesRef(bytes, getBinaryOffset(), getBinaryLength());
+      } else {
+        reuse.bytes = bytes;
+        reuse.offset = getBinaryOffset();
+        reuse.length = getBinaryLength();
+        return reuse;
+      }
+    } else {
+      return null;
+    }
+  }
+
+  /**
+   * Returns length of byte[] segment that is used as value, if Field is not
+   * binary returned value is undefined
+   *
+   * @return length of byte[] segment that represents this Field value
+   */
+  private int getBinaryLength() {
+    if (isBinary) {
+      return binaryLength;
+    } else if (fieldsData instanceof byte[]) return ((byte[]) fieldsData).length;
+    else return 0;
+  }
+
+  /**
+   * Returns offset into byte[] segment that is used as value, if Field is not
+   * binary returned value is undefined
+   *
+   * @return index of the first character in byte[] segment that represents this
+   * Field value
+   */
+  public int getBinaryOffset() {
+    return binaryOffset;
+  }
+
+  /** Returns the binary flag, which the byte[] constructors set to true. */
+  public boolean isBinary() {
+    return isBinary;
+  }
+
+  /** methods from inner FieldType */
+
+  public boolean stored() {
+    return type.stored();
+  }
+
+  public boolean indexed() {
+    return type.indexed();
+  }
+
+  public boolean tokenized() {
+    return type.tokenized();
+  }
+
+  public boolean omitNorms() {
+    return type.omitNorms();
+  }
+
+  public boolean omitTermFreqAndPositions() {
+    return type.omitTermFreqAndPositions();
+  }
+
+  public boolean storeTermVectors() {
+    return type.storeTermVectors();
+  }
+
+  public boolean storeTermVectorOffsets() {
+    return type.storeTermVectorOffsets();
+  }
+
+  public boolean storeTermVectorPositions() {
+    return type.storeTermVectorPositions();
+  }
+
+  public boolean lazy() {
+    return type.lazy();
+  }
+
+  /** Prints a Field for human consumption.
+   */
+  @Override
+  public final String toString() {
+    StringBuilder result = new StringBuilder();
+    result.append(type.toString());
+    result.append('<');
+    result.append(name);
+    result.append(':');
+
+    if (fieldsData != null && type.lazy() == false) {
+      result.append(fieldsData);
+    }
+
+    result.append('>');
+    return result.toString();
+  }
+}
Index: lucene/src/java/org/apache/lucene/document2/FieldType.java
===================================================================
--- lucene/src/java/org/apache/lucene/document2/FieldType.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/FieldType.java (revision 0)
@@ -0,0 +1,143 @@
+package org.apache.lucene.document2;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Holds the boolean flags that describe how a field is indexed and stored.
+ * All flags start out false, and each setter returns this instance so calls can be chained.
+ */
+public class FieldType {
+
+  private boolean indexed;
+  private boolean stored;
+  private boolean tokenized;
+  private boolean storeTermVectors;
+  private boolean storeTermVectorsOffsets;
+  private boolean storeTermVectorsPositions;
+  private boolean omitNorms;
+  private boolean omitTermFreqsAndPositions;
+  private boolean lazy;
+
+  public boolean indexed() {
+    return this.indexed;
+  }
+
+  public FieldType setIndexed(boolean value) {
+    this.indexed = value;
+    return this;
+  }
+
+  public boolean stored() {
+    return this.stored;
+  }
+
+  public FieldType setStored(boolean value) {
+    this.stored = value;
+    return this;
+  }
+
+  public boolean tokenized() {
+    return this.tokenized;
+  }
+
+  public FieldType setTokenized(boolean value) {
+    this.tokenized = value;
+    return this;
+  }
+
+  public boolean storeTermVectors() {
+    return this.storeTermVectors;
+  }
+
+  public FieldType setStoreTermVectors(boolean value) {
+    this.storeTermVectors = value;
+    return this;
+  }
+
+  public boolean storeTermVectorOffsets() {
+    return this.storeTermVectorsOffsets;
+  }
+
+  public FieldType setStoreTermVectorOffsets(boolean value) {
+    this.storeTermVectorsOffsets = value;
+    return this;
+  }
+
+  public boolean storeTermVectorPositions() {
+    return this.storeTermVectorsPositions;
+  }
+
+  public FieldType setStoreTermVectorPositions(boolean value) {
+    this.storeTermVectorsPositions = value;
+    return this;
+  }
+
+  public boolean omitNorms() {
+    return this.omitNorms;
+  }
+
+  public FieldType setOmitNorms(boolean value) {
+    this.omitNorms = value;
+    return this;
+  }
+
+  public boolean omitTermFreqAndPositions() {
+    return this.omitTermFreqsAndPositions;
+  }
+
+  public FieldType setOmitTermFreqAndPositions(boolean value) {
+    this.omitTermFreqsAndPositions = value;
+    return this;
+  }
+
+  public boolean lazy() {
+    return this.lazy;
+  }
+
+  public FieldType setLazy(boolean value) {
+    this.lazy = value;
+    return this;
+  }
+
+  /** Prints a FieldType for human consumption: the names of the enabled flags, comma separated. */
+  @Override
+  public final String toString() {
+    StringBuilder result = new StringBuilder();
+    appendFlag(result, stored(), "stored");
+    appendFlag(result, indexed(), "indexed");
+    appendFlag(result, tokenized(), "tokenized");
+    appendFlag(result, storeTermVectors(), "termVector");
+    appendFlag(result, storeTermVectorOffsets(), "termVectorOffsets");
+    appendFlag(result, storeTermVectorPositions(), "termVectorPosition");
+    appendFlag(result, omitNorms(), "omitNorms");
+    appendFlag(result, omitTermFreqAndPositions(), "omitTermFreqAndPositions");
+    appendFlag(result, lazy(), "lazy");
+    return result.toString();
+  }
+
+  // Appends label when enabled, preceded by a comma unless it is the first entry.
+  private static void appendFlag(StringBuilder out, boolean enabled, String label) {
+    if (!enabled) {
+      return;
+    }
+    if (out.length() > 0) {
+      out.append(',');
+    }
+    out.append(label);
+  }
+}
Index: lucene/src/java/org/apache/lucene/document2/NumericField.java
===================================================================
--- lucene/src/java/org/apache/lucene/document2/NumericField.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document2/NumericField.java (revision 0)
@@ -0,0 +1,369 @@
+package org.apache.lucene.document2;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; + +import org.apache.lucene.document.NumericField.DataType; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.NumericTokenStream; +import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.search.NumericRangeQuery; // javadocs +import org.apache.lucene.search.NumericRangeFilter; // javadocs +import org.apache.lucene.search.SortField; // javadocs +import org.apache.lucene.search.FieldCache; // javadocs + +/** + *

+ * This class provides a {@link Field} that enables indexing of numeric values + * for efficient range filtering and sorting. Here's an example usage, adding an + * int value: + * + *

+ * document.add(new NumericField(name).setIntValue(value));
+ * 
+ * + * For optimal performance, re-use the NumericField and + * {@link Document} instance for more than one document: + * + *
+ *  NumericField field = new NumericField(name);
+ *  Document document = new Document();
+ *  document.add(field);
+ * 
+ *  for(all documents) {
+ *    ...
+ *    field.setIntValue(value)
+ *    writer.addDocument(document);
+ *    ...
+ *  }
+ * 
+ * + *

+ * The java native types int, long, float + * and double are directly supported. However, any value that can + * be converted into these native types can also be indexed. For example, + * date/time values represented by a {@link java.util.Date} can be translated + * into a long value using the {@link java.util.Date#getTime} method. If you + * don't need millisecond precision, you can quantize the value, either by + * dividing the result of {@link java.util.Date#getTime} or using the separate + * getters (for year, month, etc.) to construct an int or + * long value. + *

+ * + *

+ * To perform range querying or filtering against a NumericField, + * use {@link NumericRangeQuery} or {@link NumericRangeFilter}. To sort + * according to a NumericField, use the normal numeric sort types, + * eg {@link SortField#INT}. NumericField values can also be loaded + * directly from {@link FieldCache}. + *

+ * + *

+ * By default, a NumericField's value is not stored but is indexed + * for range filtering and sorting. You can use the + * {@link #NumericField(String,Field.Store,boolean)} constructor if you need to + * change these defaults. + *

+ * + *

+ * You may add the same field name as a NumericField to the same + * document more than once. Range querying and filtering will be the logical OR + * of all values; so a range query will hit all documents that have at least one + * value in the range. However sort behavior is not defined. If you need to + * sort, you should separately index a single-valued NumericField. + *

+ * + *

+ * A NumericField will consume somewhat more disk space in the + * index than an ordinary single-valued field. However, for a typical index that + * includes substantial textual content per document, this increase will likely + * be in the noise. + *

+ * + *

+ * Within Lucene, each numeric value is indexed as a trie structure, + * where each term is logically assigned to larger and larger pre-defined + * brackets (which are simply lower-precision representations of the value). The + * step size between each successive bracket is called the + * precisionStep, measured in bits. Smaller + * precisionStep values result in a larger number of brackets, which + * consumes more disk space in the index but may result in faster range search + * performance. The default value, 4, was selected for a reasonable tradeoff of + * disk space consumption versus performance. You can use the expert constructor + * {@link #NumericField(String,int,FieldType)} if you'd like to change + * the value. Note that you must also specify a congruent value when creating + * {@link NumericRangeQuery} or {@link NumericRangeFilter}. For low cardinality + * fields larger precision steps are good. If the cardinality is below 100, it is + * fair to use {@link Integer#MAX_VALUE}, which produces one term per value. + * + *

+ * For more information on the internals of numeric trie indexing, including the + * + * precisionStep configuration, see {@link NumericRangeQuery}. + * The format of indexed values is described in {@link NumericUtils}. + * + *

+ * If you only need to sort by numeric value, and never run range + * querying/filtering, you can index using a precisionStep of + * {@link Integer#MAX_VALUE}. This will minimize disk space consumed. + *

+ * + *

+ * More advanced users can instead use {@link NumericTokenStream} directly, when + * indexing numbers. This class is a wrapper around this token stream type for + * easier, more intuitive usage. + *

+ * + * @since 2.9 + */ +public final class NumericField extends Field { + + /** + * Data type of the value in {@link NumericField}. + * + * @since 3.2 + */ + + /* + public static enum DataType { + INT, LONG, FLOAT, DOUBLE + } + */ + + public static final FieldType DEFAULT_TYPE = new FieldType() + .setIndexed(true) + .setOmitNorms(true) + .setOmitTermFreqAndPositions(true); + + private org.apache.lucene.document.NumericField.DataType dataType; /* NOTE(review): declared with the enum of the old document package because the nested DataType above is commented out; the unqualified DataType used by the getter and setters below must resolve through an import outside this view - confirm */ + private transient NumericTokenStream numericTS; + private final int precisionStep; + + /** + * Creates a field for numeric values using the default + * precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * The instance is not yet initialized with a numeric value, before indexing a + * document containing this field, set a value using the various set + * ???Value() methods. This constructor creates an indexed, but not + * stored field. + * + * @param name + * the field name + */ + public NumericField(String name) { + this(name, NumericUtils.PRECISION_STEP_DEFAULT, NumericField.DEFAULT_TYPE); + } + + /** + * Creates a field for numeric values using the default + * precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * The instance is not yet initialized with a numeric value, before indexing a + * document containing this field, set a value using the various set + * ???Value() methods. + * + * @param name + * the field name + * @param type + * the {@link FieldType} to use for this field; this replaces the + * separate store and index arguments of the older constructor and is + * passed through unchanged, together with the default precision step, + * to {@link #NumericField(String, int, FieldType)} + */ + public NumericField(String name, FieldType type) { + this(name, NumericUtils.PRECISION_STEP_DEFAULT, type); + } + + /** + * Creates a field for numeric values with the specified + * precisionStep. 
The instance is not yet initialized with a + * numeric value, before indexing a document containing this field, set a + * value using the various set???Value() methods. This constructor + * creates an indexed, but not stored field. + * + * @param name + * the field name + * @param precisionStep + * the used precision step + */ + public NumericField(String name, int precisionStep) { + this(name, precisionStep, NumericField.DEFAULT_TYPE); + } + + /** + * Creates a field for numeric values with the specified + * precisionStep. The instance is not yet initialized with a + * numeric value, before indexing a document containing this field, set a + * value using the various set???Value() methods. + * + * @param name + * the field name + * @param precisionStep + * the used precision step + * @param type + * the {@link FieldType} to use for this field; this replaces the + * separate store and index arguments of the older constructor and is + * handed to the {@link Field} superclass constructor along with the + * field name + */ + public NumericField(String name, int precisionStep, FieldType type) { + super(name, type); + this.precisionStep = precisionStep; + } + + /** Returns a {@link NumericTokenStream} for indexing the numeric value. 
*/ + public TokenStream tokenStreamValue() { + if (!indexed()) return null; + if (numericTS == null) { + // lazy init the TokenStream as it is heavy to instantiate + // (attributes,...), + // if not needed (stored field loading) + numericTS = new NumericTokenStream(precisionStep); + // initialize value in TokenStream + if (fieldsData != null) { + assert dataType != null; + final Number val = (Number) fieldsData; + switch (dataType) { + case INT: + numericTS.setIntValue(val.intValue()); + break; + case LONG: + numericTS.setLongValue(val.longValue()); + break; + case FLOAT: + numericTS.setFloatValue(val.floatValue()); + break; + case DOUBLE: + numericTS.setDoubleValue(val.doubleValue()); + break; + default: + assert false : "Should never get here"; + } + } + } + return numericTS; + } + + /** Always returns null for numeric fields */ + public Reader readerValue() { + return null; + } + + /** + * Returns the numeric value as a string. This format is also returned if you + * call {@link Document#get(String)} on search results. It is recommended to + * use {@link Document#getFieldable} instead that returns {@code NumericField} + * instances. You can then use {@link #getNumericValue} to return the stored + * value. + */ + public String stringValue() { + return (fieldsData == null) ? null : fieldsData.toString(); + } + + /** + * Returns the current numeric value as a subclass of {@link Number}, + * null if not yet initialized. + */ + public Number getNumericValue() { + return (Number) fieldsData; + } + + /** Returns the precision step. */ + public int getPrecisionStep() { + return precisionStep; + } + + /** + * Returns the data type of the current value, {@code null} if not yet set. + * + * @since 3.2 + */ + public DataType getNumericDataType() { + return dataType; + } + + public boolean isNumeric() { + return true; + } + + /** + * Initializes the field with the supplied long value. 
+ * + * @param value + * the numeric value + * @return this instance, because of this you can use it the following way: + * document.add(new NumericField(name, precisionStep).setLongValue(value)) + */ + public NumericField setLongValue(final long value) { + if (numericTS != null) numericTS.setLongValue(value); + fieldsData = Long.valueOf(value); + dataType = DataType.LONG; + return this; + } + + /** + * Initializes the field with the supplied int value. + * + * @param value + * the numeric value + * @return this instance, because of this you can use it the following way: + * document.add(new NumericField(name, precisionStep).setIntValue(value)) + */ + public NumericField setIntValue(final int value) { + if (numericTS != null) numericTS.setIntValue(value); + fieldsData = Integer.valueOf(value); + dataType = DataType.INT; + return this; + } + + /** + * Initializes the field with the supplied double value. + * + * @param value + * the numeric value + * @return this instance, because of this you can use it the following way: + * document.add(new NumericField(name, precisionStep).setDoubleValue(value)) + */ + public NumericField setDoubleValue(final double value) { + if (numericTS != null) numericTS.setDoubleValue(value); + fieldsData = Double.valueOf(value); + dataType = DataType.DOUBLE; + return this; + } + + /** + * Initializes the field with the supplied float value. 
+ * + * @param value + * the numeric value + * @return this instance, because of this you can use it the following way: + * document.add(new NumericField(name, precisionStep).setFloatValue(value)) + */ + public NumericField setFloatValue(final float value) { + if (numericTS != null) numericTS.setFloatValue(value); + fieldsData = Float.valueOf(value); + dataType = DataType.FLOAT; + return this; + } + +} Index: lucene/src/java/org/apache/lucene/document2/StringField.java =================================================================== --- lucene/src/java/org/apache/lucene/document2/StringField.java (revision 0) +++ lucene/src/java/org/apache/lucene/document2/StringField.java (revision 0) @@ -0,0 +1,42 @@ +package org.apache.lucene.document2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public final class StringField extends Field { /* Field built from a String value; every instance is created with DEFAULT_TYPE (indexed, norms omitted, term frequencies and positions omitted). */ + + public static final FieldType DEFAULT_TYPE = new FieldType() + .setIndexed(true) + .setOmitNorms(true) + .setOmitTermFreqAndPositions(true); + + public StringField(String name, boolean internName, String value) { /* NOTE(review): internName is never read in this constructor; only name and value reach the super constructor */ + super(name, StringField.DEFAULT_TYPE, value); + } + + public StringField(String name, String value) { + this(name, true, value); + } + + public String stringValue() { + return (fieldsData == null) ? null : fieldsData.toString(); + } + + public boolean isNumeric() { + return false; + } +} Index: lucene/src/java/org/apache/lucene/document2/TextField.java =================================================================== --- lucene/src/java/org/apache/lucene/document2/TextField.java (revision 0) +++ lucene/src/java/org/apache/lucene/document2/TextField.java (revision 0) @@ -0,0 +1,39 @@ +package org.apache.lucene.document2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Reader; + +public final class TextField extends Field { /* Field built from a Reader or a String value; every instance is created with DEFAULT_TYPE (indexed and tokenized). */ + + public static final FieldType DEFAULT_TYPE = new FieldType() + .setIndexed(true) + .setTokenized(true); + + public TextField(String name, Reader reader) { + super(name, TextField.DEFAULT_TYPE, reader); + } + + public TextField(String name, String value) { + super(name, TextField.DEFAULT_TYPE, value); + } + + public boolean isNumeric() { + return false; + } +} Index: lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (revision 1134546) +++ lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (working copy) @@ -40,6 +40,7 @@ import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.document2.FieldType; import org.apache.lucene.index.*; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; @@ -1072,7 +1073,24 @@ public static Field newField(String name, String value, Store store, Index index) { return newField(random, name, value, store, index); } + + public static org.apache.lucene.document2.Field newField(String name, String value, FieldType type) { + return newField(random, name, value, type); + } + public static org.apache.lucene.document2.Field newField(Random random, String name, String value, FieldType type) { + if (usually(random)) { + // most of the time, don't modify the params + return new org.apache.lucene.document2.Field(name, type, value); + } + + if (!type.stored() && random.nextBoolean()) { + type.setStored(true); // randomly store it; NOTE(review): the return value is unused, so this appears to modify the FieldType object passed in by the caller - a shared constant such as a DEFAULT_TYPE would stay stored afterwards; confirm + } + + return new org.apache.lucene.document2.Field(name, type, value); + } + /** * Returns a new Field instance. 
Use this when the test does not * care about some specific field settings (most tests) Index: lucene/src/test/org/apache/lucene/TestDemo.java =================================================================== --- lucene/src/test/org/apache/lucene/TestDemo.java (revision 1134546) +++ lucene/src/test/org/apache/lucene/TestDemo.java (working copy) @@ -21,8 +21,9 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document2.Document; +import org.apache.lucene.document2.FieldType; +import org.apache.lucene.document2.TextField; import org.apache.lucene.index.Term; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.queryParser.ParseException; @@ -54,8 +55,9 @@ Document doc = new Document(); String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; String text = "This is the text to be indexed. " + longTerm; - doc.add(newField("fieldname", text, Field.Store.YES, - Field.Index.ANALYZED)); + FieldType textType = TextField.DEFAULT_TYPE; + textType.setStored(true); /* NOTE(review): textType is the same object as the static TextField.DEFAULT_TYPE, so this call appears to leave the shared default marked stored for every later user - confirm, or work on a copy */ + doc.add(newField("fieldname", text, textType)); iwriter.addDocument(doc); iwriter.close(); @@ -70,7 +72,7 @@ assertEquals(1, hits.totalHits); // Iterate through the results: for (int i = 0; i < hits.scoreDocs.length; i++) { - Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc); + org.apache.lucene.document.Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc); assertEquals(text, hitDoc.get("fieldname")); }