Index: modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java =================================================================== --- modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (revision 1162944) +++ modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (working copy) @@ -179,8 +179,8 @@ protected Config config; - protected final FieldType valType; - protected final FieldType bodyValType; + protected FieldType valType; + protected FieldType bodyValType; protected ContentSource source; protected boolean reuseFields; @@ -194,10 +194,6 @@ private int printNum = 0; public DocMaker() { - valType = new FieldType(TextField.TYPE_UNSTORED); - valType.setOmitNorms(true); - - bodyValType = new FieldType(TextField.TYPE_UNSTORED); } // create a doc @@ -465,20 +461,24 @@ boolean termVecPositions = config.get("doc.term.vector.positions", false); boolean termVecOffsets = config.get("doc.term.vector.offsets", false); + valType = new FieldType(TextField.TYPE_UNSTORED); valType.setStored(stored); - bodyValType.setStored(bodyStored); valType.setTokenized(tokenized); valType.setOmitNorms(!norms); - bodyValType.setTokenized(bodyTokenized); - bodyValType.setOmitNorms(!bodyNorms); - valType.setStoreTermVectors(termVec); valType.setStoreTermVectorPositions(termVecPositions); valType.setStoreTermVectorOffsets(termVecOffsets); + valType.freeze(); + + bodyValType = new FieldType(TextField.TYPE_UNSTORED); + bodyValType.setStored(bodyStored); + bodyValType.setTokenized(bodyTokenized); + bodyValType.setOmitNorms(!bodyNorms); bodyValType.setStoreTermVectors(termVec); bodyValType.setStoreTermVectorPositions(termVecPositions); bodyValType.setStoreTermVectorOffsets(termVecOffsets); - + bodyValType.freeze(); + storeBytes = config.get("doc.store.body.bytes", false); reuseFields = config.get("doc.reuse.fields", true); Index: lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorVisitor.java 
=================================================================== --- lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorVisitor.java (revision 1162944) +++ lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorVisitor.java (working copy) @@ -174,7 +174,6 @@ ft.setStored(true); ft.setOmitNorms(fi.omitNorms); ft.setIndexOptions(fi.indexOptions); - ft.setLazy(true); if (binary) { f = new LazyField(in, fi.name, ft, numBytes, pointer, binary, cacheResult); Index: lucene/MIGRATE.txt =================================================================== --- lucene/MIGRATE.txt (revision 1162944) +++ lucene/MIGRATE.txt (working copy) @@ -407,3 +407,115 @@ - o.a.l.queryParser.QueryParserTokenManager -> o.a.l.queryparser.classic.QueryParserTokenManager - o.a.l.queryParser.QueryParserToken -> o.a.l.queryparser.classic.Token - o.a.l.queryParser.QueryParserTokenMgrError -> o.a.l.queryparser.classic.TokenMgrError + + + +* LUCENE-2308: Separate FieldType from Field instances + +With this change, the indexing details (indexed, tokenized, norms, +indexOptions, stored, etc.) are moved into a separate FieldType +instance (rather than being stored directly on the Field). + +This means you can create the FieldType instance once, up front, for a +given field, and then re-use that instance whenever you instantiate +the Field. + +Certain field types are pre-defined since they are common cases: + + * StringField: indexes a String value as a single token (i.e., does + not tokenize). This field turns off norms and indexes only doc + IDs (does not index term frequency nor positions). This field + does not store its value, but exposes TYPE_STORED as well. + + * BinaryField: a byte[] value that's only stored. + + * TextField: indexes and tokenizes a String, Reader or TokenStream + value, without term vectors. This field does not store its value, + but exposes TYPE_STORED as well. 
+ +If your usage fits one of those common cases you can simply +instantiate the above class. To use the TYPE_STORED variant, do this +instead: + + Field f = new Field("field", StringField.TYPE_STORED, "value"); + +Alternatively, if an existing type is close to what you want but you +need to make a few changes, you can copy that type and make changes: + + FieldType bodyType = new FieldType(TextField.TYPE_STORED); + bodyType.setStoreTermVectors(true); + + +You can of course also create your own FieldType from scratch: + + FieldType t = new FieldType(); + t.setIndexed(true); + t.setStored(true); + t.setOmitNorms(true); + t.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + +FieldType has a freeze() method to prevent further changes. Note that +once a FieldType is bound to a Field, it's frozen, to help prevent +confusing bugs. + +When migrating from the 3.x API, if you did this before: + + new Field("field", "value", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS) + +you can now do this: + + new StringField("field", "value") + +(though note that StringField indexes DOCS_ONLY). 
+ +If instead the value was stored: + + new Field("field", "value", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS) + +you can now do this: + + new Field("field", StringField.TYPE_STORED, "value") + +If you didn't omit norms: + + new Field("field", "value", Field.Store.YES, Field.Index.NOT_ANALYZED) + +you can now do this: + + FieldType ft = new FieldType(StringField.TYPE_STORED); + ft.setOmitNorms(false); + new Field("field", ft, "value") + +If you did this before (value can be String or Reader): + + new Field("field", value, Field.Store.NO, Field.Index.ANALYZED) + +you can now do this: + + new TextField("field", value) + +If instead the value was stored: + + new Field("field", value, Field.Store.YES, Field.Index.ANALYZED) + +you can now do this: + + new Field("field", TextField.TYPE_STORED, value) + +If in addition you omit norms: + + new Field("field", value, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS) + +you can now do this: + + FieldType ft = new FieldType(TextField.TYPE_STORED); + ft.setOmitNorms(true); + new Field("field", ft, value) + +If you did this before (bytes is a byte[]): + + new Field("field", bytes) + +you can now do this: + + new BinaryField("field", bytes) Index: lucene/src/java/org/apache/lucene/document/Field.java =================================================================== --- lucene/src/java/org/apache/lucene/document/Field.java (revision 1162944) +++ lucene/src/java/org/apache/lucene/document/Field.java (working copy) @@ -50,6 +50,7 @@ public Field(String name, FieldType type) { this.name = name; this.type = type; + type.freeze(); } public Field(String name, FieldType type, Reader reader) { @@ -63,6 +64,7 @@ this.name = name; this.fieldsData = reader; this.type = type; + type.freeze(); } public Field(String name, FieldType type, TokenStream tokenStream) { @@ -77,6 +79,7 @@ this.fieldsData = null; this.tokenStream = tokenStream; this.type = type; + type.freeze(); } public Field(String name, FieldType type, byte[] 
value) { @@ -87,12 +90,14 @@ this.fieldsData = new BytesRef(value, offset, length); this.type = type; this.name = name; + type.freeze(); } public Field(String name, FieldType type, BytesRef bytes) { this.fieldsData = bytes; this.type = type; this.name = name; + type.freeze(); } public Field(String name, FieldType type, String value) { @@ -114,6 +119,7 @@ this.type = type; this.name = name; this.fieldsData = value; + type.freeze(); } /** @@ -195,22 +201,6 @@ } /** - * Expert: change the value of this field. See setValue(String). - */ - /* - public void setValue(byte[] value, int offset, int length) { - if (!isBinary) { - throw new IllegalArgumentException( - "cannot set a byte[] value on a non-binary field"); - } - fieldsData = value; - binaryLength = length; - binaryOffset = offset; - } - */ - - /** * Expert: sets the token stream to be used for indexing and causes * isIndexed() and isTokenized() to return true. May be combined with stored * values from stringValue() or getBinaryValue() @@ -316,7 +306,7 @@ result.append(name); result.append(':'); - if (fieldsData != null && type.lazy() == false) { + if (fieldsData != null) { result.append(fieldsData); } Index: lucene/src/java/org/apache/lucene/document/FieldType.java =================================================================== --- lucene/src/java/org/apache/lucene/document/FieldType.java (revision 1162944) +++ lucene/src/java/org/apache/lucene/document/FieldType.java (working copy) @@ -29,7 +29,6 @@ private boolean storeTermVectorPositions; private boolean omitNorms; private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; - private boolean lazy; private boolean frozen; public FieldType(FieldType ref) { @@ -41,7 +40,7 @@ this.storeTermVectorPositions = ref.storeTermVectorPositions(); this.omitNorms = ref.omitNorms(); this.indexOptions = ref.indexOptions(); - this.lazy = ref.lazy(); + // Do not copy frozen! 
} public FieldType() { @@ -52,7 +51,9 @@ throw new IllegalStateException(); } } - + + /** Prevents future changes. Note that when a FieldType + * is first bound to a Field instance, it is frozen. */ public void freeze() { this.frozen = true; } @@ -129,15 +130,6 @@ this.indexOptions = value; } - public boolean lazy() { - return this.lazy; - } - - public void setLazy(boolean value) { - checkIfFrozen(); - this.lazy = value; - } - /** Prints a Field for human consumption. */ @Override public final String toString() { @@ -177,9 +169,6 @@ result.append(",indexOptions="); result.append(indexOptions); } - if (lazy()){ - result.append(",lazy"); - } return result.toString(); } Index: lucene/src/java/org/apache/lucene/document/BinaryField.java =================================================================== --- lucene/src/java/org/apache/lucene/document/BinaryField.java (revision 1162944) +++ lucene/src/java/org/apache/lucene/document/BinaryField.java (working copy) @@ -19,6 +19,8 @@ * limitations under the License. */ +/** A field with byte[] value that is only stored. 
*/ + public final class BinaryField extends Field { public static final FieldType TYPE_STORED = new FieldType(); @@ -27,23 +29,18 @@ TYPE_STORED.freeze(); } + /** Creates a new BinaryField */ public BinaryField(String name, byte[] value) { super(name, BinaryField.TYPE_STORED, value); } + /** Creates a new BinaryField */ public BinaryField(String name, byte[] value, int offset, int length) { super(name, BinaryField.TYPE_STORED, value, offset, length); } + /** Creates a new BinaryField */ public BinaryField(String name, BytesRef bytes) { super(name, BinaryField.TYPE_STORED, bytes); } - - public BinaryField(String name, FieldType custom, byte[] value) { - super(name, custom, value); - } - - public boolean isNumeric() { - return false; - } } Index: lucene/src/java/org/apache/lucene/document/TextField.java =================================================================== --- lucene/src/java/org/apache/lucene/document/TextField.java (revision 1162944) +++ lucene/src/java/org/apache/lucene/document/TextField.java (working copy) @@ -21,10 +21,22 @@ import org.apache.lucene.analysis.TokenStream; +/** A field that is indexed and tokenized, without term + * vectors. For example this would be used on a 'body' + * field that contains the bulk of a document's text. + * + * This field's value is not stored by default; use the + * {@link #TYPE_STORED} type (pass it to new + * Field) to store the value. */ + public final class TextField extends Field { + /** Indexed, tokenized, not stored. */ public static final FieldType TYPE_UNSTORED = new FieldType(); + + /** Indexed, tokenized, stored. 
*/ public static final FieldType TYPE_STORED = new FieldType(); + static { TYPE_UNSTORED.setIndexed(true); TYPE_UNSTORED.setTokenized(true); @@ -35,20 +47,19 @@ TYPE_STORED.setTokenized(true); TYPE_STORED.freeze(); } - + + /** Creates a new un-stored TextField */ public TextField(String name, Reader reader) { super(name, TextField.TYPE_UNSTORED, reader); } + /** Creates a new un-stored TextField */ public TextField(String name, String value) { super(name, TextField.TYPE_UNSTORED, value); } + /** Creates a new un-stored TextField */ public TextField(String name, TokenStream stream) { super(name, TextField.TYPE_UNSTORED, stream); } - - public boolean isNumeric() { - return false; - } } Index: lucene/src/java/org/apache/lucene/document/StringField.java =================================================================== --- lucene/src/java/org/apache/lucene/document/StringField.java (revision 1162944) +++ lucene/src/java/org/apache/lucene/document/StringField.java (working copy) @@ -19,10 +19,26 @@ * limitations under the License. */ +/** A field that is indexed but not tokenized: the entire + * String value is indexed as a single token. For example + * this might be used for a 'country' field or an 'id' + * field, or any field that you intend to use for sorting + * or access through the field cache. + * + *

<p/>This field's value is not stored by default; use the + * {@link #TYPE_STORED} type (pass it to new + * Field) to store the value. */ + public final class StringField extends Field { + /** Indexed, not tokenized, omits norms, indexes + * DOCS_ONLY, not stored. */ public static final FieldType TYPE_UNSTORED = new FieldType(); + + /** Indexed, not tokenized, omits norms, indexes + * DOCS_ONLY, stored */ public static final FieldType TYPE_STORED = new FieldType(); + static { TYPE_UNSTORED.setIndexed(true); TYPE_UNSTORED.setOmitNorms(true); @@ -36,6 +52,7 @@ TYPE_STORED.freeze(); } + /** Creates a new un-stored StringField */ public StringField(String name, String value) { super(name, TYPE_UNSTORED, value); } @@ -44,8 +61,4 @@ public String stringValue() { return (fieldsData == null) ? null : fieldsData.toString(); } - - public boolean isNumeric() { - return false; - } } Index: lucene/src/java/overview.html =================================================================== --- lucene/src/java/overview.html (revision 1162944) +++ lucene/src/java/overview.html (working copy) @@ -56,8 +56,7 @@ new IndexWriter.MaxFieldLength(25000)); Document doc = new Document(); String text = "This is the text to be indexed."; - doc.add(new Field("fieldname", text, Field.Store.YES, - Field.Index.ANALYZED)); + doc.add(new Field("fieldname", TextField.TYPE_STORED, text)); iwriter.addDocument(doc); iwriter.close();