### Eclipse Workspace Patch 1.0 #P Lucene Index: lucene/src/java/org/apache/lucene/document/FieldType.java =================================================================== --- lucene/src/java/org/apache/lucene/document/FieldType.java (revision 0) +++ lucene/src/java/org/apache/lucene/document/FieldType.java (revision 0) @@ -0,0 +1,147 @@ +package org.apache.lucene.document; + +import java.util.EnumSet; +import java.util.Map; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+/**
+ * Describes how a field is indexed and stored as a set of {@link Property}
+ * flags, optionally extended with additional {@link FieldTypeAttribute}s.
+ * The property set is copied on construction, so later changes to the
+ * caller's set do not affect this instance.
+ */
+public class FieldType implements CoreFieldTypeAttribute {
+
+  /** The individual flags a {@link FieldType} may carry. */
+  public enum Property {
+    INDEXED,
+    STORED,
+    TOKENIZED,
+    STORE_TERM_VECTORS,
+    STORE_OFFSETS_WITH_TERM_VECTORS,
+    STORE_POSITIONS_WITH_TERM_VECTORS,
+    INDEX_NORMS,
+    INDEX_TERM_FREQS,
+    INDEX_POSITIONS,
+    LAZY
+  }
+
+  // Extension attributes keyed by attribute class; null when none were supplied.
+  private final Map<Class<? extends FieldTypeAttribute>, FieldTypeAttribute> attributes;
+  private final EnumSet<Property> properties;
+
+  /**
+   * Creates a field type without extension attributes.
+   *
+   * @param properties the flags that are switched on; must not be null
+   * @throws NullPointerException if {@code properties} is null
+   */
+  public FieldType(EnumSet<Property> properties) {
+    this(properties, null);
+  }
+
+  /**
+   * Creates a field type with the given flags and extension attributes.
+   *
+   * @param properties the flags that are switched on; must not be null. The
+   *        set is copied.
+   * @param attributes extension attributes keyed by attribute class, or null.
+   *        When non-null the map is used as passed (not copied) and this
+   *        instance registers itself in it under
+   *        {@code CoreFieldTypeAttribute.class}, so the map must be mutable.
+   * @throws NullPointerException if {@code properties} is null
+   */
+  public FieldType(EnumSet<Property> properties,
+                   Map<Class<? extends FieldTypeAttribute>, FieldTypeAttribute> attributes) {
+    if (properties == null) {
+      throw new NullPointerException("properties cannot be null");
+    }
+    // Copy so that a caller (e.g. a reused builder) cannot change this type afterwards.
+    this.properties = EnumSet.copyOf(properties);
+    this.attributes = attributes;
+
+    if (this.attributes != null) {
+      this.attributes.put(CoreFieldTypeAttribute.class, this);
+    }
+  }
+
+  /** Returns true if {@link Property#INDEXED} is set. */
+  public boolean isIndexed() {
+    return properties.contains(Property.INDEXED);
+  }
+
+  /** Returns true if {@link Property#STORED} is set. */
+  public boolean isStored() {
+    return properties.contains(Property.STORED);
+  }
+
+  /** Returns true if {@link Property#TOKENIZED} is set. */
+  public boolean isTokenized() {
+    return properties.contains(Property.TOKENIZED);
+  }
+
+  /** Returns true if {@link Property#STORE_TERM_VECTORS} is set. */
+  public boolean isTermVectorsStored() {
+    return properties.contains(Property.STORE_TERM_VECTORS);
+  }
+
+  /** Returns true if {@link Property#STORE_OFFSETS_WITH_TERM_VECTORS} is set. */
+  public boolean isOffsetsStoredWithTermVectors() {
+    return properties.contains(Property.STORE_OFFSETS_WITH_TERM_VECTORS);
+  }
+
+  /** Returns true if {@link Property#STORE_POSITIONS_WITH_TERM_VECTORS} is set. */
+  public boolean isPositionsStoredWithTermVectors() {
+    return properties.contains(Property.STORE_POSITIONS_WITH_TERM_VECTORS);
+  }
+
+  /** Returns true if {@link Property#INDEX_NORMS} is set. */
+  public boolean isNormsIndexed() {
+    return properties.contains(Property.INDEX_NORMS);
+  }
+
+  /** Returns true if {@link Property#INDEX_TERM_FREQS} is set. */
+  public boolean isTermFreqsIndexed() {
+    return properties.contains(Property.INDEX_TERM_FREQS);
+  }
+
+  /** Returns true if {@link Property#INDEX_POSITIONS} is set. */
+  public boolean isPositionsIndexed() {
+    return properties.contains(Property.INDEX_POSITIONS);
+  }
+
+  /** Returns true if {@link Property#LAZY} is set. */
+  public boolean isLazy() {
+    return properties.contains(Property.LAZY);
+  }
+
+  /**
+   * Looks up an extension attribute by its class.
+   *
+   * @param attributeClass the key the attribute was registered under
+   * @return the attribute, or null if none is registered. Without an attribute
+   *         map, only {@code CoreFieldTypeAttribute.class} resolves (to this
+   *         instance).
+   */
+  public <A extends FieldTypeAttribute> A getAttribute(Class<A> attributeClass) {
+    if (attributes == null) {
+      if (CoreFieldTypeAttribute.class == attributeClass) {
+        return attributeClass.cast(this);
+      }
+      return null;
+    }
+    return attributeClass.cast(attributes.get(attributeClass));
+  }
+
+  /**
+   * Prints this field type for human consumption as a comma-separated list of
+   * labels. The "omit" labels are printed when the corresponding property is
+   * NOT set.
+   */
+  @Override
+  public final String toString() {
+    StringBuilder result = new StringBuilder();
+    appendFlag(result, isStored(), "stored");
+    appendFlag(result, isIndexed(), "indexed");
+    appendFlag(result, isTokenized(), "tokenized");
+    appendFlag(result, isTermVectorsStored(), "termVector");
+    appendFlag(result, isOffsetsStoredWithTermVectors(), "termVectorOffsets");
+    appendFlag(result, isPositionsStoredWithTermVectors(), "termVectorPosition");
+    // The labels describe omission, so they apply when the property is absent.
+    appendFlag(result, !isNormsIndexed(), "omitNorms");
+    appendFlag(result, !isTermFreqsIndexed(), "omitTermFreqAndPositions");
+    appendFlag(result, isLazy(), "lazy");
+    return result.toString();
+  }
+
+  /** Appends {@code label} when {@code present}, comma-separating it from earlier labels. */
+  private static void appendFlag(StringBuilder out, boolean present, String label) {
+    if (!present) {
+      return;
+    }
+    if (out.length() > 0) {
+      out.append(',');
+    }
+    out.append(label);
+  }
+}
Index: lucene/src/java/org/apache/lucene/document/CoreField.java
===================================================================
--- lucene/src/java/org/apache/lucene/document/CoreField.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document/CoreField.java (revision 0)
@@ -0,0 +1,190 @@
+package org.apache.lucene.document;
+
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.FieldInvertState; // for javadocs
+import org.apache.lucene.util.StringHelper;
+
+/**
+ * Base class for {@link Fieldable} implementations. It holds the field name,
+ * boost, {@link FieldType} and the raw value together with the byte range
+ * used by binary values.
+ **/
+public abstract class CoreField implements Fieldable {
+
+  protected String name = "body";
+  protected float boost = 1.0f;
+  protected final FieldType type;
+  // the data object for all different kind of field values
+  protected Object fieldsData = null;
+  // pre-analyzed tokenStream for indexed fields
+  protected TokenStream tokenStream;
+  // length/offset for all primitive types
+  protected boolean isBinary = false;
+  protected int binaryLength;
+  protected int binaryOffset;
+
+  /** Creates a field with the default name and the default (indexed, tokenized) type. */
+  protected CoreField() {
+    this.type = defaultType();
+  }
+
+  /**
+   * Creates a field with the given name and type.
+   *
+   * @param name the field name; it is interned
+   * @param type the field type, or null to use the default (indexed, tokenized) type
+   * @throws NullPointerException if {@code name} is null
+   */
+  protected CoreField(String name, FieldType type) {
+    if (name == null) {
+      throw new NullPointerException("name cannot be null");
+    }
+    this.name = StringHelper.intern(name); // field names are interned
+    this.type = (type == null) ? defaultType() : type;
+  }
+
+  /** Builds the type used when the caller supplies none: indexed and tokenized. */
+  private static FieldType defaultType() {
+    return new FieldTypeBuilder().indexed(true).tokenized(true).build();
+  }
+
+  /**
+   * Sets the boost factor for hits on this field. This value will be multiplied
+   * into the score of all hits on this field of this document.
+   *
+   * <p>
+   * The boost is multiplied by
+   * {@link org.apache.lucene.document.Document#getBoost()} of the document
+   * containing this field. If a document has multiple fields with the same
+   * name, all such values are multiplied together. This product is then used to
+   * compute the norm factor for the field. By default, in the
+   * {@link org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)}
+   * method, the boost value is multiplied by the length normalization factor
+   * and then rounded by
+   * {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before
+   * it is stored in the index. One should attempt to ensure that this product
+   * does not overflow the range of that encoding.
+   *
+   * @see org.apache.lucene.document.Document#setBoost(float)
+   * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
+   * @see org.apache.lucene.search.Similarity#encodeNormValue(float)
+   */
+  public void setBoost(float boost) {
+    this.boost = boost;
+  }
+
+  /**
+   * Returns the boost factor for hits for this field.
+   *
+   * <p>
+   * The default value is 1.0.
+   *
+   * <p>
+   * Note: this value is not stored directly with the document in the index.
+   * Documents returned from
+   * {@link org.apache.lucene.index.IndexReader#document(int)} and
+   * {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have
+   * the same value present as when this field was indexed.
+   *
+   * @see #setBoost(float)
+   */
+  public float getBoost() {
+    return boost;
+  }
+
+  /**
+   * Returns the name of the field as an interned string. For example "date",
+   * "title", "body", ...
+   */
+  public String name() {
+    return name;
+  }
+
+  /**
+   * Returns field type containing additional properties. For example: indexed, stored, ...
+   */
+  public FieldType getType() {
+    return type;
+  }
+
+  /** True iff the value of the field is stored as binary */
+  public final boolean isBinary() {
+    return isBinary;
+  }
+
+  /**
+   * Return the raw byte[] for the binary field. Note that you must also call
+   * {@link #getBinaryLength} and {@link #getBinaryOffset} to know which range
+   * of bytes in this returned array belong to the field.
+   *
+   * @return reference to the Field value as byte[], or null if the value is
+   *         not a byte[].
+   */
+  public byte[] getBinaryValue() {
+    return getBinaryValue(null);
+  }
+
+  /**
+   * Returns the raw byte[] value of this field.
+   *
+   * @param result ignored by this implementation
+   * @return reference to the Field value as byte[], or null if the value is
+   *         not a byte[]
+   */
+  public byte[] getBinaryValue(byte[] result) {
+    // Test the value itself rather than trusting the isBinary flag, so a
+    // non-byte[] value yields null instead of a ClassCastException.
+    return (fieldsData instanceof byte[]) ? (byte[]) fieldsData : null;
+  }
+
+  /**
+   * Returns length of byte[] segment that is used as value, if Field is not
+   * binary returned value is undefined
+   *
+   * @return length of byte[] segment that represents this Field value
+   */
+  public int getBinaryLength() {
+    if (isBinary) {
+      return binaryLength;
+    }
+    return (fieldsData instanceof byte[]) ? ((byte[]) fieldsData).length : 0;
+  }
+
+  /**
+   * Returns offset into byte[] segment that is used as value, if Field is not
+   * binary returned value is undefined
+   *
+   * @return index of the first byte in byte[] segment that represents this
+   *         Field value
+   */
+  public int getBinaryOffset() {
+    return binaryOffset;
+  }
+
+  /** Prints a Field for human consumption. */
+  @Override
+  public final String toString() {
+    StringBuilder result = new StringBuilder();
+
+    if (isBinary) {
+      result.append("binary");
+    }
+
+    // Only add the separator when there is a type description to separate.
+    String typeDescription = type.toString();
+    if (typeDescription.length() > 0) {
+      if (result.length() > 0) {
+        result.append(",");
+      }
+      result.append(typeDescription);
+    }
+
+    result.append('<');
+    result.append(name);
+    result.append(':');
+
+    if (fieldsData != null && !type.isLazy()) {
+      result.append(fieldsData);
+    }
+
+    result.append('>');
+    return result.toString();
+  }
+}
Index: lucene/src/java/org/apache/lucene/document/CoreFieldTypeAttribute.java
===================================================================
--- lucene/src/java/org/apache/lucene/document/CoreFieldTypeAttribute.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document/CoreFieldTypeAttribute.java (revision 0)
@@ -0,0 +1,40 @@
+package org.apache.lucene.document;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The core boolean settings of a field type, exposed as a
+ * {@link FieldTypeAttribute}. {@link FieldType} implements this interface by
+ * answering each query from its property set.
+ */
+public interface CoreFieldTypeAttribute extends FieldTypeAttribute {
+
+  /** True if the value of the field is to be indexed, so that it may be searched on. */
+  boolean isIndexed();
+
+  /** True if the value of the field is to be stored in the index for return with search hits. */
+  boolean isStored();
+
+  /** True if the value of the field should be tokenized as text prior to indexing. */
+  boolean isTokenized();
+
+  /** True if the term or terms used to index this field are stored as a term vector. */
+  boolean isTermVectorsStored();
+
+  /** True if terms are stored as term vector together with their offsets. */
+  boolean isOffsetsStoredWithTermVectors();
+
+  /** True if terms are stored as term vector together with their token positions. */
+  boolean isPositionsStoredWithTermVectors();
+
+  /**
+   * True if norms are indexed for this field.
+   * NOTE(review): presumably the inverse of the getOmitNorms() flag this
+   * patch removes from Fieldable; confirm.
+   */
+  boolean isNormsIndexed();
+
+  /**
+   * True if term frequencies are indexed for this field.
+   * NOTE(review): presumably replaces, together with
+   * {@link #isPositionsIndexed()}, the getOmitTermFreqAndPositions() flag this
+   * patch removes from Fieldable; confirm.
+   */
+  boolean isTermFreqsIndexed();
+
+  /** True if positions are indexed for this field. */
+  boolean isPositionsIndexed();
+
+  /** True if this field can be loaded lazily. */
+  boolean isLazy();
+}
Index: lucene/src/java/org/apache/lucene/document/Fieldable.java
===================================================================
--- lucene/src/java/org/apache/lucene/document/Fieldable.java (revision 1131340)
+++ lucene/src/java/org/apache/lucene/document/Fieldable.java (working copy)
@@ -67,6 +67,11 @@
    * @see #setBoost(float)
    */
   float getBoost();
+
+  /**
+   * Returns field type containing additional properties. For example: indexed, stored, ...
+   */
+  public FieldType getType();
 
   /** Returns the name of the field as an interned string.
    * For example "date", "title", "body", ...
@@ -94,61 +99,8 @@
    */
   public TokenStream tokenStreamValue();
 
-  /** True if the value of the field is to be stored in the index for return
-    with search hits. */
-  boolean isStored();
-
-  /** True if the value of the field is to be indexed, so that it may be
-    searched on. 
*/ - boolean isIndexed(); - - /** True if the value of the field should be tokenized as text prior to - indexing. Un-tokenized fields are indexed as a single word and may not be - Reader-valued. */ - boolean isTokenized(); - - /** True if the term or terms used to index this field are stored as a term - * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}. - * These methods do not provide access to the original content of the field, - * only to terms used to index it. If the original content must be - * preserved, use the stored attribute instead. - * - * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String) - */ - boolean isTermVectorStored(); - - /** - * True if terms are stored as term vector together with their offsets - * (start and end positon in source text). - */ - boolean isStoreOffsetWithTermVector(); - - /** - * True if terms are stored as term vector together with their token positions. - */ - boolean isStorePositionWithTermVector(); - /** True if the value of the field is stored as binary */ - boolean isBinary(); - - /** True if norms are omitted for this indexed field */ - boolean getOmitNorms(); - - /** Expert: - * - * If set, omit normalization factors associated with this indexed field. - * This effectively disables indexing boosts and length normalization for this field. - */ - void setOmitNorms(boolean omitNorms); - - /** - * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving - * it's values via {@link #stringValue()} or {@link #getBinaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that - * retrieved the {@link Document} is still open. 
- * - * @return true if this field can be loaded lazily - */ - boolean isLazy(); + boolean isBinary(); /** * Returns offset into byte[] segment that is used as value, if Field is not binary @@ -190,20 +142,5 @@ * @return reference to the Field value as byte[]. */ abstract byte[] getBinaryValue(byte[] result); - - /** @see #setOmitTermFreqAndPositions */ - boolean getOmitTermFreqAndPositions(); - - /** Expert: - * - * If set, omit term freq, positions and payloads from - * postings for this field. - * - *

NOTE: While this option reduces storage space - * required in the index, it also means any query - * requiring positional information, such as {@link - * PhraseQuery} or {@link SpanQuery} subclasses will - * fail with an exception. - */ - void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions); + } Index: lucene/src/java/org/apache/lucene/document/FieldTypeAttribute.java =================================================================== --- lucene/src/java/org/apache/lucene/document/FieldTypeAttribute.java (revision 0) +++ lucene/src/java/org/apache/lucene/document/FieldTypeAttribute.java (revision 0) @@ -0,0 +1,20 @@ +package org.apache.lucene.document; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+/**
+ * Marker interface for attributes that can be attached to a {@link FieldType}.
+ * It declares no methods; {@link CoreFieldTypeAttribute} extends it.
+ */
+public interface FieldTypeAttribute {
+}
Index: lucene/src/java/org/apache/lucene/document/FieldTypeBuilder.java
===================================================================
--- lucene/src/java/org/apache/lucene/document/FieldTypeBuilder.java (revision 0)
+++ lucene/src/java/org/apache/lucene/document/FieldTypeBuilder.java (revision 0)
@@ -0,0 +1,99 @@
+package org.apache.lucene.document;
+
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.document.FieldType.Property;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Fluent builder for {@link FieldType}. Every setter switches one
+ * {@link Property} on or off and returns this builder. {@link #build()} can
+ * be called repeatedly; each result gets its own copy of the builder state.
+ */
+public class FieldTypeBuilder {
+
+  // Starts empty, so a property is off unless a setter turned it on.
+  private final EnumSet<Property> properties = EnumSet.noneOf(Property.class);
+  // Created lazily by addAttribute(); null while no attribute was added.
+  private Map<Class<? extends FieldTypeAttribute>, FieldTypeAttribute> attributes;
+
+  /** Switches {@link Property#INDEXED} on or off. */
+  public FieldTypeBuilder indexed(boolean indexed) {
+    return setProperty(Property.INDEXED, indexed);
+  }
+
+  /** Switches {@link Property#STORED} on or off. */
+  public FieldTypeBuilder stored(boolean stored) {
+    return setProperty(Property.STORED, stored);
+  }
+
+  /** Switches {@link Property#TOKENIZED} on or off. */
+  public FieldTypeBuilder tokenized(boolean tokenized) {
+    return setProperty(Property.TOKENIZED, tokenized);
+  }
+
+  /** Switches {@link Property#STORE_TERM_VECTORS} on or off. */
+  public FieldTypeBuilder storeTermVectors(boolean storeTermVectors) {
+    return setProperty(Property.STORE_TERM_VECTORS, storeTermVectors);
+  }
+
+  /** Switches {@link Property#STORE_OFFSETS_WITH_TERM_VECTORS} on or off. */
+  public FieldTypeBuilder storeOffsetsWithTermVectors(boolean storeOffsets) {
+    return setProperty(Property.STORE_OFFSETS_WITH_TERM_VECTORS, storeOffsets);
+  }
+
+  /** Switches {@link Property#STORE_POSITIONS_WITH_TERM_VECTORS} on or off. */
+  public FieldTypeBuilder storePositionsWithTermVectors(boolean storePositions) {
+    return setProperty(Property.STORE_POSITIONS_WITH_TERM_VECTORS, storePositions);
+  }
+
+  /** Switches {@link Property#INDEX_NORMS} on or off. */
+  public FieldTypeBuilder indexNorms(boolean indexNorms) {
+    return setProperty(Property.INDEX_NORMS, indexNorms);
+  }
+
+  /** Switches {@link Property#INDEX_TERM_FREQS} on or off. */
+  public FieldTypeBuilder indexTermFreqs(boolean indexTermFreqs) {
+    return setProperty(Property.INDEX_TERM_FREQS, indexTermFreqs);
+  }
+
+  /** Switches {@link Property#INDEX_POSITIONS} on or off. */
+  public FieldTypeBuilder indexPositions(boolean indexPositions) {
+    return setProperty(Property.INDEX_POSITIONS, indexPositions);
+  }
+
+  /** Switches {@link Property#LAZY} on or off. */
+  public FieldTypeBuilder lazy(boolean lazy) {
+    return setProperty(Property.LAZY, lazy);
+  }
+
+  /**
+   * Registers an extension attribute under its concrete runtime class,
+   * replacing any attribute previously registered under that class.
+   *
+   * @param typeAttribute the attribute to add; must not be null
+   * @return this builder
+   * @throws NullPointerException if {@code typeAttribute} is null
+   */
+  public FieldTypeBuilder addAttribute(FieldTypeAttribute typeAttribute) {
+    if (typeAttribute == null) {
+      throw new NullPointerException("typeAttribute cannot be null");
+    }
+    if (attributes == null) {
+      attributes = new HashMap<Class<? extends FieldTypeAttribute>, FieldTypeAttribute>();
+    }
+    attributes.put(typeAttribute.getClass(), typeAttribute);
+    return this;
+  }
+
+  /**
+   * Creates a {@link FieldType} from the current builder state.
+   *
+   * @return a new field type; it receives copies of the property set and the
+   *         attribute map, so further use of this builder does not affect it
+   */
+  public FieldType build() {
+    Map<Class<? extends FieldTypeAttribute>, FieldTypeAttribute> attributesCopy = null;
+    if (attributes != null) {
+      attributesCopy = new HashMap<Class<? extends FieldTypeAttribute>, FieldTypeAttribute>(attributes);
+    }
+    return new FieldType(EnumSet.copyOf(properties), attributesCopy);
+  }
+
+  // ================================================= Helper Methods ================================================
+
+  /** Adds {@code property} when {@code enabled}, removes it otherwise. */
+  private FieldTypeBuilder setProperty(Property property, boolean enabled) {
+    if (enabled) {
+      properties.add(property);
+    } else {
+      properties.remove(property);
+    }
+    return this;
+  }
+}