diff --git lucene/build.xml lucene/build.xml index 96be669..e145c21 100644 --- lucene/build.xml +++ lucene/build.xml @@ -557,6 +557,14 @@ + + + + + + + + @@ -570,6 +579,7 @@ diff --git lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java index fe2008c..f3a2958 100644 --- lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java +++ lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java @@ -17,7 +17,6 @@ package org.apache.lucene.analysis; * limitations under the License. */ -import org.apache.lucene.index.IndexableField; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.util.CloseableThreadLocal; @@ -48,7 +47,7 @@ public abstract class Analyzer { /** * Creates a new {@link TokenStreamComponents} instance for this analyzer. - * + * * @param fieldName * the name of the fields content passed to the * {@link TokenStreamComponents} sink as a reader @@ -71,7 +70,7 @@ public abstract class Analyzer { * method will reuse the previously stored components after resetting them * through {@link TokenStreamComponents#setReader(Reader)}. *

- * + * * @param fieldName the name of the field the created TokenStream is used for * @param reader the reader the streams source reads from */ @@ -87,7 +86,7 @@ public abstract class Analyzer { } return components.getTokenStream(); } - + /** * Override this if you want to add a CharFilter chain. */ @@ -120,11 +119,12 @@ public abstract class Analyzer { * fields. This method is only called if the field * produced at least one token for indexing. * - * @param field the field just indexed + * @param name The name of the field just indexed + * @param tokenized true if the field just indexed is tokenized * @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)} */ - public int getOffsetGap(IndexableField field) { - if (field.fieldType().tokenized()) { + public int getOffsetGap(String name, boolean tokenized) { + if (tokenized) { return 1; } else { return 0; @@ -149,7 +149,7 @@ public abstract class Analyzer { /** * Creates a new {@link TokenStreamComponents} instance. - * + * * @param source * the analyzer's tokenizer * @param result @@ -160,10 +160,10 @@ public abstract class Analyzer { this.source = source; this.sink = result; } - + /** * Creates a new {@link TokenStreamComponents} instance. - * + * * @param source * the analyzer's tokenizer */ @@ -175,7 +175,7 @@ public abstract class Analyzer { /** * Resets the encapsulated components with the given reader. If the components * cannot be reset, an Exception should be thrown. - * + * * @param reader * a reader to reset the source component * @throws IOException @@ -187,7 +187,7 @@ public abstract class Analyzer { /** * Returns the sink {@link TokenStream} - * + * * @return the sink {@link TokenStream} */ public TokenStream getTokenStream() { diff --git lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java index a7256e9..b5978ad 100644 --- lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java +++ lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java @@ -17,7 +17,6 @@ package org.apache.lucene.analysis; * limitations under the License. */ -import org.apache.lucene.index.IndexableField; import java.io.Reader; @@ -83,8 +82,8 @@ public abstract class AnalyzerWrapper extends Analyzer { * {@inheritDoc} */ @Override - public final int getOffsetGap(IndexableField field) { - return getWrappedAnalyzer(field.name()).getOffsetGap(field); + public final int getOffsetGap(String name, boolean tokenized) { + return getWrappedAnalyzer(name).getOffsetGap(name, tokenized); } @Override diff --git lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java index d347027..bacf671 100644 --- lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java +++ lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java @@ -21,12 +21,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.document.DoubleField; // for javadocs -import org.apache.lucene.document.FloatField; // for javadocs -import org.apache.lucene.document.IntField; // for javadocs -import org.apache.lucene.document.LongField; // for javadocs -import org.apache.lucene.search.NumericRangeFilter; // for javadocs -import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeReflector; @@ -35,11 +29,11 @@ import org.apache.lucene.util.NumericUtils; /** * Expert: This class provides a {@link TokenStream} - * for indexing numeric values that can be used by {@link - * NumericRangeQuery} or {@link NumericRangeFilter}. + * for indexing numeric values that can be used by + * NumericRangeQuery or NumericRangeFilter. * - *

Note that for simple usage, {@link IntField}, {@link - * LongField}, {@link FloatField} or {@link DoubleField} is + *

Note that for simple usage, IntField, + * LongField, FloatField or DoubleField is * recommended. These fields disable norms and * term freqs, as they are not usually needed during * searching. If you need to change these settings, you @@ -80,7 +74,7 @@ import org.apache.lucene.util.NumericUtils; * than one numeric field, use a separate NumericTokenStream * instance for each.

* - *

See {@link NumericRangeQuery} for more details on the + *

See NumericRangeQuery for more details on the * precisionStep * parameter as well as how numeric fields work under the hood.

diff --git lucene/core/src/java/org/apache/lucene/analysis/Token.java lucene/core/src/java/org/apache/lucene/analysis/Token.java index 7d33509..42b7ec0 100644 --- lucene/core/src/java/org/apache/lucene/analysis/Token.java +++ lucene/core/src/java/org/apache/lucene/analysis/Token.java @@ -24,7 +24,6 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeImpl; @@ -47,7 +46,7 @@ import org.apache.lucene.util.BytesRef; with type "eos". The default token type is "word".

A Token can optionally have metadata (a.k.a. payload) in the form of a variable - length byte array. Use {@link DocsAndPositionsEnum#getPayload()} to retrieve the + length byte array. Use DocsAndPositionsEnum#getPayload() to retrieve the payloads from the index.

diff --git lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java index 605b3cf..2cb8f7c 100644 --- lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java +++ lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java @@ -21,16 +21,12 @@ import java.io.IOException; import java.io.Closeable; import java.lang.reflect.Modifier; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexWriter; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeSource; /** - * A TokenStream enumerates the sequence of tokens, either from - * {@link Field}s of a {@link Document} or from query text. + * A TokenStream enumerates the sequence of tokens from text. *

* This is an abstract class; concrete subclasses are: *

    @@ -124,7 +120,7 @@ public abstract class TokenStream extends AttributeSource implements Closeable { } /** - * Consumers (i.e., {@link IndexWriter}) use this method to advance the stream to + * Consumers (i.e., Lucene's IndexWriter) use this method to advance the stream to * the next token. Implementing classes must implement this method and update * the appropriate {@link AttributeImpl}s with the attributes of the next * token. diff --git lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java index 0ef2a6f..5013374 100644 --- lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java +++ lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java @@ -188,7 +188,7 @@ final class DocInverterPerField extends DocFieldConsumerPerField { } } - fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field); + fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field.name(), field.fieldType().tokenized()); fieldState.boost *= field.boost(); }