Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 929127) +++ CHANGES.txt (working copy) @@ -5,6 +5,7 @@ Changes in backwards compatibility policy * LUCENE-1458, LUCENE-2111: Changes from flexible indexing: + - MultiReader ctor now throws IOException - Directory.copy/Directory.copyTo now copies all files (not just @@ -12,9 +13,21 @@ dependent on the codecs used. (Mike McCandless) - UnicodeUtil now uses BytesRef for UTF-8 output, and some method - signatures have changed to CharSequence. These are advanced APIs + signatures have changed to CharSequence. These are internal APIs and subject to change suddenly. (Robert Muir, Mike McCandless) + - Positional queries (PhraseQuery, *SpanQuery) will now throw an + exception if you use them on a field that omits positions during + indexing (previously they silently returned no results). + + - FieldCache.{Byte,Short,Int,Long,Float,Double}Parser's API has + changed -- each parse method now takes a BytesRef instead of a + String. If you have an existing Parser, a simple way to fix it is + to invoke BytesRef.utf8ToString, and pass that String to your + existing parser. This will work, but performance would be better + if you could fix your parser to instead operate directly on the + byte[] in the BytesRef. + Bug Fixes * LUCENE-2222: FixedIntBlockIndexInput incorrectly read one block of Index: src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- src/java/org/apache/lucene/search/MultiTermQuery.java (revision 929127) +++ src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -86,7 +86,7 @@ /** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link #getTermsEnum} * and update the boost on each returned term. This enables to control the boost factor * for each matching term in {@link #SCORING_BOOLEAN_QUERY_REWRITE} or - * {@link TOP_TERMS_SCORING_BOOLEAN_QUERY_REWRITE} mode. 
+ * {@link TopTermsBooleanQueryRewrite} mode. * {@link FuzzyQuery} is using this to take the edit distance into account. */ public static interface BoostAttribute extends Attribute { @@ -95,7 +95,7 @@ /** Retrieves the boost, default is {@code 1.0f}. */ public float getBoost(); /** Sets the maximum boost for terms that would never get - * into the priority queue of {@link MultiTermQuery#TOP_TERMS_SCORING_BOOLEAN_REWRITE}. + * into the priority queue of {@link MultiTermQuery.TopTermsBooleanQueryRewrite}. * This value is not changed by {@link AttributeImpl#clear} * and not used in {@code equals()} and {@code hashCode()}. * Do not change the value in the {@link TermsEnum}! @@ -262,7 +262,6 @@ if (boostAtt != null) boostAtt.setMaxNonCompetitiveBoost(maxNonCompetitiveBoost); } } - } private static class ScoringBooleanQueryRewrite extends BooleanQueryRewrite { Index: src/java/org/apache/lucene/search/PrefixTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/PrefixTermsEnum.java (revision 929127) +++ src/java/org/apache/lucene/search/PrefixTermsEnum.java (working copy) @@ -27,7 +27,7 @@ * Subclass of FilteredTermEnum for enumerating all terms that match the * specified prefix filter term. *

Term enumerations are always ordered by - * {@link #getTermComparator}. Each term in the enumeration is + * {@link #getComparator}. Each term in the enumeration is * greater than all that precede it.

*/ public class PrefixTermsEnum extends FilteredTermsEnum { Index: src/java/org/apache/lucene/search/FilteredTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/FilteredTermsEnum.java (revision 929127) +++ src/java/org/apache/lucene/search/FilteredTermsEnum.java (working copy) @@ -34,7 +34,7 @@ * Abstract class for enumerating a subset of all terms. * *

Term enumerations are always ordered by - * {@link #getTermComparator}. Each term in the enumeration is + * {@link #getComparator}. Each term in the enumeration is * greater than all that precede it.

*

Please note: Consumers of this enum cannot * call {@code seek()}, it is forward only; it throws @@ -52,7 +52,7 @@ /** Return value, if term should be accepted or the iteration should * {@code END}. The {@code *_SEEK} values denote, that after handling the current term - * the enum should call {@link nextSeekTerm()} and step forward. + * the enum should call {@link #nextSeekTerm} and step forward. * @see #accept(BytesRef) */ protected static enum AcceptStatus {YES, YES_AND_SEEK, NO, NO_AND_SEEK, END}; Index: src/java/org/apache/lucene/search/spans/SpanTermQuery.java =================================================================== --- src/java/org/apache/lucene/search/spans/SpanTermQuery.java (revision 929127) +++ src/java/org/apache/lucene/search/spans/SpanTermQuery.java (working copy) @@ -85,15 +85,22 @@ public Spans getSpans(final IndexReader reader) throws IOException { // NOTE: debateably, the caller should never pass in a // multi reader... + final BytesRef textBytes = new BytesRef(term.text()); final DocsAndPositionsEnum postings = MultiFields.getTermPositionsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), - new BytesRef(term.text())); + textBytes); if (postings != null) { return new TermSpans(postings, term); } else { - return TermSpans.EMPTY_TERM_SPANS; + if (MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), textBytes) != null) { + // term does exist, but has no positions + throw new IllegalStateException("field \"" + term.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run SpanTermQuery (term=" + term.text() + ")"); + } else { + // term does not exist + return TermSpans.EMPTY_TERM_SPANS; + } } } } Index: src/java/org/apache/lucene/search/TermRangeTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/TermRangeTermsEnum.java (revision 929127) +++ src/java/org/apache/lucene/search/TermRangeTermsEnum.java (working 
copy) @@ -28,7 +28,7 @@ * Subclass of FilteredTermEnum for enumerating all terms that match the * specified range parameters. *

Term enumerations are always ordered by - * {@link #getTermComparator}. Each term in the enumeration is + * {@link #getComparator}. Each term in the enumeration is * greater than all that precede it.

*/ public class TermRangeTermsEnum extends FilteredTermsEnum { Index: src/java/org/apache/lucene/search/Scorer.java =================================================================== --- src/java/org/apache/lucene/search/Scorer.java (revision 929127) +++ src/java/org/apache/lucene/search/Scorer.java (working copy) @@ -36,8 +36,6 @@ * not valid scores. Certain collectors (eg {@link * TopScoreDocCollector}) will not properly collect hits * with these scores. - * - * @see BooleanQuery#setAllowDocsOutOfOrder */ public abstract class Scorer extends DocIdSetIterator { private final Similarity similarity; @@ -90,7 +88,7 @@ } /** Returns the score of the current document matching the query. - * Initially invalid, until {@link #next()} or {@link #skipTo(int)} + * Initially invalid, until {@link #nextDoc()} or {@link #advance(int)} * is called the first time, or when called from within * {@link Collector#collect}. */ Index: src/java/org/apache/lucene/search/AutomatonTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/AutomatonTermsEnum.java (revision 929127) +++ src/java/org/apache/lucene/search/AutomatonTermsEnum.java (working copy) @@ -132,7 +132,8 @@ } /** - * Returns true if the term matches the automaton. + * Returns true if the term matches the automaton. Also stashes away the term + * to assist with smart enumeration. */ @Override protected AcceptStatus accept(final BytesRef term) { Index: src/java/org/apache/lucene/search/FuzzyTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/FuzzyTermsEnum.java (revision 929127) +++ src/java/org/apache/lucene/search/FuzzyTermsEnum.java (working copy) @@ -40,7 +40,7 @@ * to the specified filter term. * *

Term enumerations are always ordered by - * {@link #getTermComparator}. Each term in the enumeration is + * {@link #getComparator}. Each term in the enumeration is * greater than all that precede it.

*/ public final class FuzzyTermsEnum extends TermsEnum { Index: src/java/org/apache/lucene/index/Terms.java =================================================================== --- src/java/org/apache/lucene/index/Terms.java (revision 929127) +++ src/java/org/apache/lucene/index/Terms.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.util.CloseableThreadLocal; /** - * Access to the terms in a specific field. See {@link #Fields}. + * Access to the terms in a specific field. See {@link Fields}. * @lucene.experimental */ Index: src/java/org/apache/lucene/index/DocsAndPositionsEnum.java =================================================================== --- src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (revision 929127) +++ src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (working copy) @@ -25,7 +25,7 @@ public abstract class DocsAndPositionsEnum extends DocsEnum { /** Returns the next position. You should only call this - * up to {@link FormatPostingsDocsEnum#freq()} times else + * up to {@link DocsEnum#freq()} times else * the behavior is not defined. */ public abstract int nextPosition() throws IOException; Index: src/java/org/apache/lucene/index/DocsEnum.java =================================================================== --- src/java/org/apache/lucene/index/DocsEnum.java (revision 929127) +++ src/java/org/apache/lucene/index/DocsEnum.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.util.IntsRef; /** Iterates through the documents, term freq and positions. - * NOTE: you must first call {@link #next}. + * NOTE: you must first call {@link #nextDoc}. * * @lucene.experimental */ public abstract class DocsEnum extends DocIdSetIterator { @@ -32,8 +32,8 @@ private AttributeSource atts = null; /** Returns term frequency in the current document. Do - * not call this before {@link #next} is first called, - * nor after {@link #next} returns NO_MORE_DOCS. 
*/ + * not call this before {@link #nextDoc} is first called, + * nor after {@link #nextDoc} returns NO_MORE_DOCS. */ public abstract int freq(); /** Returns the related attributes. */ @@ -66,7 +66,7 @@ } /** Bulk read (docs and freqs). After this is called, - * {@link #doc} and {@link #freq} are undefined. This + * {@link #docID()} and {@link #freq} are undefined. This * returns the count read, or 0 if the end is reached. * The IntsRef for docs and freqs will not have their * length set. Index: src/java/org/apache/lucene/index/TermsEnum.java =================================================================== --- src/java/org/apache/lucene/index/TermsEnum.java (revision 929127) +++ src/java/org/apache/lucene/index/TermsEnum.java (working copy) @@ -26,11 +26,12 @@ /** Iterator to seek ({@link #seek}) or step through ({@link * #next} terms, obtain frequency information ({@link - * #docFreq}), and obtain a {@link DocsEnum} for the current - * term ({@link #docs)}. + * #docFreq}), and obtain a {@link DocsEnum} or {@link + * DocsAndPositionsEnum} for the current term ({@link + * #docs}). * *

Term enumerations are always ordered by - * {@link #getTermComparator}. Each term in the enumeration is + * {@link #getComparator}. Each term in the enumeration is * greater than all that precede it.

* *

On obtaining a TermsEnum, you must first call @@ -47,7 +48,7 @@ return atts; } - /** Represents returned result from {@link TermsEnum.seek}. + /** Represents returned result from {@link #seek}. * If status is FOUND, then the precise term was found. * If status is NOT_FOUND, then a different term was * found. If the status is END, the end of the iteration @@ -70,7 +71,7 @@ /** Seeks to the specified term by ordinal (position) as * previously returned by {@link #ord}. The target ord * may be befor or after the current ord. See {@link - * #seek(BytesRef). */ + * #seek(BytesRef)}. */ public abstract SeekStatus seek(long ord) throws IOException; /** Increments the enumeration to the next element. Index: src/java/org/apache/lucene/index/TermPositions.java =================================================================== --- src/java/org/apache/lucene/index/TermPositions.java (revision 929127) +++ src/java/org/apache/lucene/index/TermPositions.java (working copy) @@ -26,7 +26,7 @@ * positions of each occurrence of a term in a document. * * @see IndexReader#termPositions() - * @deprecated Use {@link PositionsEnum} instead + * @deprecated Use {@link DocsAndPositionsEnum} instead */ @Deprecated public interface TermPositions Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- src/java/org/apache/lucene/index/IndexReader.java (revision 929127) +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.store.*; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.ReaderUtil; // for javadocs import java.io.File; import java.io.FileOutputStream; @@ -941,7 +942,7 @@ * *

The enumeration is ordered by document number. Each document number * is greater than all that precede it in the enumeration. - * @deprecated Use the new flex API ({@link #termDocsEnum()}) instead. + * @deprecated Use the new flex API ({@link #termDocsEnum}) instead. * @throws IOException if there is a low-level IO error */ @Deprecated @@ -1021,7 +1022,7 @@ *

The enumeration is ordered by document number. Each document number is * greater than all that precede it in the enumeration. * @deprecated Please switch the flex API ({@link - * #termDocsEnum()}) instead + * #termDocsEnum}) instead * @throws IOException if there is a low-level IO error */ @Deprecated @@ -1034,7 +1035,7 @@ /** Returns an unpositioned {@link TermPositions} enumerator. * @deprecated Please switch the flex API ({@link - * #termDocsEnum()}) instead + * #termDocsEnum}) instead * @throws IOException if there is a low-level IO error */ @Deprecated Index: src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- src/java/org/apache/lucene/index/DocumentsWriter.java (revision 929127) +++ src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -1075,7 +1075,7 @@ termRef.copy(term.text()); - if (termsEnum.seek(termRef) == TermsEnum.SeekStatus.FOUND) { + if (termsEnum.seek(termRef, false) == TermsEnum.SeekStatus.FOUND) { DocsEnum docsEnum = termsEnum.docs(reader.getDeletedDocs(), docs); if (docsEnum != null) { Index: src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java (revision 929127) +++ src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.util.CodecUtil; /** Reads IndexInputs written with {@link - * SingleIntIndexoutput}. NOTE: this class is just for + * SingleIntIndexOutput}. NOTE: this class is just for * demonstration puprposes (it is a very slow way to read a * block of ints). 
* Index: src/java/org/apache/lucene/index/codecs/standard/TermState.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/TermState.java (revision 929127) +++ src/java/org/apache/lucene/index/codecs/standard/TermState.java (working copy) @@ -17,8 +17,10 @@ * limitations under the License. */ +import org.apache.lucene.index.DocsEnum; // for javadocs + /** - * Holds all state required for {@link StandardDocsProducer} + * Holds all state required for {@link StandardPostingsReader} * to produce a {@link DocsEnum} without re-seeking the * terms dict. * @lucene.experimental Index: src/java/org/apache/lucene/index/MultiFields.java =================================================================== --- src/java/org/apache/lucene/index/MultiFields.java (revision 929127) +++ src/java/org/apache/lucene/index/MultiFields.java (working copy) @@ -23,6 +23,7 @@ import java.util.List; import java.util.ArrayList; import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util.ReaderUtil.Gather; // for javadocs import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.MultiBits; @@ -31,7 +32,7 @@ * Exposes flex API, merged from flex API of sub-segments. * This is useful when you're interacting with an {@link * IndexReader} implementation that consists of sequential - * sub-readers (eg {@link DirectoryReade} or {@link + * sub-readers (eg {@link DirectoryReader} or {@link * MultiReader}). * *

NOTE: for multi readers, you'll get better @@ -53,7 +54,7 @@ * *

: this is a slow way to access postings. * It's better to get the sub-readers (using {@link - * ReaderUtil#Gather}) and iterate through them + * Gather}) and iterate through them * yourself. */ public static Fields getFields(IndexReader r) throws IOException { final IndexReader[] subs = r.getSequentialSubReaders(); Index: src/java/org/apache/lucene/util/ReaderUtil.java =================================================================== --- src/java/org/apache/lucene/util/ReaderUtil.java (revision 929127) +++ src/java/org/apache/lucene/util/ReaderUtil.java (working copy) @@ -48,7 +48,11 @@ } /** - * Gathers sub-readers from reader into a List. + * Gathers sub-readers from reader into a List. See + * {@link Gather} for a more general way to gather + * whatever you need to, per reader. + * + * @lucene.experimental * * @param allSubReaders * @param reader @@ -68,6 +72,11 @@ } } + /** Recursively visits all sub-readers of a reader. You + * should subclass this and override the add method to + * gather what you need. + * + * @lucene.experimental */ public static abstract class Gather { private final IndexReader topReader; Index: src/java/org/apache/lucene/util/packed/package.html =================================================================== --- src/java/org/apache/lucene/util/packed/package.html (revision 929127) +++ src/java/org/apache/lucene/util/packed/package.html (working copy) @@ -9,7 +9,7 @@ access speed. The standard usage scenario is replacing large int or long arrays in order to reduce the memory footprint.

- The main access point is the {@link PackedInts} factory. + The main access point is the {@link org.apache.lucene.util.packed.PackedInts} factory.