Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 960362) +++ lucene/CHANGES.txt (working copy) @@ -361,6 +361,11 @@ with equivalent ones that take a String (id) as argument. You can pass whatever ID you want, as long as you use the same one when calling both. (Shai Erera) + +* LUCENE-2525: Move UnionDocsAndPositionsEnum out of MultiPhraseQuery, + making it public in oal.index; this "matches" the + MultipleTermPositions from pre-flex. (Peter Wilkins via Mike + McCandless) Bug fixes Index: lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (revision 960362) +++ lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (working copy) @@ -22,12 +22,11 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.UnionDocsAndPositionsEnum; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ToStringUtils; -import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.Bits; /** @@ -421,170 +420,3 @@ return true; } } - -/** - * Takes the logical union of multiple DocsEnum iterators. 
- */ - -// TODO: if ever we allow subclassing of the *PhraseScorer -class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { - - private static final class DocsQueue extends PriorityQueue<DocsAndPositionsEnum> { - DocsQueue(List<DocsAndPositionsEnum> docsEnums) throws IOException { - initialize(docsEnums.size()); - - Iterator<DocsAndPositionsEnum> i = docsEnums.iterator(); - while (i.hasNext()) { - DocsAndPositionsEnum postings = (DocsAndPositionsEnum) i.next(); - if (postings.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) { - add(postings); - } - } - } - - final public DocsEnum peek() { - return top(); - } - - @Override - public final boolean lessThan(DocsAndPositionsEnum a, DocsAndPositionsEnum b) { - return a.docID() < b.docID(); - } - } - - private static final class IntQueue { - private int _arraySize = 16; - private int _index = 0; - private int _lastIndex = 0; - private int[] _array = new int[_arraySize]; - - final void add(int i) { - if (_lastIndex == _arraySize) - growArray(); - - _array[_lastIndex++] = i; - } - - final int next() { - return _array[_index++]; - } - - final void sort() { - Arrays.sort(_array, _index, _lastIndex); - } - - final void clear() { - _index = 0; - _lastIndex = 0; - } - - final int size() { - return (_lastIndex - _index); - } - - private void growArray() { - int[] newArray = new int[_arraySize * 2]; - System.arraycopy(_array, 0, newArray, 0, _arraySize); - _array = newArray; - _arraySize *= 2; - } - } - - private int _doc; - private int _freq; - private DocsQueue _queue; - private IntQueue _posList; - - public UnionDocsAndPositionsEnum(IndexReader indexReader, Term[] terms) throws IOException { - List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>(); - final Bits delDocs = MultiFields.getDeletedDocs(indexReader); - for (int i = 0; i < terms.length; i++) { - final BytesRef text = new BytesRef(terms[i].text()); - DocsAndPositionsEnum postings = indexReader.termPositionsEnum(delDocs, - terms[i].field(), - text); - if (postings != null) { - docsEnums.add(postings); - } else { - if 
(MultiFields.getTermDocsEnum(indexReader, delDocs, terms[i].field(), text) != null) { - // term does exist, but has no positions - throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + terms[i].text() + ")"); - } - } - } - - _queue = new DocsQueue(docsEnums); - _posList = new IntQueue(); - } - - @Override - public final int nextDoc() throws IOException { - if (_queue.size() == 0) { - return NO_MORE_DOCS; - } - - // TODO: move this init into positions(): if the search - // doesn't need the positions for this doc then don't - // waste CPU merging them: - _posList.clear(); - _doc = _queue.top().docID(); - - // merge sort all positions together - DocsAndPositionsEnum postings; - do { - postings = _queue.top(); - - final int freq = postings.freq(); - for (int i = 0; i < freq; i++) { - _posList.add(postings.nextPosition()); - } - - if (postings.nextDoc() != NO_MORE_DOCS) { - _queue.updateTop(); - } else { - _queue.pop(); - } - } while (_queue.size() > 0 && _queue.top().docID() == _doc); - - _posList.sort(); - _freq = _posList.size(); - - return _doc; - } - - @Override - public int nextPosition() { - return _posList.next(); - } - - @Override - public BytesRef getPayload() { - throw new UnsupportedOperationException(); - } - - @Override - public boolean hasPayload() { - throw new UnsupportedOperationException(); - } - - @Override - public final int advance(int target) throws IOException { - while (_queue.top() != null && target > _queue.top().docID()) { - DocsAndPositionsEnum postings = _queue.pop(); - if (postings.advance(target) != NO_MORE_DOCS) { - _queue.add(postings); - } - } - return nextDoc(); - } - - @Override - public final int freq() { - return _freq; - } - - @Override - public final int docID() { - return _doc; - } -} Index: lucene/src/java/org/apache/lucene/index/UnionDocsAndPositionsEnum.java =================================================================== 
--- lucene/src/java/org/apache/lucene/index/UnionDocsAndPositionsEnum.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/UnionDocsAndPositionsEnum.java (revision 0) @@ -0,0 +1,195 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Bits; + +import java.io.IOException; +import java.util.Iterator; +import java.util.Arrays; +import java.util.List; +import java.util.LinkedList; + +/** + * Takes the logical union of multiple DocsEnum iterators. 
+ */ + +// TODO: if ever we allow subclassing of the *PhraseScorer +public class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { + + private static final class DocsQueue extends PriorityQueue<DocsAndPositionsEnum> { + DocsQueue(List<DocsAndPositionsEnum> docsEnums) throws IOException { + initialize(docsEnums.size()); + + Iterator<DocsAndPositionsEnum> i = docsEnums.iterator(); + while (i.hasNext()) { + DocsAndPositionsEnum postings = (DocsAndPositionsEnum) i.next(); + if (postings.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) { + add(postings); + } + } + } + + public DocsEnum peek() { + return top(); + } + + @Override + public boolean lessThan(DocsAndPositionsEnum a, DocsAndPositionsEnum b) { + return a.docID() < b.docID(); + } + } + + private static final class IntQueue { + private int _arraySize = 16; + private int _index = 0; + private int _lastIndex = 0; + private int[] _array = new int[_arraySize]; + + void add(int i) { + if (_lastIndex == _arraySize) + growArray(); + + _array[_lastIndex++] = i; + } + + int next() { + return _array[_index++]; + } + + void sort() { + Arrays.sort(_array, _index, _lastIndex); + } + + void clear() { + _index = 0; + _lastIndex = 0; + } + + int size() { + return (_lastIndex - _index); + } + + private void growArray() { + int[] newArray = new int[_arraySize * 2]; + System.arraycopy(_array, 0, newArray, 0, _arraySize); + _array = newArray; + _arraySize *= 2; + } + } + + private int _doc; + private int _freq; + private DocsQueue _queue; + private IntQueue _posList; + + public UnionDocsAndPositionsEnum(IndexReader indexReader, Term[] terms) throws IOException { + List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>(); + final Bits delDocs = MultiFields.getDeletedDocs(indexReader); + for (int i = 0; i < terms.length; i++) { + final BytesRef text = new BytesRef(terms[i].text()); + DocsAndPositionsEnum postings = indexReader.termPositionsEnum(delDocs, + terms[i].field(), + text); + if (postings != null) { + docsEnums.add(postings); + } else { + if (MultiFields.getTermDocsEnum(indexReader, delDocs, 
terms[i].field(), text) != null) { + // term does exist, but has no positions + throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + terms[i].text() + ")"); + } + } + } + + _queue = new DocsQueue(docsEnums); + _posList = new IntQueue(); + } + + @Override + public final int nextDoc() throws IOException { + if (_queue.size() == 0) { + return NO_MORE_DOCS; + } + + // TODO: move this init into positions(): if the search + // doesn't need the positions for this doc then don't + // waste CPU merging them: + _posList.clear(); + _doc = _queue.top().docID(); + + // merge sort all positions together + DocsAndPositionsEnum postings; + do { + postings = _queue.top(); + + final int freq = postings.freq(); + for (int i = 0; i < freq; i++) { + _posList.add(postings.nextPosition()); + } + + if (postings.nextDoc() != NO_MORE_DOCS) { + _queue.updateTop(); + } else { + _queue.pop(); + } + } while (_queue.size() > 0 && _queue.top().docID() == _doc); + + _posList.sort(); + _freq = _posList.size(); + + return _doc; + } + + @Override + public int nextPosition() { + return _posList.next(); + } + + @Override + public BytesRef getPayload() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean hasPayload() { + throw new UnsupportedOperationException(); + } + + @Override + public final int advance(int target) throws IOException { + while (_queue.top() != null && target > _queue.top().docID()) { + DocsAndPositionsEnum postings = _queue.pop(); + if (postings.advance(target) != NO_MORE_DOCS) { + _queue.add(postings); + } + } + return nextDoc(); + } + + @Override + public final int freq() { + return _freq; + } + + @Override + public final int docID() { + return _doc; + } +} Property changes on: lucene/src/java/org/apache/lucene/index/UnionDocsAndPositionsEnum.java ___________________________________________________________________ Added: svn:eol-style + native Index: 
lucene/MIGRATE.txt =================================================================== --- lucene/MIGRATE.txt (revision 960362) +++ lucene/MIGRATE.txt (working copy) @@ -140,7 +140,10 @@ ord) method. Note that these methods are optional; in particular the MultiFields TermsEnum does not implement them. + * The public MultipleTermPositions class, in oal.index, is now + UnionDocsAndPositionsEnum, cutover to the flex API. + How you obtain the enums has changed. The primary entry point is the Fields class. If you know your reader is a single segment reader, do this: