diff --git a/dev-tools/idea/lucene/highlighter/highlighter.iml b/dev-tools/idea/lucene/highlighter/highlighter.iml index 0a8e679..8b6d644 100644 --- a/dev-tools/idea/lucene/highlighter/highlighter.iml +++ b/dev-tools/idea/lucene/highlighter/highlighter.iml @@ -12,6 +12,7 @@ + diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java index c39856a..4daa678 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java @@ -7,7 +7,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -281,7 +281,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { // Make sure position is still incremented when // massive term is skipped: - DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, + DocsEnum tps = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "content", new BytesRef("another")); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java index 4c277de..6f03606 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java @@ -31,7 +31,7 @@ import org.apache.lucene.document.Field; import 
org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Terms; @@ -111,7 +111,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase { TermsEnum termsEnum = vector.iterator(null); termsEnum.next(); assertEquals(2, termsEnum.totalTermFreq()); - DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null); + DocsEnum positions = termsEnum.docsAndPositions(null, null); assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(2, positions.freq()); positions.nextPosition(); diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java index d8c5a31..0396d60 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java @@ -17,17 +17,12 @@ package org.apache.lucene.codecs.lucene40; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; - import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.PostingsReaderBase; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; @@ -42,6 +37,10 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.IOUtils; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; + /** * Concrete class that reads the 4.0 frq/prox * postings format. @@ -229,8 +228,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase { } @Override - public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, - DocsAndPositionsEnum reuse, int flags) + public DocsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, + DocsEnum reuse, int flags) throws IOException { boolean hasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; @@ -562,7 +561,11 @@ public class Lucene40PostingsReader extends PostingsReaderBase { return NO_MORE_DOCS; } } - + + @Override + public int nextPosition() throws IOException { + return -1; + } } private final class LiveDocsSegmentDocsEnum extends SegmentDocsEnumBase { @@ -659,12 +662,17 @@ public class Lucene40PostingsReader extends PostingsReaderBase { return NO_MORE_DOCS; } + + @Override + public int nextPosition() throws IOException { + return -1; + } } // TODO specialize DocsAndPosEnum too // Decodes docs & positions. payloads nor offsets are present. 
- private final class SegmentDocsAndPositionsEnum extends DocsAndPositionsEnum { + private final class SegmentDocsAndPositionsEnum extends DocsEnum { final IndexInput startFreqIn; private final IndexInput freqIn; private final IndexInput proxIn; @@ -867,7 +875,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { } // Decodes docs & positions & (payloads and/or offsets) - private class SegmentFullPositionsEnum extends DocsAndPositionsEnum { + private class SegmentFullPositionsEnum extends DocsEnum { final IndexInput startFreqIn; private final IndexInput freqIn; private final IndexInput proxIn; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java index ec24b38..cad8025 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java @@ -17,18 +17,9 @@ package org.apache.lucene.codecs.lucene40; * limitations under the License. 
*/ -import java.io.Closeable; -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.NoSuchElementException; - import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; @@ -46,6 +37,14 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.IOUtils; +import java.io.Closeable; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; + /** * Lucene 4.0 Term Vectors reader. *

@@ -491,7 +490,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader implements Clos } @Override - public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags /* ignored */) throws IOException { + public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { TVDocsEnum docsEnum; if (reuse != null && reuse instanceof TVDocsEnum) { docsEnum = (TVDocsEnum) reuse; @@ -503,7 +502,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader implements Clos } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { if (!storePositions && !storeOffsets) { return null; @@ -534,6 +533,11 @@ public class Lucene40TermVectorsReader extends TermVectorsReader implements Clos } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int docID() { return doc; } @@ -566,7 +570,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader implements Clos } } - private static class TVDocsAndPositionsEnum extends DocsAndPositionsEnum { + private static class TVDocsAndPositionsEnum extends DocsEnum { private boolean didNext; private int doc = -1; private int nextPos; @@ -639,8 +643,10 @@ public class Lucene40TermVectorsReader extends TermVectorsReader implements Clos @Override public int nextPosition() { - assert (positions != null && nextPos < positions.length) || - startOffsets != null && nextPos < startOffsets.length; + //assert (positions != null && nextPos < positions.length) || + // startOffsets != null && nextPos < startOffsets.length; + if (positions != null && nextPos >= positions.length) + return NO_MORE_POSITIONS; if (positions != null) { return positions[nextPos++]; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java 
b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java index 788ab22..6d8ccf1 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java @@ -17,20 +17,11 @@ package org.apache.lucene.codecs.lucene42; * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; - import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; @@ -65,6 +56,14 @@ import org.apache.lucene.util.packed.BlockPackedReader; import org.apache.lucene.util.packed.MonotonicBlockPackedReader; import org.apache.lucene.util.packed.PackedInts; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + /** * Reader for {@link Lucene42DocValuesFormat} */ @@ -652,7 +651,7 @@ class Lucene42DocValuesProducer extends DocValuesProducer { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { throw new UnsupportedOperationException(); } } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java 
b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java index 574f208..c9704ac 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java @@ -17,31 +17,11 @@ package org.apache.lucene.codecs.lucene45; * limitations under the License. */ -import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED; -import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED; -import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED; -import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.DELTA_COMPRESSED; -import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.GCD_COMPRESSED; -import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED; -import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES; -import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.TABLE_COMPRESSED; -import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat.VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED; - -import java.io.Closeable; // javadocs -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; - import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import 
org.apache.lucene.index.FieldInfos; @@ -63,12 +43,30 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LongValues; import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.Version; import org.apache.lucene.util.packed.BlockPackedReader; import org.apache.lucene.util.packed.MonotonicBlockPackedReader; import org.apache.lucene.util.packed.PackedInts; +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.DELTA_COMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.GCD_COMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.TABLE_COMPRESSED; +import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat.VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED; + /** reader for {@link Lucene45DocValuesFormat} */ class Lucene45DocValuesProducer extends DocValuesProducer implements Closeable { private final Map numerics; @@ -916,7 +914,7 @@ class Lucene45DocValuesProducer extends DocValuesProducer implements Closeable { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int 
flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { throw new UnsupportedOperationException(); } }; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java index 86d01b9..bd08dd2 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java @@ -17,31 +17,11 @@ package org.apache.lucene.codecs.lucene49; * limitations under the License. */ -import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED; -import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.BINARY_PREFIX_COMPRESSED; -import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED; -import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.DELTA_COMPRESSED; -import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.GCD_COMPRESSED; -import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.MONOTONIC_COMPRESSED; -import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.SORTED_SINGLE_VALUED; -import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.SORTED_WITH_ADDRESSES; -import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.TABLE_COMPRESSED; - -import java.io.Closeable; // javadocs -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; - import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.BinaryDocValues; import 
org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; @@ -67,6 +47,25 @@ import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.DirectReader; import org.apache.lucene.util.packed.MonotonicBlockPackedReader; +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED; +import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.BINARY_PREFIX_COMPRESSED; +import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED; +import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.DELTA_COMPRESSED; +import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.GCD_COMPRESSED; +import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.MONOTONIC_COMPRESSED; +import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.SORTED_SINGLE_VALUED; +import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.SORTED_WITH_ADDRESSES; +import static org.apache.lucene.codecs.lucene49.Lucene49DocValuesConsumer.TABLE_COMPRESSED; + /** reader for {@link Lucene49DocValuesFormat} */ class Lucene49DocValuesProducer extends DocValuesProducer implements Closeable { private final Map numerics; @@ -953,9 +952,10 @@ class Lucene49DocValuesProducer extends DocValuesProducer implements Closeable { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum 
docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { throw new UnsupportedOperationException(); } + }; } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java index 3d267c0..f51fd94 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java @@ -29,7 +29,7 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; @@ -692,7 +692,7 @@ public class BlockTermsReader extends FieldsProducer { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { // Positions were not indexed: return null; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java index 24d01ec..0acf1ef 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java @@ -17,10 +17,7 @@ package org.apache.lucene.codecs.blocktreeords; * limitations under the License. 
*/ -import java.io.IOException; - import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.TermState; @@ -35,6 +32,8 @@ import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.automaton.RunAutomaton; import org.apache.lucene.util.fst.FST; +import java.io.IOException; + // NOTE: cannot seek! final class OrdsIntersectTermsEnum extends TermsEnum { final IndexInput in; @@ -209,7 +208,7 @@ final class OrdsIntersectTermsEnum extends TermsEnum { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits skipDocs, DocsEnum reuse, int flags) throws IOException { if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { // Positions were not indexed: return null; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java index 9d43ca3..5cc869e 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java @@ -20,12 +20,8 @@ package org.apache.lucene.codecs.blocktreeords; //import java.io.*; //import java.nio.charset.StandardCharsets; -import java.io.IOException; -import java.io.PrintStream; - import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.TermState; @@ -42,6 +38,9 @@ import 
org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.Util; +import java.io.IOException; +import java.io.PrintStream; + /** Iterates through terms in this field. */ public final class OrdsSegmentTermsEnum extends TermsEnum { @@ -937,7 +936,7 @@ public final class OrdsSegmentTermsEnum extends TermsEnum { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits skipDocs, DocsEnum reuse, int flags) throws IOException { if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { // Positions were not indexed: return null; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java index 4760fd5..012a2e0 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java @@ -17,21 +17,11 @@ package org.apache.lucene.codecs.bloom; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map.Entry; -import java.util.Map; - import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Fields; @@ -51,6 +41,15 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.automaton.CompiledAutomaton; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + /** *

* A {@link PostingsFormat} useful for low doc-frequency fields such as primary @@ -386,11 +385,10 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { public long totalTermFreq() throws IOException { return delegate().totalTermFreq(); } - @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, - DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, + DocsEnum reuse, int flags) throws IOException { return delegate().docsAndPositions(liveDocs, reuse, flags); } @@ -399,6 +397,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { throws IOException { return delegate().docs(liveDocs, reuse, flags); } + } @Override diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java index 418ae8c..22b9e05 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java @@ -17,20 +17,13 @@ package org.apache.lucene.codecs.memory; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Collections; -import java.util.Iterator; -import java.util.Map; -import java.util.TreeMap; - import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.Fields; import org.apache.lucene.index.OrdTermState; import org.apache.lucene.index.SegmentReadState; @@ -50,6 +43,12 @@ import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.automaton.RunAutomaton; import org.apache.lucene.util.automaton.Transition; +import java.io.IOException; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; + // TODO: // - build depth-N prefix hash? // - or: longer dense skip lists than just next byte? 
@@ -334,7 +333,7 @@ public final class DirectPostingsFormat extends PostingsFormat { BytesRef term; DocsEnum docsEnum = null; - DocsAndPositionsEnum docsAndPositionsEnum = null; + DocsEnum docsAndPositionsEnum = null; final TermsEnum termsEnum = termsIn.iterator(null); int termOffset = 0; @@ -875,6 +874,7 @@ public final class DirectPostingsFormat extends PostingsFormat { if (terms[termOrd] instanceof LowFreqTerm) { final int[] postings = ((LowFreqTerm) terms[termOrd]).postings; + final byte[] payloads = ((LowFreqTerm) terms[termOrd]).payloads; if (hasFreq) { if (hasPos) { int posLen; @@ -890,13 +890,13 @@ public final class DirectPostingsFormat extends PostingsFormat { if (reuse instanceof LowFreqDocsEnum) { docsEnum = (LowFreqDocsEnum) reuse; if (!docsEnum.canReuse(liveDocs, posLen)) { - docsEnum = new LowFreqDocsEnum(liveDocs, posLen); + docsEnum = new LowFreqDocsEnum(liveDocs, hasOffsets, hasPayloads); } } else { - docsEnum = new LowFreqDocsEnum(liveDocs, posLen); + docsEnum = new LowFreqDocsEnum(liveDocs, hasOffsets, hasPayloads); } - return docsEnum.reset(postings); + return docsEnum.reset(postings, payloads); } else { LowFreqDocsEnumNoPos docsEnum; if (reuse instanceof LowFreqDocsEnumNoPos) { @@ -942,7 +942,7 @@ public final class DirectPostingsFormat extends PostingsFormat { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) { if (!hasPos) { return null; } @@ -954,7 +954,7 @@ public final class DirectPostingsFormat extends PostingsFormat { final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]); final int[] postings = term.postings; final byte[] payloads = term.payloads; - return new LowFreqDocsAndPositionsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads); + return new LowFreqDocsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads); } else { final HighFreqTerm term = (HighFreqTerm) 
terms[termOrd]; return new HighFreqDocsAndPositionsEnum(liveDocs, hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads); @@ -1473,6 +1473,7 @@ public final class DirectPostingsFormat extends PostingsFormat { if (terms[termOrd] instanceof LowFreqTerm) { final int[] postings = ((LowFreqTerm) terms[termOrd]).postings; + final byte[] payloads = ((LowFreqTerm) terms[termOrd]).payloads; if (hasFreq) { if (hasPos) { int posLen; @@ -1484,7 +1485,7 @@ public final class DirectPostingsFormat extends PostingsFormat { if (hasPayloads) { posLen++; } - return new LowFreqDocsEnum(liveDocs, posLen).reset(postings); + return new LowFreqDocsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads); } else { return new LowFreqDocsEnumNoPos(liveDocs).reset(postings); } @@ -1499,7 +1500,7 @@ public final class DirectPostingsFormat extends PostingsFormat { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) { if (!hasPos) { return null; } @@ -1511,7 +1512,7 @@ public final class DirectPostingsFormat extends PostingsFormat { final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]); final int[] postings = term.postings; final byte[] payloads = term.payloads; - return new LowFreqDocsAndPositionsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads); + return new LowFreqDocsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads); } else { final HighFreqTerm term = (HighFreqTerm) terms[termOrd]; return new HighFreqDocsAndPositionsEnum(liveDocs, hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads); @@ -1587,6 +1588,11 @@ public final class DirectPostingsFormat extends PostingsFormat { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int advance(int target) throws IOException { // Linear scan, but this is low-freq term so it won't // be 
costly: @@ -1655,87 +1661,8 @@ public final class DirectPostingsFormat extends PostingsFormat { } @Override - public int advance(int target) throws IOException { - // Linear scan, but this is low-freq term so it won't - // be costly: - return slowAdvance(target); - } - - @Override - public long cost() { - return postings.length / 2; - } - } - - // Docs + freqs + positions/offets: - private final static class LowFreqDocsEnum extends DocsEnum { - private int[] postings; - private final Bits liveDocs; - private final int posMult; - private int upto; - private int freq; - - public LowFreqDocsEnum(Bits liveDocs, int posMult) { - this.liveDocs = liveDocs; - this.posMult = posMult; - // if (DEBUG) { - // System.out.println("LowFreqDE: posMult=" + posMult); - // } - } - - public boolean canReuse(Bits liveDocs, int posMult) { - return liveDocs == this.liveDocs && posMult == this.posMult; - } - - public DocsEnum reset(int[] postings) { - this.postings = postings; - upto = -2; - freq = 0; - return this; - } - - // TODO: can do this w/o setting members? - @Override - public int nextDoc() { - upto += 2 + freq*posMult; - // if (DEBUG) { - // System.out.println(" nextDoc freq=" + freq + " upto=" + upto + " vs " + postings.length); - // } - if (liveDocs == null) { - if (upto < postings.length) { - freq = postings[upto+1]; - assert freq > 0; - return postings[upto]; - } - } else { - while (upto < postings.length) { - freq = postings[upto+1]; - assert freq > 0; - if (liveDocs.get(postings[upto])) { - return postings[upto]; - } - upto += 2 + freq*posMult; - } - } - return NO_MORE_DOCS; - } - - @Override - public int docID() { - // TODO: store docID member? - if (upto < 0) { - return -1; - } else if (upto < postings.length) { - return postings[upto]; - } else { - return NO_MORE_DOCS; - } - } - - @Override - public int freq() { - // TODO: can I do postings[upto+1]? 
- return freq; + public int nextPosition() throws IOException { + return -1; } @Override @@ -1747,12 +1674,11 @@ public final class DirectPostingsFormat extends PostingsFormat { @Override public long cost() { - // TODO: could do a better estimate return postings.length / 2; } } - private final static class LowFreqDocsAndPositionsEnum extends DocsAndPositionsEnum { + private final static class LowFreqDocsEnum extends DocsEnum { private int[] postings; private final Bits liveDocs; private final int posMult; @@ -1763,6 +1689,7 @@ public final class DirectPostingsFormat extends PostingsFormat { private int docID; private int freq; private int skipPositions; + private int pos; private int startOffset; private int endOffset; private int lastPayloadOffset; @@ -1770,7 +1697,7 @@ public final class DirectPostingsFormat extends PostingsFormat { private int payloadLength; private byte[] payloadBytes; - public LowFreqDocsAndPositionsEnum(Bits liveDocs, boolean hasOffsets, boolean hasPayloads) { + public LowFreqDocsEnum(Bits liveDocs, boolean hasOffsets, boolean hasPayloads) { this.liveDocs = liveDocs; this.hasOffsets = hasOffsets; this.hasPayloads = hasPayloads; @@ -1787,7 +1714,11 @@ public final class DirectPostingsFormat extends PostingsFormat { } } - public DocsAndPositionsEnum reset(int[] postings, byte[] payloadBytes) { + public boolean canReuse(Bits liveDocs, int posMult) { + return liveDocs == this.liveDocs && posMult == this.posMult; + } + + public DocsEnum reset(int[] postings, byte[] payloadBytes) { this.postings = postings; upto = 0; skipPositions = 0; @@ -1841,7 +1772,7 @@ public final class DirectPostingsFormat extends PostingsFormat { } } } - + pos = -1; return docID = NO_MORE_DOCS; } @@ -1857,9 +1788,11 @@ public final class DirectPostingsFormat extends PostingsFormat { @Override public int nextPosition() { - assert skipPositions > 0; + //assert skipPositions > 0; + if (skipPositions == 0) + return NO_MORE_POSITIONS; skipPositions--; - final int pos = 
postings[upto++]; + pos = postings[upto++]; if (hasOffsets) { startOffset = postings[upto++]; endOffset = postings[upto++]; @@ -1873,6 +1806,16 @@ public final class DirectPostingsFormat extends PostingsFormat { } @Override + public int startPosition() throws IOException { + return pos; + } + + @Override + public int endPosition() throws IOException { + return pos; + } + + @Override public int startOffset() { return startOffset; } @@ -1971,6 +1914,11 @@ public final class DirectPostingsFormat extends PostingsFormat { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int advance(int target) { /* upto++; @@ -2085,7 +2033,7 @@ public final class DirectPostingsFormat extends PostingsFormat { } // TODO: specialize offsets and not - private final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum { + private final static class HighFreqDocsAndPositionsEnum extends DocsEnum { private int[] docIDs; private int[] freqs; private int[][] positions; @@ -2120,7 +2068,7 @@ public final class DirectPostingsFormat extends PostingsFormat { return liveDocs; } - public DocsAndPositionsEnum reset(int[] docIDs, int[] freqs, int[][] positions, byte[][][] payloads) { + public DocsEnum reset(int[] docIDs, int[] freqs, int[][] positions, byte[][][] payloads) { this.docIDs = docIDs; this.freqs = freqs; this.positions = positions; @@ -2164,6 +2112,8 @@ public final class DirectPostingsFormat extends PostingsFormat { @Override public int nextPosition() { + if (posUpto >= curPositions.length) + return NO_MORE_POSITIONS; posUpto += posJump; return curPositions[posUpto]; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java index 1e4e9cc..c84de8c 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java +++ 
b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java @@ -17,20 +17,14 @@ package org.apache.lucene.codecs.memory; * limitations under the License. */ -import java.io.IOException; -import java.util.Arrays; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.TreeMap; - +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; @@ -41,26 +35,31 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.automaton.ByteRunAutomaton; -import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Accountables; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.automaton.ByteRunAutomaton; +import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.fst.BytesRefFSTEnum; import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput; import 
org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.Outputs; import org.apache.lucene.util.fst.PositiveIntOutputs; import org.apache.lucene.util.fst.Util; -import org.apache.lucene.codecs.BlockTermState; -import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.codecs.PostingsReaderBase; -import org.apache.lucene.codecs.CodecUtil; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.TreeMap; /** * FST-based terms dictionary reader. @@ -438,12 +437,8 @@ public class FSTOrdTermsReader extends FieldsProducer { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { - if (!hasPositions()) { - return null; - } - decodeMetaData(); - return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags); + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { + return docs(liveDocs, reuse, flags); } // TODO: this can be achieved by making use of Util.getByOutput() diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java index 97b96c7..55fc374 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java @@ -17,19 +17,14 @@ package org.apache.lucene.codecs.memory; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.TreeMap; - +import org.apache.lucene.codecs.BlockTermState; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; @@ -39,25 +34,29 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.automaton.ByteRunAutomaton; -import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Accountables; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.automaton.ByteRunAutomaton; +import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.fst.BytesRefFSTEnum; import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.Outputs; import org.apache.lucene.util.fst.Util; -import org.apache.lucene.codecs.BlockTermState; -import org.apache.lucene.codecs.FieldsProducer; -import 
org.apache.lucene.codecs.PostingsReaderBase; -import org.apache.lucene.codecs.CodecUtil; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.TreeMap; /** * FST-based terms dictionary reader. @@ -305,12 +304,8 @@ public class FSTTermsReader extends FieldsProducer { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { - if (!hasPositions()) { - return null; - } - decodeMetaData(); - return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags); + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { + return docs(liveDocs, reuse, flags); } @Override diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java index f5ba225..6a78b89 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java @@ -17,20 +17,11 @@ package org.apache.lucene.codecs.memory; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; - import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; @@ -67,6 +58,14 @@ import org.apache.lucene.util.packed.BlockPackedReader; import org.apache.lucene.util.packed.MonotonicBlockPackedReader; import org.apache.lucene.util.packed.PackedInts; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + /** * Reader for {@link MemoryDocValuesFormat} */ @@ -840,7 +839,7 @@ class MemoryDocValuesProducer extends DocValuesProducer { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { throw new UnsupportedOperationException(); } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java index 8e58251..b78ca30 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java @@ -17,19 +17,11 @@ package org.apache.lucene.codecs.memory; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Collections; -import java.util.Iterator; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; - import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.TermStats; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; @@ -53,7 +45,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.fst.Builder; @@ -63,6 +54,13 @@ import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.Util; import org.apache.lucene.util.packed.PackedInts; +import java.io.IOException; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + // TODO: would be nice to somehow allow this to act like // InstantiatedIndex, by never writing to disk; ie you write // to this Codec in RAM only and then when you open a reader @@ -317,7 +315,7 @@ public final class MemoryPostingsFormat extends PostingsFormat { long sumTotalTermFreq = 0; long sumDocFreq = 0; DocsEnum docsEnum = null; - DocsAndPositionsEnum posEnum = null; + DocsEnum posEnum = null; int enumFlags; IndexOptions indexOptions = fieldInfo.getIndexOptions(); @@ -332,15 +330,15 @@ public final class MemoryPostingsFormat extends PostingsFormat { enumFlags = DocsEnum.FLAG_FREQS; } else if (writeOffsets == false) { if (writePayloads) { - enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS; + enumFlags = 
DocsEnum.FLAG_PAYLOADS; } else { enumFlags = 0; } } else { if (writePayloads) { - enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS | DocsAndPositionsEnum.FLAG_OFFSETS; + enumFlags = DocsEnum.FLAG_PAYLOADS | DocsEnum.FLAG_OFFSETS; } else { - enumFlags = DocsAndPositionsEnum.FLAG_OFFSETS; + enumFlags = DocsEnum.FLAG_OFFSETS; } } @@ -539,14 +537,19 @@ public final class MemoryPostingsFormat extends PostingsFormat { public int freq() { return freq; } - + + @Override + public int nextPosition() throws IOException { + return -1; + } + @Override public long cost() { return numDocs; } } - private final static class FSTDocsAndPositionsEnum extends DocsAndPositionsEnum { + private final static class FSTDocsAndPositionsEnum extends DocsEnum { private final boolean storePayloads; private byte[] buffer = new byte[16]; private final ByteArrayDataInput in = new ByteArrayDataInput(buffer); @@ -817,7 +820,7 @@ public final class MemoryPostingsFormat extends PostingsFormat { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) { boolean hasOffsets = field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java index 780c821..6060846 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java @@ -17,26 +17,7 @@ package org.apache.lucene.codecs.simpletext; * limitations under the License. 
*/ -import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC; -import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END; -import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET; -import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD; -import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ; -import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD; -import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS; -import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET; -import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.TreeMap; - import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; @@ -57,11 +38,9 @@ import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.StringHelper; -import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.fst.Builder; import org.apache.lucene.util.fst.BytesRefFSTEnum; import org.apache.lucene.util.fst.FST; @@ -69,6 +48,24 @@ import org.apache.lucene.util.fst.PairOutputs; import org.apache.lucene.util.fst.PositiveIntOutputs; import org.apache.lucene.util.fst.Util; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Collections; 
+import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; + +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM; + class SimpleTextFieldsReader extends FieldsProducer { private static final long BASE_RAM_BYTES_USED = @@ -216,22 +213,22 @@ class SimpleTextFieldsReader extends FieldsProducer { } else { docsEnum = new SimpleTextDocsEnum(); } - return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS_ONLY, docFreq); + return docsEnum.reset(docsStart, liveDocs, indexOptions, docFreq); } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { // Positions were not indexed return null; } - SimpleTextDocsAndPositionsEnum docsAndPositionsEnum; - if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) { - docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse; + SimpleTextDocsEnum docsAndPositionsEnum; + if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) 
reuse).canReuse(SimpleTextFieldsReader.this.in)) { + docsAndPositionsEnum = (SimpleTextDocsEnum) reuse; } else { - docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum(); + docsAndPositionsEnum = new SimpleTextDocsEnum(); } return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions, docFreq); } @@ -240,105 +237,6 @@ class SimpleTextFieldsReader extends FieldsProducer { private class SimpleTextDocsEnum extends DocsEnum { private final IndexInput inStart; private final IndexInput in; - private boolean omitTF; - private int docID = -1; - private int tf; - private Bits liveDocs; - private final BytesRefBuilder scratch = new BytesRefBuilder(); - private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder(); - private int cost; - - public SimpleTextDocsEnum() { - this.inStart = SimpleTextFieldsReader.this.in; - this.in = this.inStart.clone(); - } - - public boolean canReuse(IndexInput in) { - return in == inStart; - } - - public SimpleTextDocsEnum reset(long fp, Bits liveDocs, boolean omitTF, int docFreq) throws IOException { - this.liveDocs = liveDocs; - in.seek(fp); - this.omitTF = omitTF; - docID = -1; - tf = 1; - cost = docFreq; - return this; - } - - @Override - public int docID() { - return docID; - } - - @Override - public int freq() throws IOException { - return tf; - } - - @Override - public int nextDoc() throws IOException { - if (docID == NO_MORE_DOCS) { - return docID; - } - boolean first = true; - int termFreq = 0; - while(true) { - final long lineStart = in.getFilePointer(); - SimpleTextUtil.readLine(in, scratch); - if (StringHelper.startsWith(scratch.get(), DOC)) { - if (!first && (liveDocs == null || liveDocs.get(docID))) { - in.seek(lineStart); - if (!omitTF) { - tf = termFreq; - } - return docID; - } - scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length); - docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()); - termFreq = 0; - first = false; - } else if 
(StringHelper.startsWith(scratch.get(), FREQ)) { - scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length); - termFreq = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()); - } else if (StringHelper.startsWith(scratch.get(), POS)) { - // skip termFreq++; - } else if (StringHelper.startsWith(scratch.get(), START_OFFSET)) { - // skip - } else if (StringHelper.startsWith(scratch.get(), END_OFFSET)) { - // skip - } else if (StringHelper.startsWith(scratch.get(), PAYLOAD)) { - // skip - } else { - assert StringHelper.startsWith(scratch.get(), TERM) || StringHelper.startsWith(scratch.get(), FIELD) || StringHelper.startsWith(scratch.get(), END): "scratch=" + scratch.get().utf8ToString(); - if (!first && (liveDocs == null || liveDocs.get(docID))) { - in.seek(lineStart); - if (!omitTF) { - tf = termFreq; - } - return docID; - } - return docID = NO_MORE_DOCS; - } - } - } - - @Override - public int advance(int target) throws IOException { - // Naive -- better to index skip data - return slowAdvance(target); - } - - @Override - public long cost() { - return cost; - } - } - - private class SimpleTextDocsAndPositionsEnum extends DocsAndPositionsEnum { - private final IndexInput inStart; - private final IndexInput in; private int docID = -1; private int tf; private Bits liveDocs; @@ -352,9 +250,10 @@ class SimpleTextFieldsReader extends FieldsProducer { private boolean readPositions; private int startOffset; private int endOffset; + private int posPending; private int cost; - public SimpleTextDocsAndPositionsEnum() { + public SimpleTextDocsEnum() { this.inStart = SimpleTextFieldsReader.this.in; this.in = inStart.clone(); } @@ -363,7 +262,7 @@ class SimpleTextFieldsReader extends FieldsProducer { return in == inStart; } - public SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions, int docFreq) { + public SimpleTextDocsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions, int docFreq) { 
this.liveDocs = liveDocs; nextDocStart = fp; docID = -1; @@ -405,6 +304,7 @@ class SimpleTextFieldsReader extends FieldsProducer { scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length); docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()); tf = 0; + posPending = 0; first = false; } else if (StringHelper.startsWith(scratch.get(), FREQ)) { scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length); @@ -439,6 +339,9 @@ class SimpleTextFieldsReader extends FieldsProducer { @Override public int nextPosition() throws IOException { final int pos; + if (posPending == 0) + return NO_MORE_POSITIONS; + if (readPositions) { SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), POS): "got line=" + scratch.get().utf8ToString(); @@ -472,6 +375,7 @@ class SimpleTextFieldsReader extends FieldsProducer { payload = null; in.seek(fp); } + posPending--; return pos; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java index 656713d..26a012c 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java @@ -17,10 +17,7 @@ package org.apache.lucene.codecs.simpletext; * limitations under the License. 
*/ -import java.io.IOException; - import org.apache.lucene.codecs.FieldsConsumer; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; @@ -32,8 +29,10 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import java.io.IOException; + class SimpleTextFieldsWriter extends FieldsConsumer { - + private IndexOutput out; private final BytesRefBuilder scratch = new BytesRefBuilder(); private final SegmentWriteState writeState; @@ -81,10 +80,10 @@ class SimpleTextFieldsWriter extends FieldsConsumer { if (hasPositions) { if (hasPayloads) { - flags = flags | DocsAndPositionsEnum.FLAG_PAYLOADS; + flags = flags | DocsEnum.FLAG_PAYLOADS; } if (hasOffsets) { - flags = flags | DocsAndPositionsEnum.FLAG_OFFSETS; + flags = flags | DocsEnum.FLAG_OFFSETS; } } else { if (hasFreqs) { @@ -93,7 +92,6 @@ class SimpleTextFieldsWriter extends FieldsConsumer { } TermsEnum termsEnum = terms.iterator(null); - DocsAndPositionsEnum posEnum = null; DocsEnum docsEnum = null; // for each term in field @@ -104,8 +102,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer { } if (hasPositions) { - posEnum = termsEnum.docsAndPositions(null, posEnum, flags); - docsEnum = posEnum; + docsEnum = termsEnum.docsAndPositions(null, docsEnum, flags); } else { docsEnum = termsEnum.docs(null, docsEnum, flags); } @@ -154,15 +151,15 @@ class SimpleTextFieldsWriter extends FieldsConsumer { // for each pos in field+term+doc for(int i=0;i= startOffset; assert startOffset >= lastStartOffset: "startOffset=" + startOffset + " lastStartOffset=" + lastStartOffset; lastStartOffset = startOffset; @@ -174,7 +171,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer { newline(); } - BytesRef payload = posEnum.getPayload(); + BytesRef payload = docsEnum.getPayload(); if (payload != null && payload.length > 0) { 
assert payload.length != 0; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java index d085bba..49a87db 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java @@ -17,15 +17,7 @@ package org.apache.lucene.codecs.simpletext; * limitations under the License. */ -import java.io.IOException; -import java.util.Collections; -import java.util.Iterator; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; - import org.apache.lucene.codecs.TermVectorsReader; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexFileNames; @@ -49,7 +41,29 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.StringHelper; -import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.*; +import java.io.IOException; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.DOC; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.END; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.ENDOFFSET; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.FIELD; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.FIELDNAME; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.FIELDOFFSETS; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.FIELDPAYLOADS; +import static 
org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.FIELDPOSITIONS; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.FIELDTERMCOUNT; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.NUMFIELDS; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.PAYLOAD; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.POSITION; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.STARTOFFSET; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.TERMFREQ; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.TERMTEXT; +import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.VECTORS_EXTENSION; /** * Reads plain-text term vectors. @@ -391,73 +405,28 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { @Override public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { // TODO: reuse + SimpleTVPostings postings = current.getValue(); SimpleTVDocsEnum e = new SimpleTVDocsEnum(); - e.reset(liveDocs, (flags & DocsEnum.FLAG_FREQS) == 0 ? 
1 : current.getValue().freq); + e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads); return e; } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { SimpleTVPostings postings = current.getValue(); if (postings.positions == null && postings.startOffsets == null) { return null; } // TODO: reuse - SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum(); + SimpleTVDocsEnum e = new SimpleTVDocsEnum(); e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads); return e; } } - // note: these two enum classes are exactly like the Default impl... private static class SimpleTVDocsEnum extends DocsEnum { private boolean didNext; private int doc = -1; - private int freq; - private Bits liveDocs; - - @Override - public int freq() throws IOException { - assert freq != -1; - return freq; - } - - @Override - public int docID() { - return doc; - } - - @Override - public int nextDoc() { - if (!didNext && (liveDocs == null || liveDocs.get(0))) { - didNext = true; - return (doc = 0); - } else { - return (doc = NO_MORE_DOCS); - } - } - - @Override - public int advance(int target) throws IOException { - return slowAdvance(target); - } - - public void reset(Bits liveDocs, int freq) { - this.liveDocs = liveDocs; - this.freq = freq; - this.doc = -1; - didNext = false; - } - - @Override - public long cost() { - return 1; - } - } - - private static class SimpleTVDocsAndPositionsEnum extends DocsAndPositionsEnum { - private boolean didNext; - private int doc = -1; private int nextPos; private Bits liveDocs; private int[] positions; @@ -513,9 +482,11 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { @Override public int nextPosition() { - assert (positions != null && nextPos < positions.length) 
|| - startOffsets != null && nextPos < startOffsets.length; + //assert (positions != null && nextPos < positions.length) || + // startOffsets != null && nextPos < startOffsets.length; if (positions != null) { + if (nextPos >= positions.length) + return NO_MORE_POSITIONS; return positions[nextPos++]; } else { nextPos++; diff --git a/lucene/core/src/java/org/apache/lucene/analysis/Token.java b/lucene/core/src/java/org/apache/lucene/analysis/Token.java index c3bfecb..230a2cd 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/Token.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/Token.java @@ -20,7 +20,7 @@ package org.apache.lucene.analysis; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.AttributeImpl; @@ -43,7 +43,7 @@ import org.apache.lucene.util.BytesRef; with type "eos". The default token type is "word".

A Token can optionally have metadata (a.k.a. payload) in the form of a variable - length byte array. Use {@link DocsAndPositionsEnum#getPayload()} to retrieve the + length byte array. Use {@link DocsEnum#getPayload()} to retrieve the payloads from the index.

diff --git a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java index 8793c94..0029ccf 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java @@ -17,7 +17,7 @@ package org.apache.lucene.analysis.tokenattributes; * limitations under the License. */ -import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs +import org.apache.lucene.index.DocsEnum; // javadocs import org.apache.lucene.util.Attribute; import org.apache.lucene.util.BytesRef; @@ -33,7 +33,7 @@ import org.apache.lucene.util.BytesRef; * best to use the minimum number of bytes necessary. Some codec implementations * may optimize payload storage when all payloads have the same length. * - * @see DocsAndPositionsEnum + * @see DocsEnum */ public interface PayloadAttribute extends Attribute { /** diff --git a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java index 9afd2f9..a7a7cd9 100644 --- a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java +++ b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java @@ -43,7 +43,7 @@ import org.apache.lucene.util.Attribute; * * * - * @see org.apache.lucene.index.DocsAndPositionsEnum + * @see org.apache.lucene.index.DocsEnum */ public interface PositionIncrementAttribute extends Attribute { /** Set the position increment. The default value is one. 
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java index 7145fe9..2aa6add 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java @@ -17,10 +17,6 @@ package org.apache.lucene.codecs; * limitations under the License. */ -import java.io.Closeable; -import java.io.IOException; - -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.store.DataInput; @@ -28,10 +24,13 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Bits; +import java.io.Closeable; +import java.io.IOException; + /** The core terms dictionaries (BlockTermsReader, * BlockTreeTermsReader) interact with a single instance * of this class to manage creation of {@link DocsEnum} and - * {@link DocsAndPositionsEnum} instances. It provides an + * {@link DocsEnum} instances. It provides an * IndexInput (termsIn) where this class may read any * previously stored data that it had written in its * corresponding {@link PostingsWriterBase} at indexing @@ -69,9 +68,10 @@ public abstract class PostingsReaderBase implements Closeable, Accountable { /** Must fully consume state, since after this call that * TermState may be reused. */ - public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse, + public abstract DocsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse, int flags) throws IOException; - + // nocommit this still has the distinction - no need to remove this as long as we get the interface straight? + /** * Checks consistency of this reader. *

@@ -80,7 +80,7 @@ public abstract class PostingsReaderBase implements Closeable, Accountable { * @lucene.internal */ public abstract void checkIntegrity() throws IOException; - + @Override public abstract void close() throws IOException; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java index 6e08316..fe7cd0a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java @@ -17,12 +17,8 @@ package org.apache.lucene.codecs; * limitations under the License. */ -import java.io.Closeable; -import java.io.IOException; - import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter; -import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs -import org.apache.lucene.index.DocsEnum; // javadocs +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.DataOutput; @@ -30,6 +26,9 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; +import java.io.Closeable; +import java.io.IOException; + /** * Class that plugs into term dictionaries, such as {@link * BlockTreeTermsWriter}, and handles writing postings. @@ -53,8 +52,8 @@ public abstract class PostingsWriterBase implements Closeable { public abstract void init(IndexOutput termsOut) throws IOException; /** Write all postings for one term; use the provided - * {@link TermsEnum} to pull a {@link DocsEnum} or {@link - * DocsAndPositionsEnum}. This method should not + * {@link TermsEnum} to pull a {@link DocsEnum}. + * This method should not * re-position the {@code TermsEnum}! It is already * positioned on the term that should be written. 
This * method must set the bit in the provided {@link diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java index a310cf6..bd25e0c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java @@ -17,18 +17,17 @@ package org.apache.lucene.codecs; * limitations under the License. */ -import java.io.IOException; - -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; +import java.io.IOException; + /** * Extension of {@link PostingsWriterBase}, adding a push * API for writing each element of the postings. This API @@ -45,7 +44,6 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase { // Reused in writeTerm private DocsEnum docsEnum; - private DocsAndPositionsEnum posEnum; private int enumFlags; /** {@link FieldInfo} of current field being written. 
*/ @@ -125,15 +123,15 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase { enumFlags = DocsEnum.FLAG_FREQS; } else if (writeOffsets == false) { if (writePayloads) { - enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS; + enumFlags = DocsEnum.FLAG_PAYLOADS; } else { enumFlags = 0; } } else { if (writePayloads) { - enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS | DocsAndPositionsEnum.FLAG_OFFSETS; + enumFlags = DocsEnum.FLAG_PAYLOADS | DocsEnum.FLAG_OFFSETS; } else { - enumFlags = DocsAndPositionsEnum.FLAG_OFFSETS; + enumFlags = DocsEnum.FLAG_OFFSETS; } } @@ -146,8 +144,7 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase { if (writePositions == false) { docsEnum = termsEnum.docs(null, docsEnum, enumFlags); } else { - posEnum = termsEnum.docsAndPositions(null, posEnum, enumFlags); - docsEnum = posEnum; + docsEnum = termsEnum.docsAndPositions(null, docsEnum, enumFlags); } assert docsEnum != null; @@ -171,13 +168,13 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase { if (writePositions) { for(int i=0;i= 0; + assert !hasPositions || pos >= 0 ; addPosition(pos, startOffset, endOffset, payload); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java index a7a569b..7278a7b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java @@ -17,9 +17,6 @@ package org.apache.lucene.codecs.blocktree; * limitations under the License. 
*/ -import java.io.IOException; - -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.TermState; @@ -36,6 +33,8 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.Outputs; +import java.io.IOException; + // NOTE: cannot seek! final class IntersectTermsEnum extends TermsEnum { final IndexInput in; @@ -209,7 +208,7 @@ final class IntersectTermsEnum extends TermsEnum { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits skipDocs, DocsEnum reuse, int flags) throws IOException { if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { // Positions were not indexed: return null; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java index 32566de..fa8cd2d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java @@ -17,11 +17,7 @@ package org.apache.lucene.codecs.blocktree; * limitations under the License. 
*/ -import java.io.IOException; -import java.io.PrintStream; - import org.apache.lucene.codecs.BlockTermState; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.TermState; @@ -36,6 +32,9 @@ import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.Util; +import java.io.IOException; +import java.io.PrintStream; + /** Iterates through terms in this field */ final class SegmentTermsEnum extends TermsEnum { @@ -994,7 +993,7 @@ final class SegmentTermsEnum extends TermsEnum { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits skipDocs, DocsEnum reuse, int flags) throws IOException { if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { // Positions were not indexed: return null; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java index 9cac92b..059ca65 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java @@ -39,7 +39,6 @@ import java.util.NoSuchElementException; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; @@ -64,6 +63,24 @@ import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.packed.BlockPackedReaderIterator; import 
org.apache.lucene.util.packed.PackedInts; +import java.io.Closeable; +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.BLOCK_SIZE; +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_DAT; +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_IDX; +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.FLAGS_BITS; +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.OFFSETS; +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.PAYLOADS; +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.POSITIONS; +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_EXTENSION; +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION; +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CHECKSUM; +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CURRENT; +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_START; + /** * {@link TermVectorsReader} for {@link CompressingTermVectorsFormat}. 
@@ -913,17 +930,17 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { if (positions == null && startOffsets == null) { return null; } // TODO: slightly sheisty - return (DocsAndPositionsEnum) docs(liveDocs, reuse, flags); + return docs(liveDocs, reuse, flags); } } - private static class TVDocsEnum extends DocsAndPositionsEnum { + private static class TVDocsEnum extends DocsEnum { private Bits liveDocs; private int doc = -1; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java index 26cb34b..9eefdfe 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java @@ -17,19 +17,9 @@ package org.apache.lucene.codecs.lucene41; * limitations under the License. 
*/ -import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE; -import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE; -import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE; -import static org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter.IntBlockTermState; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; - import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.PostingsReaderBase; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; @@ -47,6 +37,15 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; + +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE; +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE; +import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE; +import static org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter.IntBlockTermState; + /** * Concrete class that reads docId(maybe frq,pos,offset,payloads) list * with postings format. 
@@ -246,30 +245,38 @@ public final class Lucene41PostingsReader extends PostingsReaderBase { @Override public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException { - BlockDocsEnum docsEnum; - if (reuse instanceof BlockDocsEnum) { - docsEnum = (BlockDocsEnum) reuse; - if (!docsEnum.canReuse(docIn, fieldInfo)) { + if ((flags & DocsEnum.FLAG_POSITIONS) != DocsEnum.FLAG_POSITIONS) { + BlockDocsEnum docsEnum; + if (reuse instanceof BlockDocsEnum) { + docsEnum = (BlockDocsEnum) reuse; + if (!docsEnum.canReuse(docIn, fieldInfo)) { + docsEnum = new BlockDocsEnum(fieldInfo); + } + } else { docsEnum = new BlockDocsEnum(fieldInfo); } - } else { - docsEnum = new BlockDocsEnum(fieldInfo); + return docsEnum.reset(liveDocs, (IntBlockTermState) termState, flags); } - return docsEnum.reset(liveDocs, (IntBlockTermState) termState, flags); + + return docsAndPositions(fieldInfo, termState, liveDocs, reuse, flags); } // TODO: specialize to liveDocs vs not @Override - public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, - DocsAndPositionsEnum reuse, int flags) + public DocsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, + DocsEnum reuse, int flags) throws IOException { + boolean indexHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; boolean indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; boolean indexHasPayloads = fieldInfo.hasPayloads(); - if ((!indexHasOffsets || (flags & DocsAndPositionsEnum.FLAG_OFFSETS) == 0) && - (!indexHasPayloads || (flags & DocsAndPositionsEnum.FLAG_PAYLOADS) == 0)) { + if (!indexHasPositions) + return null; + + if ((!indexHasOffsets || (flags & DocsEnum.FLAG_OFFSETS) == 0) && + (!indexHasPayloads || (flags & DocsEnum.FLAG_PAYLOADS) == 0)) { BlockDocsAndPositionsEnum 
docsAndPositionsEnum; if (reuse instanceof BlockDocsAndPositionsEnum) { docsAndPositionsEnum = (BlockDocsAndPositionsEnum) reuse; @@ -391,6 +398,11 @@ public final class Lucene41PostingsReader extends PostingsReaderBase { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int docID() { return doc; } @@ -572,7 +584,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase { } - final class BlockDocsAndPositionsEnum extends DocsAndPositionsEnum { + final class BlockDocsAndPositionsEnum extends DocsEnum { private final byte[] encoded; @@ -650,7 +662,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase { indexHasPayloads == fieldInfo.hasPayloads(); } - public DocsAndPositionsEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException { + public DocsEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException { this.liveDocs = liveDocs; // if (DEBUG) { // System.out.println(" FPR.reset: termState=" + termState); @@ -942,6 +954,9 @@ public final class Lucene41PostingsReader extends PostingsReaderBase { // if (DEBUG) { // System.out.println(" FPR.nextPosition posPendingCount=" + posPendingCount + " posBufferUpto=" + posBufferUpto); // } + if (posPendingCount == 0) + return NO_MORE_POSITIONS; + if (posPendingFP != -1) { // if (DEBUG) { // System.out.println(" seek to pendingFP=" + posPendingFP); @@ -971,6 +986,16 @@ public final class Lucene41PostingsReader extends PostingsReaderBase { } @Override + public int startPosition() { + return position; + } + + @Override + public int endPosition() { + return position; + } + + @Override public int startOffset() { return -1; } @@ -992,7 +1017,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase { } // Also handles payloads + offsets - final class EverythingEnum extends DocsAndPositionsEnum { + final class EverythingEnum extends DocsEnum { private final byte[] encoded; @@ -1141,8 +1166,8 @@ public 
final class Lucene41PostingsReader extends PostingsReaderBase { lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset; } - this.needsOffsets = (flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0; - this.needsPayloads = (flags & DocsAndPositionsEnum.FLAG_PAYLOADS) != 0; + this.needsOffsets = (flags & DocsEnum.FLAG_OFFSETS) != 0; + this.needsPayloads = (flags & DocsEnum.FLAG_PAYLOADS) != 0; doc = -1; accum = 0; @@ -1498,6 +1523,9 @@ public final class Lucene41PostingsReader extends PostingsReaderBase { // if (DEBUG) { // System.out.println(" FPR.nextPosition posPendingCount=" + posPendingCount + " posBufferUpto=" + posBufferUpto + " payloadByteUpto=" + payloadByteUpto)// ; // } + if (posPendingCount == 0) + return NO_MORE_POSITIONS; + if (posPendingFP != -1) { // if (DEBUG) { // System.out.println(" seek pos to pendingFP=" + posPendingFP); @@ -1551,6 +1579,16 @@ public final class Lucene41PostingsReader extends PostingsReaderBase { } @Override + public int startPosition() { + return position; + } + + @Override + public int endPosition() { + return position; + } + + @Override public int startOffset() { return startOffset; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene410/Lucene410DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene410/Lucene410DocValuesProducer.java index 912e087..cb14e54 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene410/Lucene410DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene410/Lucene410DocValuesProducer.java @@ -17,38 +17,11 @@ package org.apache.lucene.codecs.lucene410; * limitations under the License. 
*/ -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BINARY_PREFIX_COMPRESSED; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.DELTA_COMPRESSED; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.GCD_COMPRESSED; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.MONOTONIC_COMPRESSED; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.SORTED_SINGLE_VALUED; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.SORTED_WITH_ADDRESSES; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.TABLE_COMPRESSED; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.INTERVAL_SHIFT; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.INTERVAL_COUNT; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.INTERVAL_MASK; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.REVERSE_INTERVAL_SHIFT; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.REVERSE_INTERVAL_MASK; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BLOCK_INTERVAL_SHIFT; -import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BLOCK_INTERVAL_MASK; - -import java.io.Closeable; // javadocs -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; - import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.BinaryDocValues; import 
org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; @@ -74,6 +47,32 @@ import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.DirectReader; import org.apache.lucene.util.packed.MonotonicBlockPackedReader; +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BINARY_PREFIX_COMPRESSED; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BLOCK_INTERVAL_MASK; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BLOCK_INTERVAL_SHIFT; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.DELTA_COMPRESSED; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.GCD_COMPRESSED; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.INTERVAL_COUNT; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.INTERVAL_MASK; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.INTERVAL_SHIFT; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.MONOTONIC_COMPRESSED; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.REVERSE_INTERVAL_MASK; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.REVERSE_INTERVAL_SHIFT; +import static 
org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.SORTED_SINGLE_VALUED; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.SORTED_WITH_ADDRESSES; +import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.TABLE_COMPRESSED; + /** reader for {@link Lucene410DocValuesFormat} */ class Lucene410DocValuesProducer extends DocValuesProducer implements Closeable { private final Map numerics; @@ -1084,7 +1083,7 @@ class Lucene410DocValuesProducer extends DocValuesProducer implements Closeable } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { throw new UnsupportedOperationException(); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java index 3f62251..a421da1 100644 --- a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java @@ -17,11 +17,10 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.io.IOException; - -import org.apache.lucene.index.IndexReader.ReaderClosedListener; import org.apache.lucene.util.Bits; +import java.io.IOException; + /** {@code AtomicReader} is an abstract class, providing an interface for accessing an index. Search of an index is done entirely through this abstract interface, so that any subclass which implements it is searchable. IndexReaders implemented @@ -238,11 +237,11 @@ public abstract class AtomicReader extends IndexReader { return null; } - /** Returns {@link DocsAndPositionsEnum} for the specified + /** Returns {@link DocsEnum} for the specified * term. This will return null if the * field or term does not exist or positions weren't indexed. 
- * @see TermsEnum#docsAndPositions(Bits, DocsAndPositionsEnum) */ - public final DocsAndPositionsEnum termPositionsEnum(Term term) throws IOException { + * @see TermsEnum#docsAndPositions(Bits, DocsEnum) */ + public final DocsEnum termPositionsEnum(Term term) throws IOException { assert term.field() != null; assert term.bytes() != null; final Fields fields = fields(); diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 52178f7..37603be 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -763,7 +763,7 @@ public class CheckIndex { DocsEnum docs = null; DocsEnum docsAndFreqs = null; - DocsAndPositionsEnum postings = null; + DocsEnum postings = null; String lastField = null; for (String field : fields) { @@ -1692,11 +1692,11 @@ public class CheckIndex { } DocsEnum docs = null; - DocsAndPositionsEnum postings = null; + DocsEnum postings = null; // Only used if crossCheckTermVectors is true: DocsEnum postingsDocs = null; - DocsAndPositionsEnum postingsPostings = null; + DocsEnum postingsPostings = null; final Bits liveDocs = reader.getLiveDocs(); diff --git a/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java b/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java deleted file mode 100644 index 60ac2bb..0000000 --- a/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java +++ /dev/null @@ -1,62 +0,0 @@ -package org.apache.lucene.index; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.util.Bits; // javadocs -import org.apache.lucene.util.BytesRef; - -/** Also iterates through positions. */ -public abstract class DocsAndPositionsEnum extends DocsEnum { - - /** Flag to pass to {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)} - * if you require offsets in the returned enum. */ - public static final int FLAG_OFFSETS = 0x1; - - /** Flag to pass to {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)} - * if you require payloads in the returned enum. */ - public static final int FLAG_PAYLOADS = 0x2; - - /** Sole constructor. (For invocation by subclass - * constructors, typically implicit.) */ - protected DocsAndPositionsEnum() { - } - - /** Returns the next position. You should only call this - * up to {@link DocsEnum#freq()} times else - * the behavior is not defined. If positions were not - * indexed this will return -1; this only happens if - * offsets were indexed and you passed needsOffset=true - * when pulling the enum. */ - public abstract int nextPosition() throws IOException; - - /** Returns start offset for the current position, or -1 - * if offsets were not indexed. */ - public abstract int startOffset() throws IOException; - - /** Returns end offset for the current position, or -1 if - * offsets were not indexed. */ - public abstract int endOffset() throws IOException; - - /** Returns the payload at this position, or null if no - * payload was indexed. 
You should not modify anything - * (neither members of the returned BytesRef nor bytes - * in the byte[]). */ - public abstract BytesRef getPayload() throws IOException; -} diff --git a/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java b/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java index fa4cf54..2bdecae 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java @@ -17,12 +17,13 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.Bits; // javadocs +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; /** Iterates through the documents and term freqs. * NOTE: you must first call {@link #nextDoc} before using @@ -31,9 +32,7 @@ public abstract class DocsEnum extends DocIdSetIterator { /** * Flag to pass to {@link TermsEnum#docs(Bits,DocsEnum,int)} if you don't - * require term frequencies in the returned enum. When passed to - * {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)} means - * that no offsets and payloads will be returned. + * require term frequencies in the returned enum. */ public static final int FLAG_NONE = 0x0; @@ -41,6 +40,20 @@ public abstract class DocsEnum extends DocIdSetIterator { * if you require term frequencies in the returned enum. */ public static final int FLAG_FREQS = 0x1; + /** Flag to pass to {@link TermsEnum#docs(Bits,DocsEnum,int)} + * if you require term positions in the returned enum. */ + public static final int FLAG_POSITIONS = 0x3; + + /** Flag to pass to {@link TermsEnum#docs(Bits,DocsEnum,int)} + * if you require offsets in the returned enum. 
*/ + public static final int FLAG_OFFSETS = 0x7; + + /** Flag to pass to {@link TermsEnum#docs(Bits,DocsEnum,int)} + * if you require payloads in the returned enum. */ + public static final int FLAG_PAYLOADS = 0x11; + + public static final int NO_MORE_POSITIONS = Integer.MAX_VALUE; + private AttributeSource atts = null; /** Sole constructor. (For invocation by subclass @@ -65,4 +78,40 @@ public abstract class DocsEnum extends DocIdSetIterator { if (atts == null) atts = new AttributeSource(); return atts; } + + /** Returns the next position. You should only call this + * up to {@link DocsEnum#freq()} times else + * the behavior is not defined. If positions were not + * indexed this will return -1; this only happens if + * offsets were indexed and you passed needsOffset=true + * when pulling the enum. */ + public abstract int nextPosition() throws IOException; + + public int startPosition() throws IOException { + throw new UnsupportedOperationException("startPosition() is not implemented on " + this.getClass().getSimpleName()); + } + + public int endPosition() throws IOException { + throw new UnsupportedOperationException("endPosition() is not implemented on " + this.getClass().getSimpleName()); + } + + /** Returns start offset for the current position, or -1 + * if offsets were not indexed. */ + public int startOffset() throws IOException { + throw new UnsupportedOperationException("startOffset() is not implemented on " + this.getClass().getSimpleName()); + } + + /** Returns end offset for the current position, or -1 if + * offsets were not indexed. */ + public int endOffset() throws IOException { + throw new UnsupportedOperationException("endOffset() is not implemented on " + this.getClass().getSimpleName()); + } + + /** Returns the payload at this position, or null if no + * payload was indexed. You should not modify anything + * (neither members of the returned BytesRef nor bytes + * in the byte[]). 
*/ + public BytesRef getPayload() throws IOException { + return null; + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java index 52f542d..e7847c4 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java @@ -204,7 +204,7 @@ public class FilterAtomicReader extends AtomicReader { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { return in.docsAndPositions(liveDocs, reuse, flags); } } @@ -248,52 +248,18 @@ public class FilterAtomicReader extends AtomicReader { } @Override - public long cost() { - return in.cost(); - } - } - - /** Base class for filtering {@link DocsAndPositionsEnum} implementations. */ - public static class FilterDocsAndPositionsEnum extends DocsAndPositionsEnum { - /** The underlying DocsAndPositionsEnum instance. */ - protected final DocsAndPositionsEnum in; - - /** - * Create a new FilterDocsAndPositionsEnum - * @param in the underlying DocsAndPositionsEnum instance. 
- */ - public FilterDocsAndPositionsEnum(DocsAndPositionsEnum in) { - this.in = in; - } - - @Override - public AttributeSource attributes() { - return in.attributes(); - } - - @Override - public int docID() { - return in.docID(); - } - - @Override - public int freq() throws IOException { - return in.freq(); - } - - @Override - public int nextDoc() throws IOException { - return in.nextDoc(); + public int nextPosition() throws IOException { + return in.nextPosition(); } @Override - public int advance(int target) throws IOException { - return in.advance(target); + public int startPosition() throws IOException { + return in.startPosition(); } @Override - public int nextPosition() throws IOException { - return in.nextPosition(); + public int endPosition() throws IOException { + return in.endPosition(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java index b6bfcc4..df4e0d8 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java @@ -184,7 +184,7 @@ public abstract class FilteredTermsEnum extends TermsEnum { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits bits, DocsEnum reuse, int flags) throws IOException { return tenum.docsAndPositions(bits, reuse, flags); } diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java index 5d01754..e67a08c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java +++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java @@ -17,19 +17,19 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray; -import org.apache.lucene.util.AttributeSource; // javadocs +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import java.io.IOException; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + /** Implements limited (iterators only, no stats) {@link * Fields} interface over the in-RAM buffered * fields/terms/postings, to flush postings through the @@ -257,7 +257,7 @@ class FreqProxFields extends Fields { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) { if (liveDocs != null) { throw new IllegalArgumentException("liveDocs must be null"); } @@ -269,7 +269,7 @@ class FreqProxFields extends Fields { throw new IllegalArgumentException("did not index positions"); } - if (!terms.hasOffsets && (flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0) { + if (!terms.hasOffsets && (flags & DocsEnum.FLAG_OFFSETS) == DocsEnum.FLAG_OFFSETS) { // Caller wants offsets but we didn't index them; // don't lie: throw new IllegalArgumentException("did not index offsets"); @@ -349,6 +349,11 @@ class FreqProxFields extends Fields { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int nextDoc() throws IOException { if (reader.eof()) { if (ended) { @@ -390,7 +395,7 @@ class FreqProxFields extends Fields { } } - private static class FreqProxDocsAndPositionsEnum extends DocsAndPositionsEnum { + private static class FreqProxDocsAndPositionsEnum extends DocsEnum 
{ final FreqProxTermsWriterPerField terms; final FreqProxPostingsArray postingsArray; diff --git a/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java b/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java index 597164b..567712c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java +++ b/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java @@ -17,10 +17,10 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.util.Bits; +import java.io.IOException; + import static org.apache.lucene.index.FilterAtomicReader.FilterFields; import static org.apache.lucene.index.FilterAtomicReader.FilterTerms; import static org.apache.lucene.index.FilterAtomicReader.FilterTermsEnum; @@ -120,7 +120,7 @@ public class MappedMultiFields extends FilterFields { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { if (liveDocs != null) { throw new IllegalArgumentException("liveDocs must be null"); } diff --git a/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java index bcc3735..cc16960 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java @@ -29,12 +29,12 @@ import java.io.IOException; * @lucene.experimental */ -final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum { +final class MappingMultiDocsAndPositionsEnum extends DocsEnum { private MultiDocsAndPositionsEnum.EnumWithSlice[] subs; int numSubs; int upto; MergeState.DocMap currentMap; - DocsAndPositionsEnum current; + DocsEnum current; int currentBase; int 
doc = -1; private MergeState mergeState; diff --git a/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java index 148ea5c..1acdc9c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java @@ -70,6 +70,11 @@ final class MappingMultiDocsEnum extends DocsEnum { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int docID() { return doc; } diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java index 33e2127..434c6ae 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java @@ -28,20 +28,20 @@ import java.util.Arrays; * @lucene.experimental */ -public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum { +public final class MultiDocsAndPositionsEnum extends DocsEnum { private final MultiTermsEnum parent; - final DocsAndPositionsEnum[] subDocsAndPositionsEnum; + final DocsEnum[] subDocsAndPositionsEnum; private final EnumWithSlice[] subs; int numSubs; int upto; - DocsAndPositionsEnum current; + DocsEnum current; int currentBase; int doc = -1; /** Sole constructor. 
*/ public MultiDocsAndPositionsEnum(MultiTermsEnum parent, int subReaderCount) { this.parent = parent; - subDocsAndPositionsEnum = new DocsAndPositionsEnum[subReaderCount]; + subDocsAndPositionsEnum = new DocsEnum[subReaderCount]; this.subs = new EnumWithSlice[subReaderCount]; for (int i = 0; i < subs.length; i++) { subs[i] = new EnumWithSlice(); @@ -159,14 +159,14 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum { } // TODO: implement bulk read more efficiently than super - /** Holds a {@link DocsAndPositionsEnum} along with the + /** Holds a {@link DocsEnum} along with the * corresponding {@link ReaderSlice}. */ public final static class EnumWithSlice { EnumWithSlice() { } - /** {@link DocsAndPositionsEnum} for this sub-reader. */ - public DocsAndPositionsEnum docsAndPositionsEnum; + /** {@link DocsEnum} for this sub-reader. */ + public DocsEnum docsAndPositionsEnum; /** {@link ReaderSlice} describing how this sub-reader * fits into the composite reader. */ diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java index 082d266..d1963db 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java @@ -18,6 +18,8 @@ package org.apache.lucene.index; */ +import org.apache.lucene.util.BytesRef; + import java.io.IOException; import java.util.Arrays; @@ -89,6 +91,26 @@ public final class MultiDocsEnum extends DocsEnum { public int docID() { return doc; } + + @Override + public int nextPosition() throws IOException { + return current.nextPosition(); + } + + @Override + public int startOffset() throws IOException { + return current.startOffset(); + } + + @Override + public int endOffset() throws IOException { + return current.endOffset(); + } + + @Override + public BytesRef getPayload() throws IOException { + return current.getPayload(); + } @Override public int advance(int 
target) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiFields.java b/lucene/core/src/java/org/apache/lucene/index/MultiFields.java index c16738c..a537cc7 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiFields.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiFields.java @@ -158,22 +158,22 @@ public final class MultiFields extends Fields { return null; } - /** Returns {@link DocsAndPositionsEnum} for the specified + /** Returns {@link DocsEnum} for the specified * field & term. This will return null if the field or * term does not exist or positions were not indexed. * @see #getTermPositionsEnum(IndexReader, Bits, String, BytesRef, int) */ - public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException { - return getTermPositionsEnum(r, liveDocs, field, term, DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS); + public static DocsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException { + return getTermPositionsEnum(r, liveDocs, field, term, DocsEnum.FLAG_OFFSETS | DocsEnum.FLAG_PAYLOADS); } - /** Returns {@link DocsAndPositionsEnum} for the specified + /** Returns {@link DocsEnum} for the specified * field & term, with control over whether offsets and payloads are * required. Some codecs may be able to optimize * their implementation when offsets and/or payloads are not * required. This will return null if the field or term does not * exist or positions were not indexed. See {@link - * TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)}. */ - public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, int flags) throws IOException { + * TermsEnum#docs(Bits,DocsEnum,int)}. 
*/ + public static DocsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, int flags) throws IOException { assert field != null; assert term != null; final Terms terms = getTerms(r, field); diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java index 6ae2c7c..5b71d3d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java @@ -401,7 +401,7 @@ public final class MultiTermsEnum extends TermsEnum { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { MultiDocsAndPositionsEnum docsAndPositionsEnum; // Can only reuse if incoming enum is also a MultiDocsAndPositionsEnum if (reuse != null && reuse instanceof MultiDocsAndPositionsEnum) { @@ -452,7 +452,7 @@ public final class MultiTermsEnum extends TermsEnum { } assert entry.index < docsAndPositionsEnum.subDocsAndPositionsEnum.length: entry.index + " vs " + docsAndPositionsEnum.subDocsAndPositionsEnum.length + "; " + subs.length; - final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index], flags); + final DocsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index], flags); if (subPostings != null) { docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index] = subPostings; diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java index 16427cc..f12e2b8 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java +++ 
b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java @@ -17,12 +17,12 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import java.io.IOException; + /** Implements a {@link TermsEnum} wrapping a provided * {@link SortedDocValues}. */ @@ -114,7 +114,7 @@ class SortedDocValuesTermsEnum extends TermsEnum { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { throw new UnsupportedOperationException(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java index 64dba95..68658ca 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java @@ -17,12 +17,12 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import java.io.IOException; + /** Implements a {@link TermsEnum} wrapping a provided * {@link SortedSetDocValues}. 
*/ @@ -114,7 +114,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { throw new UnsupportedOperationException(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/TermContext.java b/lucene/core/src/java/org/apache/lucene/index/TermContext.java index ac80a94..262548a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermContext.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermContext.java @@ -17,11 +17,11 @@ package org.apache.lucene.index; * limitations under the License. */ +import org.apache.lucene.util.BytesRef; + import java.io.IOException; import java.util.Arrays; -import org.apache.lucene.util.BytesRef; - /** * Maintains a {@link IndexReader} {@link TermState} view over * {@link IndexReader} instances containing a single term. The diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java index 895018b..a9b4436 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java @@ -17,18 +17,18 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; +import java.io.IOException; + /** Iterator to seek ({@link #seekCeil(BytesRef)}, {@link * #seekExact(BytesRef)}) or step through ({@link * #next} terms to obtain frequency information ({@link * #docFreq}), {@link DocsEnum} or {@link - * DocsAndPositionsEnum} for the current term ({@link + * DocsEnum} for the current term ({@link * #docs}. * *

Term enumerations are always ordered by @@ -162,20 +162,20 @@ public abstract class TermsEnum implements BytesRefIterator { * @see #docs(Bits, DocsEnum, int) */ public abstract DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException; - /** Get {@link DocsAndPositionsEnum} for the current term. + /** Get {@link DocsEnum} for the current term. * Do not call this when the enum is unpositioned. This * method will return null if positions were not * indexed. * * @param liveDocs unset bits are documents that should not * be returned - * @param reuse pass a prior DocsAndPositionsEnum for possible reuse - * @see #docsAndPositions(Bits, DocsAndPositionsEnum, int) */ - public final DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { - return docsAndPositions(liveDocs, reuse, DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS); + * @param reuse pass a prior DocsEnum for possible reuse + **/ + public final DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse) throws IOException { + return docsAndPositions(liveDocs, reuse, DocsEnum.FLAG_OFFSETS | DocsEnum.FLAG_PAYLOADS); } - /** Get {@link DocsAndPositionsEnum} for the current term, + /** Get {@link DocsEnum} for the current term, * with control over whether offsets and payloads are * required. Some codecs may be able to optimize their * implementation when offsets and/or payloads are not required. @@ -184,11 +184,11 @@ public abstract class TermsEnum implements BytesRefIterator { * @param liveDocs unset bits are documents that should not * be returned - * @param reuse pass a prior DocsAndPositionsEnum for possible reuse + * @param reuse pass a prior DocsEnum for possible reuse * @param flags specifies which optional per-position values you - * require; see {@link DocsAndPositionsEnum#FLAG_OFFSETS} and - * {@link DocsAndPositionsEnum#FLAG_PAYLOADS}. 
*/ - public abstract DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException; + * require; see {@link DocsEnum#FLAG_OFFSETS} and + * {@link DocsEnum#FLAG_PAYLOADS}. */ + public abstract DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException; /** * Expert: Returns the TermsEnums internal state to position the TermsEnum @@ -250,11 +250,6 @@ public abstract class TermsEnum implements BytesRefIterator { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { - throw new IllegalStateException("this method should never be called"); - } - - @Override public BytesRef next() { return null; } @@ -273,5 +268,11 @@ public abstract class TermsEnum implements BytesRefIterator { public void seekExact(BytesRef term, TermState state) { throw new IllegalStateException("this method should never be called"); } + + @Override + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) + throws IOException { + throw new IllegalStateException("this method should never be called"); + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index 4d7635d..7230cdb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -17,14 +17,8 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.Set; - import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; @@ -32,6 +26,13 @@ import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + /** A Query that matches documents matching boolean combinations of other * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other * BooleanQuerys. @@ -242,7 +243,7 @@ public class BooleanQuery extends Query implements Iterable { for (Iterator wIter = weights.iterator(); wIter.hasNext();) { Weight w = wIter.next(); BooleanClause c = cIter.next(); - if (w.scorer(context, context.reader().getLiveDocs()) == null) { + if (w.scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs()) == null) { if (c.isRequired()) { fail = true; Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); @@ -305,13 +306,13 @@ public class BooleanQuery extends Query implements Iterable { } @Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, int flags, Bits acceptDocs) throws IOException { if (scoreDocsInOrder || minNrShouldMatch > 1) { // TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch // but the same is even true of pure conjunctions... 
- return super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); } List prohibited = new ArrayList(); @@ -319,7 +320,7 @@ public class BooleanQuery extends Query implements Iterable { Iterator cIter = clauses.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); - BulkScorer subScorer = w.bulkScorer(context, false, acceptDocs); + BulkScorer subScorer = w.bulkScorer(context, false, flags, acceptDocs); if (subScorer == null) { if (c.isRequired()) { return null; @@ -328,7 +329,7 @@ public class BooleanQuery extends Query implements Iterable { // TODO: there are some cases where BooleanScorer // would handle conjunctions faster than // BooleanScorer2... - return super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); } else if (c.isProhibited()) { prohibited.add(subScorer); } else { @@ -340,7 +341,7 @@ public class BooleanQuery extends Query implements Iterable { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { // initially the user provided value, // but if minNrShouldMatch == optional.size(), @@ -353,7 +354,7 @@ public class BooleanQuery extends Query implements Iterable { Iterator cIter = clauses.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); - Scorer subScorer = w.scorer(context, acceptDocs); + Scorer subScorer = w.scorer(context, flags, acceptDocs); if (subScorer == null) { if (c.isRequired()) { return null; @@ -454,8 +455,17 @@ public class BooleanQuery extends Query implements Iterable { // scorer() will return an out-of-order scorer if requested. 
return true; } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer("BooleanWeight["); + for (Weight weight : weights) { + sb.append(weight.toString()).append(","); + } + return sb.append("]").toString(); + } - private Scorer req(List required, boolean disableCoord) { + private Scorer req(List required, boolean disableCoord) throws IOException { if (required.size() == 1) { Scorer req = required.get(0); if (!disableCoord && maxCoord > 1) { diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java index 173bb44..8034522 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java @@ -17,15 +17,12 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - -import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.search.BooleanQuery.BooleanWeight; +import java.io.IOException; +import java.util.List; + /* Description from Doug Cutting (excerpted from * LUCENE-1483): * @@ -102,8 +99,13 @@ final class BooleanScorer extends BulkScorer { return true; } + @Override + public int postingFeatures() { + return DocsEnum.FLAG_FREQS; + } + } - + static final class Bucket { int doc = -1; // tells if bucket is valid double score; // incremental score diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java b/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java index 2c49ec7..721ade2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java @@ -21,8 +21,6 @@ import java.io.IOException; import java.util.Collection; import java.util.Collections; 
-import org.apache.lucene.search.Scorer.ChildScorer; - /** Internal document-at-a-time scorers used to deal with stupid coord() computation */ class BooleanTopLevelScorers { @@ -61,7 +59,7 @@ class BooleanTopLevelScorers { private final Scorer req; private final Scorer opt; - CoordinatingConjunctionScorer(Weight weight, float coords[], Scorer req, int reqCount, Scorer opt) { + CoordinatingConjunctionScorer(Weight weight, float coords[], Scorer req, int reqCount, Scorer opt) throws IOException { super(weight, new Scorer[] { req, opt }); this.coords = coords; this.req = req; diff --git a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java index c5957d8..e60e37e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java @@ -74,10 +74,16 @@ public abstract class CachingCollector extends FilterCollector { public final int freq() { throw new UnsupportedOperationException(); } @Override + public int nextPosition() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public final int nextDoc() { throw new UnsupportedOperationException(); } @Override public long cost() { return 1; } + } private static class NoScoreCachingCollector extends CachingCollector { @@ -96,6 +102,11 @@ public abstract class CachingCollector extends FilterCollector { docs = new ArrayList<>(); } + @Override + public int postingFeatures() { + return in.postingFeatures(); + } + protected NoScoreCachingLeafCollector wrap(LeafCollector in, int maxDocsToCache) { return new NoScoreCachingLeafCollector(in, maxDocsToCache); } @@ -304,7 +315,7 @@ public abstract class CachingCollector extends FilterCollector { * @param acceptDocsOutOfOrder * whether documents are allowed to be collected out-of-order */ - public static CachingCollector create(final boolean acceptDocsOutOfOrder, boolean cacheScores, 
double maxRAMMB) { + public static CachingCollector create(final boolean acceptDocsOutOfOrder, final int flags, boolean cacheScores, double maxRAMMB) { Collector other = new SimpleCollector() { @Override public boolean acceptsDocsOutOfOrder() { @@ -312,6 +323,11 @@ public abstract class CachingCollector extends FilterCollector { } @Override + public int postingFeatures() { + return flags; + } + + @Override public void collect(int doc) {} }; diff --git a/lucene/core/src/java/org/apache/lucene/search/Collector.java b/lucene/core/src/java/org/apache/lucene/search/Collector.java index bb47394..b93ee25 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Collector.java +++ b/lucene/core/src/java/org/apache/lucene/search/Collector.java @@ -17,10 +17,10 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.AtomicReaderContext; +import java.io.IOException; + /** *

Expert: Collectors are primarily meant to be used to * gather raw results from a search, and implement sorting @@ -72,5 +72,10 @@ public interface Collector { * next atomic reader context */ LeafCollector getLeafCollector(AtomicReaderContext context) throws IOException; - + + /** + * Returns the posting features required by this collector. + */ + public int postingFeatures(); + } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java index 3e81187..760d755 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java @@ -17,126 +17,183 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.util.ArrayUtil; + import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; -import org.apache.lucene.util.ArrayUtil; - /** Scorer for conjunctions, sets of queries, all of which are required. 
*/ class ConjunctionScorer extends Scorer { - protected int lastDoc = -1; - protected final DocsAndFreqs[] docsAndFreqs; - private final DocsAndFreqs lead; + + private final Scorer[] scorersOrdered; + private final Scorer[] scorers; + private int lastDoc = -1; private final float coord; + final PositionQueue posQueue; - ConjunctionScorer(Weight weight, Scorer[] scorers) { + public ConjunctionScorer(Weight weight, Scorer[] scorers) throws IOException { this(weight, scorers, 1f); } - ConjunctionScorer(Weight weight, Scorer[] scorers, float coord) { + public ConjunctionScorer(Weight weight, Scorer[] scorers, float coord) throws IOException { super(weight); + scorersOrdered = new Scorer[scorers.length]; + System.arraycopy(scorers, 0, scorersOrdered, 0, scorers.length); + this.scorers = scorers; this.coord = coord; - this.docsAndFreqs = new DocsAndFreqs[scorers.length]; + posQueue = new PositionQueue(scorers); + for (int i = 0; i < scorers.length; i++) { - docsAndFreqs[i] = new DocsAndFreqs(scorers[i]); + if (scorers[i].nextDoc() == NO_MORE_DOCS) { + // If even one of the sub-scorers does not have any documents, this + // scorer should not attempt to do any more work. + lastDoc = NO_MORE_DOCS; + return; + } } - // Sort the array the first time to allow the least frequent DocsEnum to - // lead the matching. - ArrayUtil.timSort(docsAndFreqs, new Comparator() { + + // Sort the array the first time... + // We don't need to sort the array in any future calls because we know + // it will already start off sorted (all scorers on same doc). + + // Note that this comparator is not consistent with equals! 
+ // Also we use timSort here to be stable (so order of Scorers that + // match on first document is preserved): + ArrayUtil.timSort(scorers, new Comparator() { // sort the array @Override - public int compare(DocsAndFreqs o1, DocsAndFreqs o2) { - return Long.compare(o1.cost, o2.cost); + public int compare(Scorer o1, Scorer o2) { + return o1.docID() - o2.docID(); } }); - lead = docsAndFreqs[0]; // least frequent DocsEnum leads the intersection - } + // NOTE: doNext() must be called before the re-sorting of the array later on. + // The reason is this: assume there are 5 scorers, whose first docs are 1, + // 2, 3, 5, 5 respectively. Sorting (above) leaves the array as is. Calling + // doNext() here advances all the first scorers to 5 (or a larger doc ID + // they all agree on). + // However, if we re-sort before doNext() is called, the order will be 5, 3, + // 2, 1, 5 and then doNext() will stop immediately, since the first scorer's + // docs equals the last one. So the invariant that after calling doNext() + // all scorers are on the same doc ID is broken. + if (doNext() == NO_MORE_DOCS) { + // The scorers did not agree on any document. + lastDoc = NO_MORE_DOCS; + return; + } - private int doNext(int doc) throws IOException { - for(;;) { - // doc may already be NO_MORE_DOCS here, but we don't check explicitly - // since all scorers should advance to NO_MORE_DOCS, match, then - // return that value. - advanceHead: for(;;) { - for (int i = 1; i < docsAndFreqs.length; i++) { - // invariant: docsAndFreqs[i].doc <= doc at this point. - - // docsAndFreqs[i].doc may already be equal to doc if we "broke advanceHead" - // on the previous iteration and the advance on the lead scorer exactly matched. - if (docsAndFreqs[i].doc < doc) { - docsAndFreqs[i].doc = docsAndFreqs[i].scorer.advance(doc); - - if (docsAndFreqs[i].doc > doc) { - // DocsEnum beyond the current doc - break and advance lead to the new highest doc. 
- doc = docsAndFreqs[i].doc; - break advanceHead; - } - } - } - // success - all DocsEnums are on the same doc - return doc; - } - // advance head for next iteration - doc = lead.doc = lead.scorer.advance(doc); + // If first-time skip distance is any predictor of + // scorer sparseness, then we should always try to skip first on + // those scorers. + // Keep last scorer in its last place (it will be the first + // to be skipped on), but reverse all of the others so that + // they will be skipped on in order of original high skip. + int end = scorers.length - 1; + int max = end >> 1; + for (int i = 0; i < max; i++) { + Scorer tmp = scorers[i]; + int idx = end - i - 1; + scorers[i] = scorers[idx]; + scorers[idx] = tmp; } } + private int doNext() throws IOException { + int first = 0; + int doc = scorers[scorers.length - 1].docID(); + Scorer firstScorer; + while ((firstScorer = scorers[first]).docID() < doc) { + doc = firstScorer.advance(doc); + first = first == scorers.length - 1 ? 0 : first + 1; + } + posQueue.advanceTo(doc); + return doc; + } @Override public int advance(int target) throws IOException { - lead.doc = lead.scorer.advance(target); - return lastDoc = doNext(lead.doc); + if (lastDoc == NO_MORE_DOCS) { + return lastDoc; + } else if (scorers[(scorers.length - 1)].docID() < target) { + scorers[(scorers.length - 1)].advance(target); + } + return lastDoc = doNext(); } @Override public int docID() { return lastDoc; } - + @Override public int nextDoc() throws IOException { - lead.doc = lead.scorer.nextDoc(); - return lastDoc = doNext(lead.doc); + if (lastDoc == NO_MORE_DOCS) { + return lastDoc; + } else if (lastDoc == -1) { + lastDoc = scorers[scorers.length - 1].docID(); + posQueue.advanceTo(lastDoc); + return lastDoc; + } + scorers[(scorers.length - 1)].nextDoc(); + return lastDoc = doNext(); } - + @Override public float score() throws IOException { // TODO: sum into a double and cast to float if we ever send required clauses to BS1 float sum = 0.0f; - for 
(DocsAndFreqs docs : docsAndFreqs) { - sum += docs.scorer.score(); + for (int i = 0; i < scorers.length; i++) { + sum += scorers[i].score(); } return sum * coord; } - + @Override - public int freq() { - return docsAndFreqs.length; + public int freq() throws IOException { + return scorers.length; } @Override - public long cost() { - return lead.scorer.cost(); + public int nextPosition() throws IOException { + return posQueue.nextPosition(); } @Override - public Collection getChildren() { - ArrayList children = new ArrayList<>(docsAndFreqs.length); - for (DocsAndFreqs docs : docsAndFreqs) { - children.add(new ChildScorer(docs.scorer, "MUST")); + public int startPosition() throws IOException { + return posQueue.startPosition(); + } + + @Override + public int endPosition() throws IOException { + return posQueue.endPosition(); + } + + @Override + public int startOffset() throws IOException { + return posQueue.startOffset(); + } + + @Override + public int endOffset() throws IOException { + return posQueue.endOffset(); + } + + @Override + public long cost() { + long sum = 0; + for (int i = 0; i < scorers.length; i++) { + sum += scorers[i].cost(); } - return children; + return sum; // nocommit is this right? 
} - static final class DocsAndFreqs { - final long cost; - final Scorer scorer; - int doc = -1; - - DocsAndFreqs(Scorer scorer) { - this.scorer = scorer; - this.cost = scorer.cost(); + @Override + public Collection getChildren() { + ArrayList children = new ArrayList(scorers.length); + for (Scorer scorer : scorersOrdered) { + children.add(new ChildScorer(scorer, "MUST")); } + return children; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index 2b7f4ed..7aa4984 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; */ import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.util.Bits; @@ -134,14 +135,14 @@ public class ConstantScoreQuery extends Query { } @Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, int flags, Bits acceptDocs) throws IOException { final DocIdSetIterator disi; if (filter != null) { assert query == null; - return super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); } else { assert query != null && innerWeight != null; - BulkScorer bulkScorer = innerWeight.bulkScorer(context, scoreDocsInOrder, acceptDocs); + BulkScorer bulkScorer = innerWeight.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); if (bulkScorer == null) { return null; } @@ -150,7 +151,7 @@ public class ConstantScoreQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws 
IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { final DocIdSetIterator disi; if (filter != null) { assert query == null; @@ -161,7 +162,7 @@ public class ConstantScoreQuery extends Query { disi = dis.iterator(); } else { assert query != null && innerWeight != null; - disi = innerWeight.scorer(context, acceptDocs); + disi = innerWeight.scorer(context, flags, acceptDocs); } if (disi == null) { @@ -177,7 +178,7 @@ public class ConstantScoreQuery extends Query { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - final Scorer cs = scorer(context, context.reader().getLiveDocs()); + final Scorer cs = scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs()); final boolean exists = (cs != null && cs.advance(doc) == doc); final ComplexExplanation result = new ComplexExplanation(); @@ -259,10 +260,15 @@ public class ConstantScoreQuery extends Query { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int advance(int target) throws IOException { return docIdSetIterator.advance(target); } - + @Override public long cost() { return docIdSetIterator.cost(); diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index c195497..3888271 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -16,6 +16,11 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.util.Bits; + import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -23,11 +28,6 @@ import java.util.Iterator; import java.util.List; import java.util.Set; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.util.Bits; - /** * A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum * score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries. @@ -153,11 +153,11 @@ public class DisjunctionMaxQuery extends Query implements Iterable { /** Create the scorer used to score our associated DisjunctionMaxQuery */ @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { List scorers = new ArrayList<>(); for (Weight w : weights) { // we will advance() subscorers - Scorer subScorer = w.scorer(context, acceptDocs); + Scorer subScorer = w.scorer(context, flags, acceptDocs); if (subScorer != null) { scorers.add(subScorer); } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java index b5d0a0d..e80242e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java @@ -46,6 +46,7 @@ final class DisjunctionMaxScorer extends DisjunctionScorer { DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier, Scorer[] subScorers) { super(weight, subScorers); this.tieBreakerMultiplier = tieBreakerMultiplier; + } 
@Override @@ -66,4 +67,5 @@ final class DisjunctionMaxScorer extends DisjunctionScorer { protected float getFinal() { return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier; } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index 5b7e2ff..f4b8127 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -20,23 +20,26 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Locale; /** * Base class for Scorers that score disjunctions. */ abstract class DisjunctionScorer extends Scorer { - private final Scorer subScorers[]; - private int numScorers; + protected final Scorer subScorers[]; /** The document number of the current match. */ protected int doc = -1; + protected int numScorers; + protected PositionQueue posQueue; /** Number of matching scorers for the current match. 
*/ protected int freq = -1; - + protected DisjunctionScorer(Weight weight, Scorer subScorers[]) { super(weight); this.subScorers = subScorers; this.numScorers = subScorers.length; + this.posQueue = new PositionQueue(subScorers); if (numScorers <= 1) { throw new IllegalArgumentException("There must be at least 2 subScorers"); } @@ -115,6 +118,45 @@ abstract class DisjunctionScorer extends Scorer { } @Override + public int nextPosition() throws IOException { + //System.out.println("Advancing " + this.toString()); + int pos = posQueue.nextPosition(); + //System.out.println(this); + return pos; + } + + @Override + public int startPosition() throws IOException { + return posQueue.startPosition(); + } + + @Override + public int endPosition() throws IOException { + return posQueue.endPosition(); + } + + @Override + public int startOffset() throws IOException { + return posQueue.startOffset(); + } + + @Override + public int endOffset() throws IOException { + return posQueue.endOffset(); + } + + @Override + public String toString() { + try { + return String.format(Locale.ROOT, "DisjScorer[%s] %d(%d)->%d(%d)", weight.toString(), + posQueue.startPosition(), + posQueue.startOffset(), posQueue.endPosition(), posQueue.endOffset()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override public final long cost() { long sum = 0; for (int i = 0; i < numScorers; i++) { @@ -143,6 +185,7 @@ abstract class DisjunctionScorer extends Scorer { int docID = subScorers[0].docID(); if (docID != doc) { freq = -1; + posQueue.advanceTo(docID); return doc = docID; } } @@ -163,11 +206,12 @@ abstract class DisjunctionScorer extends Scorer { int docID = subScorers[0].docID(); if (docID >= target) { freq = -1; + posQueue.advanceTo(docID); return doc = docID; } } } - + // if we haven't already computed freq + score, do so private void visitScorers() throws IOException { reset(); @@ -209,4 +253,5 @@ abstract class DisjunctionScorer extends Scorer { /** Return final score */ 
protected abstract float getFinal(); + } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java index f291695..f775ad6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java @@ -19,13 +19,14 @@ package org.apache.lucene.search; import java.io.IOException; + /** A Scorer for OR like queries, counterpart of ConjunctionScorer. * This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers. */ final class DisjunctionSumScorer extends DisjunctionScorer { private double score; private final float[] coord; - + /** Construct a DisjunctionScorer. * @param weight The weight to be used. * @param subScorers Array of at least two subscorers. @@ -50,4 +51,5 @@ final class DisjunctionSumScorer extends DisjunctionScorer { protected float getFinal() { return (float)score * coord[freq]; } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index e73b241..7f31062 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -17,50 +17,54 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.PhraseQuery.TermDocsEnumFactory; +import org.apache.lucene.search.similarities.Similarity; + import java.io.IOException; import java.util.Arrays; -import org.apache.lucene.index.*; -import org.apache.lucene.search.similarities.Similarity; - final class ExactPhraseScorer extends Scorer { private final int endMinus1; - + private final static int CHUNK = 4096; - + private int gen; private final int[] counts = new int[CHUNK]; private final int[] gens = new int[CHUNK]; + + boolean noDocs; private final long cost; private final static class ChunkState { - final DocsAndPositionsEnum posEnum; + final TermDocsEnumFactory factory; + final DocsEnum posEnum; final int offset; int posUpto; int posLimit; int pos; int lastPos; - public ChunkState(DocsAndPositionsEnum posEnum, int offset) { + public ChunkState(TermDocsEnumFactory factory, DocsEnum posEnum, int offset) { + this.factory = factory; this.posEnum = posEnum; this.offset = offset; } } - + private final ChunkState[] chunkStates; - private final DocsAndPositionsEnum lead; + private final DocsEnum lead; private int docID = -1; - private int freq; private final Similarity.SimScorer docScorer; - + ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, Similarity.SimScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; - + chunkStates = new ChunkState[postings.length]; endMinus1 = postings.length-1; @@ -70,7 +74,7 @@ final class ExactPhraseScorer extends Scorer { cost = lead.cost(); for(int i=0;i 0) { + } else if (firstPosition() != NO_MORE_POSITIONS) { return doc; // success: matches phrase } else { doc = lead.nextDoc(); // doesn't match phrase @@ -103,7 +107,7 @@ final class ExactPhraseScorer extends Scorer { doc = lead.advance(doc); } } - + @Override public int nextDoc() throws IOException { return docID = doNext(lead.nextDoc()); @@ -113,51 +117,116 @@ final class ExactPhraseScorer extends Scorer { 
public int advance(int target) throws IOException { return docID = doNext(lead.advance(target)); } - + @Override public String toString() { return "ExactPhraseScorer(" + weight + ")"; } - + @Override - public int freq() { + public int freq() throws IOException { + int freq = 0; + while (nextPosition() != NO_MORE_DOCS) { + freq++; + } return freq; } - + @Override public int docID() { return docID; } - + @Override - public float score() { - return docScorer.score(docID, freq); + public float score() throws IOException { + return docScorer.score(docID, freq()); } - private int phraseFreq() throws IOException { + private int chunkStart = 0; + private int chunkEnd = CHUNK; - freq = 0; + private int posRemaining; + private int positionsInChunk; + private boolean cached = false; - // init chunks - for(int i=0;i cs.lastPos) { cs.lastPos = cs.pos; final int posIndex = cs.pos - chunkStart; @@ -179,7 +248,7 @@ final class ExactPhraseScorer extends Scorer { } if (cs.posUpto == cs.posLimit) { - end = true; + exhausted = true; break; } cs.posUpto++; @@ -189,10 +258,10 @@ final class ExactPhraseScorer extends Scorer { // middle terms boolean any = true; - for(int t=1;t cs.lastPos) { cs.lastPos = cs.pos; final int posIndex = cs.pos - chunkStart; @@ -204,7 +273,7 @@ final class ExactPhraseScorer extends Scorer { } if (cs.posUpto == cs.posLimit) { - end = true; + exhausted = true; break; } cs.posUpto++; @@ -220,6 +289,8 @@ final class ExactPhraseScorer extends Scorer { // petered out for this chunk chunkStart += CHUNK; chunkEnd += CHUNK; + if (exhausted) + return false; continue; } @@ -227,17 +298,17 @@ final class ExactPhraseScorer extends Scorer { { final ChunkState cs = chunkStates[endMinus1]; - while(cs.pos < chunkEnd) { + while (cs.pos < chunkEnd) { if (cs.pos > cs.lastPos) { cs.lastPos = cs.pos; final int posIndex = cs.pos - chunkStart; - if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1) { - freq++; + if (posIndex >= 0 && gens[posIndex] == gen + && 
counts[posIndex] == endMinus1) { + addPosition(cs.pos); } } if (cs.posUpto == cs.posLimit) { - end = true; break; } cs.posUpto++; @@ -247,13 +318,15 @@ final class ExactPhraseScorer extends Scorer { chunkStart += CHUNK; chunkEnd += CHUNK; - } - return freq; + posRemaining = positionsInChunk; + return true; + } } @Override public long cost() { return cost; } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java index e2a50c8..66e130d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java @@ -17,6 +17,7 @@ package org.apache.lucene.search; * limitations under the License. */ +import java.io.IOException; import java.util.Collection; /** Used by {@link BulkScorer}s that need to pass a {@link @@ -46,6 +47,11 @@ final class FakeScorer extends Scorer { } @Override + public int nextPosition() throws IOException { + throw new UnsupportedOperationException("FakeScorer doesn't support nextPosition()"); + } + + @Override public int nextDoc() { throw new UnsupportedOperationException("FakeScorer doesn't support nextDoc()"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java b/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java index 247bb03..3f6e74e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java @@ -1,9 +1,9 @@ package org.apache.lucene.search; -import java.io.IOException; - import org.apache.lucene.index.AtomicReaderContext; +import java.io.IOException; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -41,6 +41,11 @@ public class FilterCollector implements Collector { } @Override + public int postingFeatures() { + return in.postingFeatures(); + } + + @Override public String toString() { return getClass().getSimpleName() + "(" + in + ")"; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java index 88881bd..c2bfab6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java @@ -17,11 +17,10 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.Collection; - import org.apache.lucene.util.AttributeSource; +import java.io.IOException; + /** * A {@code FilterScorer} contains another {@code Scorer}, which it * uses as its basic source of data, possibly transforming the data along the @@ -61,6 +60,11 @@ abstract class FilterScorer extends Scorer { } @Override + public int nextPosition() throws IOException { + return in.nextPosition(); + } + + @Override public int advance(int target) throws IOException { return in.advance(target); } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java b/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java index d700a30..74b8828 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java @@ -124,7 +124,7 @@ public class FilteredQuery extends Query { // return a filtering scorer @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { assert filter != null; DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs); @@ -133,12 +133,13 @@ public class FilteredQuery extends Query { return 
null; } - return strategy.filteredScorer(context, weight, filterDocIdSet); + return strategy.filteredScorer(context, weight, filterDocIdSet, flags); } // return a filtering top scorer @Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, int flags, Bits acceptDocs) throws IOException { + assert filter != null; DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs); @@ -147,7 +148,8 @@ public class FilteredQuery extends Query { return null; } - return strategy.filteredBulkScorer(context, weight, scoreDocsInOrder, filterDocIdSet); + return strategy.filteredBulkScorer(context, weight, scoreDocsInOrder, filterDocIdSet, flags); + } }; } @@ -189,7 +191,6 @@ public class FilteredQuery extends Query { return scorerDoc = doc; } } - @Override public int docID() { return scorerDoc; @@ -202,7 +203,12 @@ public class FilteredQuery extends Query { @Override public int freq() throws IOException { return scorer.freq(); } - + + @Override + public int nextPosition() throws IOException { + return scorer.nextPosition(); + } + @Override public Collection getChildren() { return Collections.singleton(new ChildScorer(scorer, "FILTERED")); @@ -312,7 +318,12 @@ public class FilteredQuery extends Query { public final int freq() throws IOException { return scorer.freq(); } - + + @Override + public int nextPosition() throws IOException { + return scorer.nextPosition(); + } + @Override public final Collection getChildren() { return Collections.singleton(new ChildScorer(scorer, "FILTERED")); @@ -480,12 +491,13 @@ public class FilteredQuery extends Query { * the {@link AtomicReaderContext} for which to return the {@link Scorer}. * @param weight the {@link FilteredQuery} {@link Weight} to create the filtered scorer. 
* @param docIdSet the filter {@link DocIdSet} to apply + * @param flags the low level Posting Features for this scorer. * @return a filtered scorer * * @throws IOException if an {@link IOException} occurs */ public abstract Scorer filteredScorer(AtomicReaderContext context, - Weight weight, DocIdSet docIdSet) throws IOException; + Weight weight, DocIdSet docIdSet, int flags) throws IOException; /** * Returns a filtered {@link BulkScorer} based on this @@ -500,8 +512,8 @@ public class FilteredQuery extends Query { * @return a filtered top scorer */ public BulkScorer filteredBulkScorer(AtomicReaderContext context, - Weight weight, boolean scoreDocsInOrder, DocIdSet docIdSet) throws IOException { - Scorer scorer = filteredScorer(context, weight, docIdSet); + Weight weight, boolean scoreDocsInOrder, DocIdSet docIdSet, int flags) throws IOException { + Scorer scorer = filteredScorer(context, weight, docIdSet, flags); if (scorer == null) { return null; } @@ -509,6 +521,7 @@ public class FilteredQuery extends Query { // ignore scoreDocsInOrder: return new Weight.DefaultBulkScorer(scorer); } + } /** @@ -522,7 +535,7 @@ public class FilteredQuery extends Query { public static class RandomAccessFilterStrategy extends FilterStrategy { @Override - public Scorer filteredScorer(AtomicReaderContext context, Weight weight, DocIdSet docIdSet) throws IOException { + public Scorer filteredScorer(AtomicReaderContext context, Weight weight, DocIdSet docIdSet, int flags) throws IOException { final DocIdSetIterator filterIter = docIdSet.iterator(); if (filterIter == null) { // this means the filter does not accept any documents. 
@@ -539,12 +552,12 @@ public class FilteredQuery extends Query { final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc); if (useRandomAccess) { // if we are using random access, we return the inner scorer, just with other acceptDocs - return weight.scorer(context, filterAcceptDocs); + return weight.scorer(context, flags, filterAcceptDocs); } else { assert firstFilterDoc > -1; // we are gonna advance() this scorer, so we set inorder=true/toplevel=false // we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice - final Scorer scorer = weight.scorer(context, null); + final Scorer scorer = weight.scorer(context, flags, null); // TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer return (scorer == null) ? null : new PrimaryAdvancedLeapFrogScorer(weight, firstFilterDoc, filterIter, scorer); } @@ -578,14 +591,14 @@ public class FilteredQuery extends Query { @Override public Scorer filteredScorer(AtomicReaderContext context, - Weight weight, DocIdSet docIdSet) throws IOException { + Weight weight, DocIdSet docIdSet, int flags) throws IOException { final DocIdSetIterator filterIter = docIdSet.iterator(); if (filterIter == null) { // this means the filter does not accept any documents. 
return null; } // we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice - final Scorer scorer = weight.scorer(context, null); + final Scorer scorer = weight.scorer(context, flags, null); if (scorer == null) { return null; } @@ -615,30 +628,29 @@ public class FilteredQuery extends Query { @Override public Scorer filteredScorer(final AtomicReaderContext context, Weight weight, - DocIdSet docIdSet) throws IOException { + DocIdSet docIdSet, int flags) throws IOException { Bits filterAcceptDocs = docIdSet.bits(); if (filterAcceptDocs == null) { // Filter does not provide random-access Bits; we // must fallback to leapfrog: - return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet); + return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet, flags); } - final Scorer scorer = weight.scorer(context, null); - return scorer == null ? null : new QueryFirstScorer(weight, - filterAcceptDocs, scorer); + final Scorer scorer = weight.scorer(context, flags, null); + return scorer == null ? null : new QueryFirstScorer(weight, filterAcceptDocs, scorer); } @Override public BulkScorer filteredBulkScorer(final AtomicReaderContext context, Weight weight, boolean scoreDocsInOrder, // ignored (we always top-score in order) - DocIdSet docIdSet) throws IOException { + DocIdSet docIdSet, int flags) throws IOException { Bits filterAcceptDocs = docIdSet.bits(); if (filterAcceptDocs == null) { // Filter does not provide random-access Bits; we // must fallback to leapfrog: - return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredBulkScorer(context, weight, scoreDocsInOrder, docIdSet); + return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredBulkScorer(context, weight, scoreDocsInOrder, docIdSet, flags); } - final Scorer scorer = weight.scorer(context, null); + final Scorer scorer = weight.scorer(context, flags, null); return scorer == null ? 
null : new QueryFirstBulkScorer(scorer, filterAcceptDocs); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java index 3199966..8a18443 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java @@ -22,7 +22,7 @@ import java.util.ArrayList; import java.util.Comparator; import java.util.List; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FilteredTermsEnum; import org.apache.lucene.index.Term; @@ -271,8 +271,8 @@ public class FuzzyTermsEnum extends TermsEnum { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, - DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, + DocsEnum reuse, int flags) throws IOException { return actualEnum.docsAndPositions(liveDocs, reuse, flags); } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 116304e..e74ae81 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -17,6 +17,23 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.ReaderUtil; +import org.apache.lucene.index.StoredDocument; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.Terms; +import org.apache.lucene.search.similarities.DefaultSimilarity; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.store.NIOFSDirectory; +import org.apache.lucene.util.ThreadInterruptedException; + import java.io.IOException; import java.util.Arrays; import java.util.Iterator; @@ -32,23 +49,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DirectoryReader; // javadocs -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiFields; -import org.apache.lucene.index.IndexReaderContext; -import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.index.StoredDocument; -import org.apache.lucene.index.StoredFieldVisitor; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; -import org.apache.lucene.index.Terms; -import org.apache.lucene.search.similarities.DefaultSimilarity; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.store.NIOFSDirectory; // javadoc -import org.apache.lucene.util.ThreadInterruptedException; -import org.apache.lucene.index.IndexWriter; // javadocs - /** Implements search over a single IndexReader. * *

Applications usually need only call the inherited @@ -608,7 +608,7 @@ public class IndexSearcher { // continue with the following leaf continue; } - BulkScorer scorer = weight.bulkScorer(ctx, !leafCollector.acceptsDocsOutOfOrder(), ctx.reader().getLiveDocs()); + BulkScorer scorer = weight.bulkScorer(ctx, !leafCollector.acceptsDocsOutOfOrder(), collector.postingFeatures(), ctx.reader().getLiveDocs()); if (scorer != null) { try { scorer.score(leafCollector); diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index 8f2edd7..987666b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -20,11 +20,11 @@ package org.apache.lucene.search; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.ToStringUtils; -import java.util.Set; import java.io.IOException; +import java.util.Set; /** * A query that matches all documents. 
@@ -73,6 +73,11 @@ public class MatchAllDocsQuery extends Query { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int advance(int target) throws IOException { doc = target-1; return nextDoc(); @@ -114,7 +119,7 @@ public class MatchAllDocsQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { return new MatchAllScorer(context.reader(), acceptDocs, this, queryWeight); } diff --git a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java index 3f31ace..eeb9711 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java @@ -17,14 +17,14 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.util.ArrayUtil; + import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; import java.util.List; -import org.apache.lucene.util.ArrayUtil; - /** * A Scorer for OR like queries, counterpart of ConjunctionScorer. * This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers. @@ -62,6 +62,8 @@ class MinShouldMatchSumScorer extends Scorer { private final float coord[]; + private final PositionQueue posQueue; + /** * Construct a MinShouldMatchSumScorer. 
* @@ -110,6 +112,8 @@ class MinShouldMatchSumScorer extends Scorer { this.coord = coord; minheapHeapify(); assert minheapCheck(); + + posQueue = new PositionQueue(subScorers.toArray(new Scorer[subScorers.size()])); } @Override @@ -145,6 +149,7 @@ class MinShouldMatchSumScorer extends Scorer { break; } } + posQueue.advanceTo(doc); return doc; } @@ -231,6 +236,11 @@ class MinShouldMatchSumScorer extends Scorer { return nrMatchers; } + @Override + public int nextPosition() throws IOException { + return posQueue.nextPosition(); + } + /** * Advances to the first match beyond the current whose document number is * greater than or equal to a given target.
@@ -261,6 +271,7 @@ class MinShouldMatchSumScorer extends Scorer { evaluateSmallestDocInHeap(); if (nrMatchers >= mm) { + posQueue.advanceTo(doc); return doc; } else { return nextDoc(); diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java index 859b893..3a02e00 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java @@ -17,13 +17,11 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.AtomicReaderContext; + import java.io.IOException; import java.util.Arrays; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.Scorer; - /** * A {@link Collector} which allows running a search with several * {@link Collector}s. It offers a static {@link #wrap} method which accepts a @@ -103,6 +101,15 @@ public class MultiCollector implements Collector { return new MultiLeafCollector(leafCollectors); } + @Override + public int postingFeatures() { + int pf = 0; + for (Collector collector : collectors) { + pf |= collector.postingFeatures(); + } + return pf; + } + private static class MultiLeafCollector implements LeafCollector { diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 902e6aa..0b21469 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/* +/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -17,12 +17,8 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.*; - -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; @@ -31,14 +27,28 @@ import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.Similarity.SimScorer; +import org.apache.lucene.search.PhraseQuery.TermDocsEnumFactory; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntroSorter; import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Set; + /** * MultiPhraseQuery is a generalized version of PhraseQuery, with an added * method {@link #add(Term[])}. 
@@ -179,7 +189,7 @@ public class MultiPhraseQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { assert !termArrays.isEmpty(); final AtomicReader reader = context.reader(); final Bits liveDocs = acceptDocs; @@ -197,9 +207,9 @@ public class MultiPhraseQuery extends Query { for (int pos=0; pos 1) { postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum); @@ -221,6 +231,7 @@ public class MultiPhraseQuery extends Query { // None of the terms are in this reader return null; } + factory = new MultiTermDocsEnumFactory(liveDocs, context, terms, termContexts, termsEnum, flags); } else { final Term term = terms[0]; TermState termState = termContexts.get(term).get(context.ord); @@ -237,10 +248,10 @@ public class MultiPhraseQuery extends Query { throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")"); } - docFreq = termsEnum.docFreq(); + factory = new TermDocsEnumFactory(term.bytes(), termState, termsEnum, flags, acceptDocs); } - - postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms); + + postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, factory, termsEnum.docFreq() , positions.get(pos).intValue(), terms); } // sort by increasing docFreq order @@ -257,7 +268,7 @@ public class MultiPhraseQuery extends Query { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, context.reader().getLiveDocs()); + Scorer scorer = scorer(context, DocsEnum.FLAG_POSITIONS, context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { @@ -401,6 +412,27 @@ public class MultiPhraseQuery extends Query { 
} return true; } + + private static class MultiTermDocsEnumFactory extends TermDocsEnumFactory { + + AtomicReaderContext context; + Term[] terms; + Map termContexts; + + MultiTermDocsEnumFactory(Bits liveDocs, AtomicReaderContext context, Term[] terms, + Map termContexts, TermsEnum termsEnum, int flags) throws IOException { + super(termsEnum, flags, liveDocs); + this.context = context; + this.terms = terms; + this.termContexts = termContexts; + } + + @Override + public DocsEnum docsAndPositionsEnum() throws IOException { + return new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum, flags); + } + + } } /** @@ -408,15 +440,15 @@ public class MultiPhraseQuery extends Query { */ // TODO: if ever we allow subclassing of the *PhraseScorer -class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { +class UnionDocsAndPositionsEnum extends DocsEnum { - private static final class DocsQueue extends PriorityQueue { - DocsQueue(List docsEnums) throws IOException { + private static final class DocsQueue extends PriorityQueue { + DocsQueue(List docsEnums) throws IOException { super(docsEnums.size()); - Iterator i = docsEnums.iterator(); + Iterator i = docsEnums.iterator(); while (i.hasNext()) { - DocsAndPositionsEnum postings = i.next(); + DocsEnum postings = i.next(); if (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { add(postings); } @@ -424,30 +456,46 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { } @Override - public final boolean lessThan(DocsAndPositionsEnum a, DocsAndPositionsEnum b) { + public final boolean lessThan(DocsEnum a, DocsEnum b) { return a.docID() < b.docID(); } } - private static final class IntQueue { - private int _arraySize = 16; + // TODO: Reimplement this as int[_arraySize * 3], storing position at i * 3, + // startOffset at i * 3 + 1 and endOffset at i * 3 + 2. Will need to also + // implement a new SorterTemplate to sort the array. 
+ + private static final class PositionQueue { + private int _arraySize = 48; private int _index = 0; private int _lastIndex = 0; private int[] _array = new int[_arraySize]; - final void add(int i) { - if (_lastIndex == _arraySize) + final void add(int pos, int start, int end) { + if (_lastIndex * 3 == _arraySize) growArray(); - _array[_lastIndex++] = i; + _array[_lastIndex * 3] = pos; + _array[_lastIndex * 3 + 1] = start; + _array[_lastIndex * 3 + 2] = end; + _lastIndex += 1; } final int next() { - return _array[_index++]; + return _array[_index++ * 3]; + } + + final int startOffset() { + return _array[(_index - 1) * 3 + 1]; + } + + final int endOffset() { + return _array[(_index - 1) * 3 + 2]; } final void sort() { - Arrays.sort(_array, _index, _lastIndex); + //Arrays.sort(_array, _index, _lastIndex); + sorter.sort(_index, _lastIndex - 1); } final void clear() { @@ -465,16 +513,54 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { _array = newArray; _arraySize *= 2; } + + private IntroSorter sorter = new IntroSorter() { + private int pivot; + + @Override + protected void swap(int i, int j) { + int ti = _array[i * 3]; + int ts = _array[i * 3 + 1]; + int te = _array[i * 3 + 2]; + _array[i * 3] = _array[j * 3]; + _array[i * 3 + 1] = _array[j * 3 + 1]; + _array[i * 3 + 2] = _array[j * 3 + 2]; + _array[j * 3] = ti; + _array[j * 3 + 1] = ts; + _array[j * 3 + 2] = te; + } + + @Override + protected int compare(int i, int j) { + return _array[i * 3] - _array[j * 3]; + } + + @Override + protected void setPivot(int i) { + pivot = i; + } + + @Override + protected int comparePivot(int j) { + return pivot - _array[j * 3]; + } + }; } private int _doc = -1; private int _freq; private DocsQueue _queue; - private IntQueue _posList; + private PositionQueue _posList; + private int posPending; private long cost; - public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map termContexts, TermsEnum termsEnum) throws IOException { - List 
docsEnums = new LinkedList<>(); + public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, + Map termContexts, TermsEnum termsEnum) throws IOException { + this(liveDocs, context, terms, termContexts, termsEnum, DocsEnum.FLAG_POSITIONS); + } + + public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map termContexts, TermsEnum termsEnum, int flags) throws IOException { + List docsEnums = new LinkedList<>(); for (int i = 0; i < terms.length; i++) { final Term term = terms[i]; TermState termState = termContexts.get(term).get(context.ord); @@ -483,7 +569,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { continue; } termsEnum.seekExact(term.bytes(), termState); - DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE); + DocsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE); if (postings == null) { // term does exist, but has no positions throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")"); @@ -493,7 +579,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { } _queue = new DocsQueue(docsEnums); - _posList = new IntQueue(); + _posList = new PositionQueue(); } @Override @@ -509,13 +595,13 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { _doc = _queue.top().docID(); // merge sort all positions together - DocsAndPositionsEnum postings; + DocsEnum postings; do { postings = _queue.top(); final int freq = postings.freq(); for (int i = 0; i < freq; i++) { - _posList.add(postings.nextPosition()); + _posList.add(postings.nextPosition(), postings.startOffset(), postings.endOffset()); } if (postings.nextDoc() != NO_MORE_DOCS) { @@ -527,23 +613,27 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { _posList.sort(); _freq = _posList.size(); + posPending = _freq; return _doc; } @Override 
public int nextPosition() { + if (posPending == 0) + return NO_MORE_POSITIONS; + posPending--; return _posList.next(); } @Override public int startOffset() { - return -1; + return _posList.startOffset(); } @Override public int endOffset() { - return -1; + return _posList.endOffset(); } @Override @@ -554,7 +644,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { @Override public final int advance(int target) throws IOException { while (_queue.top() != null && target > _queue.top().docID()) { - DocsAndPositionsEnum postings = _queue.pop(); + DocsEnum postings = _queue.pop(); if (postings.advance(target) != NO_MORE_DOCS) { _queue.add(postings); } @@ -563,7 +653,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { } @Override - public final int freq() { + public final int freq() throws IOException { return _freq; } diff --git a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java index c975b01..7faa453 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java @@ -17,8 +17,10 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Term; + import java.io.IOException; -import org.apache.lucene.index.*; /** * Position of a term in a document that takes into account the term offset within the phrase. 
@@ -29,13 +31,13 @@ final class PhrasePositions { int count; // remaining pos in this doc int offset; // position in phrase final int ord; // unique across all PhrasePositions instances - final DocsAndPositionsEnum postings; // stream of docs & positions + final DocsEnum postings; // stream of docs & positions PhrasePositions next; // used to make lists int rptGroup = -1; // >=0 indicates that this is a repeating PP int rptInd; // index in the rptGroup final Term[] terms; // for repetitions initialization - PhrasePositions(DocsAndPositionsEnum postings, int o, int ord, Term[] terms) { + PhrasePositions(DocsEnum postings, int o, int ord, Term[] terms) { this.postings = postings; offset = o; this.ord = ord; @@ -44,6 +46,7 @@ final class PhrasePositions { final boolean next() throws IOException { // increments to next doc doc = postings.nextDoc(); + if (doc == DocIdSetIterator.NO_MORE_DOCS) { return false; } @@ -80,10 +83,14 @@ final class PhrasePositions { /** for debug purposes */ @Override public String toString() { - String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count; + String s = "d:"+doc+" offset:"+offset+" position:"+position+" c:"+count; if (rptGroup >=0 ) { s += " rpt:"+rptGroup+",i"+rptInd; } + s += " t: [" + terms[0]; + for (int i = 1; i < terms.length; i++) + s += "," + terms[1]; + s += "]"; return s; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index cdca801..46a51a0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/* +/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -17,28 +17,28 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Set; - +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Set; + /** A Query that matches documents containing a particular sequence of terms. * A PhraseQuery is built by QueryParser for input like "new york". * @@ -138,13 +138,15 @@ public class PhraseQuery extends Query { } static class PostingsAndFreq implements Comparable { - final DocsAndPositionsEnum postings; + final TermDocsEnumFactory factory; + final DocsEnum postings; final int docFreq; final int position; final Term[] terms; final int nTerms; // for faster comparisons - public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term... terms) { + public PostingsAndFreq(DocsEnum postings, TermDocsEnumFactory factory, int docFreq, int position, Term... 
terms) throws IOException { + this.factory = factory; this.postings = postings; this.docFreq = docFreq; this.position = position; @@ -245,7 +247,7 @@ public class PhraseQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { assert !terms.isEmpty(); final AtomicReader reader = context.reader(); final Bits liveDocs = acceptDocs; @@ -267,7 +269,7 @@ public class PhraseQuery extends Query { return null; } te.seekExact(t.bytes(), state); - DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE); + DocsEnum postingsEnum = te.docs(liveDocs, null, DocsEnum.FLAG_POSITIONS); // PhraseQuery on a field that did not index // positions. @@ -276,7 +278,8 @@ public class PhraseQuery extends Query { // term does exist, but has no positions throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")"); } - postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t); + TermDocsEnumFactory factory = new TermDocsEnumFactory(t.bytes(), state, te, flags, acceptDocs); + postingsFreqs[i] = new PostingsAndFreq(postingsEnum, factory, te.docFreq(), positions.get(i).intValue(), t); } // sort by increasing docFreq order @@ -298,7 +301,7 @@ public class PhraseQuery extends Query { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, context.reader().getLiveDocs()); + Scorer scorer = scorer(context, DocsEnum.FLAG_POSITIONS, context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { @@ -396,4 +399,33 @@ public class PhraseQuery extends Query { ^ positions.hashCode(); } + static class TermDocsEnumFactory { + protected final TermsEnum termsEnum; + 
protected final Bits liveDocs; + protected final int flags; + + private final BytesRef term; + private final TermState termState; + + TermDocsEnumFactory(TermsEnum termsEnum, int flags, Bits liveDocs) { + this(null, null, termsEnum, flags, liveDocs); + } + + TermDocsEnumFactory(BytesRef term, TermState termState, TermsEnum termsEnum, int flags, Bits liveDocs) { + this.termsEnum = termsEnum; + this.termState = termState; + this.liveDocs = liveDocs; + this.term = term; + this.flags = flags; + } + + + public DocsEnum docsAndPositionsEnum() + throws IOException { + assert term != null; + termsEnum.seekExact(term, termState); + return termsEnum.docsAndPositions(liveDocs, null, flags); + } + + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java index d2c8655..24a4815 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java @@ -42,4 +42,8 @@ final class PhraseQueue extends PriorityQueue { return pp1.doc < pp2.doc; } } + + public Object[] getPPs() { + return getHeapArray(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/PositionQueue.java b/lucene/core/src/java/org/apache/lucene/search/PositionQueue.java new file mode 100644 index 0000000..3b57147 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/PositionQueue.java @@ -0,0 +1,127 @@ +package org.apache.lucene.search; + +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.posfilter.Interval; +import org.apache.lucene.util.PriorityQueue; + +import java.io.IOException; + +/** + * Copyright (c) 2013 Lemur Consulting Ltd. + *

+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +public class PositionQueue extends PriorityQueue { + + public class DocsEnumRef { + + public final DocsEnum docsEnum; + public final int ord; + public Interval interval = new Interval(); + + public DocsEnumRef(DocsEnum docsEnum, int ord) { + this.docsEnum = docsEnum; + this.ord = ord; + } + + public int nextPosition() throws IOException { + assert docsEnum.docID() != -1; + if (docsEnum.docID() == DocsEnum.NO_MORE_DOCS || docsEnum.docID() != docId + || docsEnum.nextPosition() == DocsEnum.NO_MORE_POSITIONS) + interval.setMaximum(); + else + interval.update(this.docsEnum); + return interval.begin; + } + + } + + boolean positioned = false; + Interval current = new Interval(); + int docId = -1; + protected int queuesize; + + public PositionQueue(DocsEnum... 
subDocsEnums) { + super(subDocsEnums.length); + for (int i = 0; i < subDocsEnums.length; i++) { + add(new DocsEnumRef(subDocsEnums[i], i)); + } + queuesize = subDocsEnums.length; + } + + protected void init() throws IOException { + queuesize = 0; + for (Object scorerRef : getHeapArray()) { + if (scorerRef != null) { + ((DocsEnumRef) scorerRef).nextPosition(); + queuesize++; + } + } + updateTop(); + } + + public int nextPosition() throws IOException { + if (!positioned) { + init(); + positioned = true; + current.update(top().interval); + return current.begin; + } + if (current.begin == DocsEnum.NO_MORE_POSITIONS) + return DocsEnum.NO_MORE_POSITIONS; + if (top().nextPosition() == DocsEnum.NO_MORE_POSITIONS) + queuesize--; + updateInternalIntervals(); + updateTop(); + current.update(top().interval); + //System.out.println("PQ: " + current.toString()); + return current.begin; + } + + @Override + protected boolean lessThan(DocsEnumRef a, DocsEnumRef b) { + if (a.docsEnum.docID() < b.docsEnum.docID()) + return true; + if (a.docsEnum.docID() > b.docsEnum.docID()) + return false; + return a.interval.begin < b.interval.begin; + } + + protected void updateInternalIntervals() {} + + /** + * Must be called after the scorers have been advanced + */ + public void advanceTo(int doc) { + positioned = false; + this.docId = doc; + this.queuesize = this.size(); + } + + public int startPosition() throws IOException { + return current.begin; + } + + public int endPosition() throws IOException { + return current.end; + } + + public int startOffset() throws IOException { + return current.offsetBegin; + } + + public int endOffset() throws IOException { + return current.offsetEnd; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/PositionsCollector.java b/lucene/core/src/java/org/apache/lucene/search/PositionsCollector.java new file mode 100644 index 0000000..a573db5 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/PositionsCollector.java @@ -0,0 +1,62 @@ 
+package org.apache.lucene.search; + +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.posfilter.Interval; + +import java.io.IOException; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public abstract class PositionsCollector extends SimpleCollector { + + private Scorer scorer; + private final boolean collectOffsets; + + protected PositionsCollector(boolean collectOffsets) { + this.collectOffsets = collectOffsets; + } + + protected PositionsCollector() { + this(false); + } + + @Override + public final void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + + @Override + public final void collect(int doc) throws IOException { + while (scorer.nextPosition() != DocsEnum.NO_MORE_POSITIONS) { + collectPosition(doc, new Interval(scorer)); + } + } + + protected abstract void collectPosition(int doc, Interval interval); + + @Override + public int postingFeatures() { + return collectOffsets ? 
DocsEnum.FLAG_OFFSETS : DocsEnum.FLAG_POSITIONS; + } + + @Override + public final boolean acceptsDocsOutOfOrder() { + return false; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java index 755c3cd..4aed657 100644 --- a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java @@ -17,13 +17,14 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; + import java.io.IOException; import java.util.Arrays; import java.util.Comparator; import java.util.List; -import org.apache.lucene.index.AtomicReaderContext; - /** A {@link Rescorer} that uses a provided Query to assign * scores to the first-pass hits. * @@ -82,7 +83,7 @@ public abstract class QueryRescorer extends Rescorer { if (readerContext != null) { // We advanced to another segment: docBase = readerContext.docBase; - scorer = weight.scorer(readerContext, null); + scorer = weight.scorer(readerContext, DocsEnum.FLAG_NONE, null); } if(scorer != null) { diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java b/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java index 50bc03e..4a4a0f8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java @@ -17,11 +17,12 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.util.Bits; +import java.io.IOException; + /** * Constrains search results to only match those which also match a provided * query. 
@@ -56,7 +57,7 @@ public class QueryWrapperFilter extends Filter { return new DocIdSet() { @Override public DocIdSetIterator iterator() throws IOException { - return weight.scorer(privateContext, acceptDocs); + return weight.scorer(privateContext, DocsEnum.FLAG_FREQS, acceptDocs); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java index 4e2a5f1..b487238 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java @@ -110,6 +110,31 @@ class ReqExclScorer extends Scorer { } @Override + public int nextPosition() throws IOException { + return reqScorer.nextPosition(); + } + + @Override + public int startPosition() throws IOException { + return reqScorer.startPosition(); + } + + @Override + public int endPosition() throws IOException { + return reqScorer.endPosition(); + } + + @Override + public int startOffset() throws IOException { + return reqScorer.startOffset(); + } + + @Override + public int endOffset() throws IOException { + return reqScorer.endOffset(); + } + + @Override public Collection getChildren() { return Collections.singleton(new ChildScorer(reqScorer, "MUST")); } diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java index d7b4d86..aa65ab3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -29,6 +29,7 @@ class ReqOptSumScorer extends Scorer { /** The scorers passed from the constructor. * These are set to null as soon as their next() or skipTo() returns false. 
*/ + private PositionQueue posQueue; protected Scorer reqScorer; protected Scorer optScorer; @@ -45,16 +46,21 @@ class ReqOptSumScorer extends Scorer { assert optScorer != null; this.reqScorer = reqScorer; this.optScorer = optScorer; + posQueue = new PositionQueue(reqScorer, optScorer); } @Override public int nextDoc() throws IOException { - return reqScorer.nextDoc(); + int doc = reqScorer.nextDoc(); + posQueue.advanceTo(doc); + return doc; } @Override public int advance(int target) throws IOException { - return reqScorer.advance(target); + int doc = reqScorer.advance(target); + posQueue.advanceTo(doc); + return doc; } @Override @@ -93,6 +99,34 @@ class ReqOptSumScorer extends Scorer { } @Override + public int nextPosition() throws IOException { + int optDoc = optScorer.docID(); + if (optDoc < reqScorer.docID()) + optScorer.advance(reqScorer.docID()); + return posQueue.nextPosition(); + } + + @Override + public int startPosition() throws IOException { + return posQueue.startPosition(); + } + + @Override + public int endPosition() throws IOException { + return posQueue.endPosition(); + } + + @Override + public int startOffset() throws IOException { + return posQueue.startOffset(); + } + + @Override + public int endOffset() throws IOException { + return posQueue.endOffset(); + } + + @Override public Collection getChildren() { ArrayList children = new ArrayList<>(2); children.add(new ChildScorer(reqScorer, "MUST")); diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java index 929d3b9..cca0808 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java @@ -17,12 +17,12 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.index.DocsEnum; + import java.io.IOException; import java.util.Collection; import java.util.Collections; -import org.apache.lucene.index.DocsEnum; - /** * Expert: Common scoring functionality for different types of queries. * @@ -67,6 +67,15 @@ public abstract class Scorer extends DocsEnum { public Weight getWeight() { return weight; } + + @Override + public String toString() { + try { + return String.format("%d:%d(%d)->%d(%d)", docID(), startPosition(), startOffset(), endPosition(), endOffset()); + } catch (IOException e) { + return String.format("Cannot retrieve position due to IOException"); + } + } /** Returns child sub-scorers * @lucene.experimental */ diff --git a/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java b/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java index 5803b2e..6cfc5be 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java @@ -17,9 +17,10 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; + +import java.io.IOException; /** * Base {@link Collector} implementation that is used to collect all contexts. 
@@ -50,4 +51,9 @@ public abstract class SimpleCollector implements Collector, LeafCollector { @Override public abstract void collect(int doc) throws IOException; + @Override + public int postingFeatures() { + return DocsEnum.FLAG_FREQS; + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index 986ab06..cf6ae33 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -17,6 +17,10 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.Term; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.FixedBitSet; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -25,9 +29,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.util.FixedBitSet; final class SloppyPhraseScorer extends Scorer { private PhrasePositions min, max; @@ -35,6 +36,7 @@ final class SloppyPhraseScorer extends Scorer { private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq(). private final Similarity.SimScorer docScorer; + private final PhraseQuery.PostingsAndFreq[] postings; private final int slop; private final int numPostings; @@ -55,6 +57,7 @@ final class SloppyPhraseScorer extends Scorer { int slop, Similarity.SimScorer docScorer) { super(weight); this.docScorer = docScorer; + this.postings = postings; this.slop = slop; this.numPostings = postings==null ? 
0 : postings.length; pq = new PhraseQueue(postings.length); @@ -79,58 +82,78 @@ final class SloppyPhraseScorer extends Scorer { } } - /** - * Score a candidate doc for all slop-valid position-combinations (matches) - * encountered while traversing/hopping the PhrasePositions. - *
The score contribution of a match depends on the distance: - *
- highest score for distance=0 (exact match). - *
- score gets lower as distance gets higher. - *
Example: for query "a b"~2, a document "x a b a y" can be scored twice: - * once for "a b" (distance=0), and once for "b a" (distance=2). - *
Possibly not all valid combinations are encountered, because for efficiency - * we always propagate the least PhrasePosition. This allows to base on - * PriorityQueue and move forward faster. - * As result, for example, document "a b c b a" - * would score differently for queries "a b c"~4 and "c b a"~4, although - * they really are equivalent. - * Similarly, for doc "a b c b a f g", query "c b"~2 - * would get same score as "g f"~2, although "c b"~2 could be matched twice. - * We may want to fix this in the future (currently not, for performance reasons). - */ - private float phraseFreq() throws IOException { - if (!initPhrasePositions()) { - return 0.0f; + private int matchLength; + private int startpos = -1; + private int endpos = -1; + + @Override + public int nextPosition() throws IOException { + if (cached) { + cached = false; + return this.startPosition(); } - float freq = 0.0f; - numMatches = 0; - PhrasePositions pp = pq.pop(); - int matchLength = end - pp.position; - int next = pq.top().position; - while (advancePP(pp)) { - if (hasRpts && !advanceRpts(pp)) { + + if (pq.size() < postings.length) + return NO_MORE_POSITIONS; + + PhrasePositions top = pq.pop(); + matchLength = end - top.position; + int next = pq.top().position; + int pos = top.position + top.offset; + while (advancePP(top)) { + if (hasRpts && !advanceRpts(top)) break; // pps exhausted - } - if (pp.position > next) { // done minimizing current match-length + if (top.position > next) { // done minimizing current match-length if (matchLength <= slop) { - freq += docScorer.computeSlopFactor(matchLength); // score match - numMatches++; - } - pq.add(pp); - pp = pq.pop(); + setSpan(pos); + pq.add(top); + return startpos; + } + pq.add(top); + top = pq.pop(); next = pq.top().position; - matchLength = end - pp.position; - } else { - int matchLength2 = end - pp.position; - if (matchLength2 < matchLength) { + matchLength = end - top.position; + pos = top.position + top.offset; + } + else { + int 
matchLength2 = end - top.position; + pos = top.position + top.offset; + if (matchLength2 < matchLength) matchLength = matchLength2; - } } } + if (matchLength <= slop) { - freq += docScorer.computeSlopFactor(matchLength); // score match - numMatches++; - } - return freq; + setSpan(pos); + return startpos; + } + + return NO_MORE_POSITIONS; + + } + + private void setSpan(int topPos) { + startpos = topPos; + endpos = topPos; + for (Object o : pq.getPPs()) { + if (o == null) + continue; + PhrasePositions pp = (PhrasePositions) o; + startpos = Math.min(startpos, pp.position + pp.offset); + endpos = Math.max(endpos, pp.position + pp.offset); + } + } + + boolean cached = false; + + private int firstPosition() throws IOException { + if (!initPhrasePositions()) + return NO_MORE_POSITIONS; + + cached = false; + int pos = nextPosition(); + cached = true; + return pos; } /** advance a PhrasePosition and update 'end', return false if exhausted */ @@ -520,15 +543,61 @@ final class SloppyPhraseScorer extends Scorer { } @Override - public int freq() { - return numMatches; + public int freq() throws IOException { + int f = 0; + while (nextPosition() != NO_MORE_POSITIONS) { + f++; + } + return f; } - - float sloppyFreq() { - return sloppyFreq; + + /** + * Score a candidate doc for all slop-valid position-combinations (matches) + * encountered while traversing/hopping the PhrasePositions. + *
The score contribution of a match depends on the distance: + *
- highest score for distance=0 (exact match). + *
- score gets lower as distance gets higher. + *
Example: for query "a b"~2, a document "x a b a y" can be scored twice: + * once for "a b" (distance=0), and once for "b a" (distance=2). + *
Possibly not all valid combinations are encountered, because for efficiency + * we always propagate the least PhrasePosition. This allows to base on + * PriorityQueue and move forward faster. + * As result, for example, document "a b c b a" + * would score differently for queries "a b c"~4 and "c b a"~4, although + * they really are equivalent. + * Similarly, for doc "a b c b a f g", query "c b"~2 + * would get same score as "g f"~2, although "c b"~2 could be matched twice. + * We may want to fix this in the future (currently not, for performance reasons). + */ + float sloppyFreq() throws IOException { + float f = 0.0f; + while (nextPosition() != NO_MORE_POSITIONS) { + f += docScorer.computeSlopFactor(matchLength); + } + return f; } - -// private void printQueue(PrintStream ps, PhrasePositions ext, String title) { + + @Override + public int startOffset() throws IOException { + return -1; // nocommit + } + + @Override + public int endOffset() throws IOException { + return -1; // nocommit + } + + @Override + public int startPosition() throws IOException { + return startpos; + } + + @Override + public int endPosition() throws IOException { + return endpos; + } + + // private void printQueue(PrintStream ps, PhrasePositions ext, String title) { // //if (min.doc != ?) 
return; // ps.println(); // ps.println("---- "+title); @@ -570,8 +639,8 @@ final class SloppyPhraseScorer extends Scorer { } @Override - public float score() { - return docScorer.score(max.doc, sloppyFreq); + public float score() throws IOException { + return docScorer.score(max.doc, sloppyFreq()); } @Override @@ -587,9 +656,9 @@ final class SloppyPhraseScorer extends Scorer { } } // found a doc with all of the terms - sloppyFreq = phraseFreq(); // check for phrase + //sloppyFreq = phraseFreq(); // check for phrase target = min.doc + 1; // next target in case sloppyFreq is still 0 - } while (sloppyFreq == 0f); + } while (firstPosition() == NO_MORE_POSITIONS); // found a match return max.doc; @@ -602,4 +671,5 @@ final class SloppyPhraseScorer extends Scorer { @Override public String toString() { return "scorer(" + weight + ")"; } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 5435ccd..5174ee3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -17,71 +17,75 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Set; - +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; -/** A Query that matches documents containing a term. - This may be combined with other terms with a {@link BooleanQuery}. - */ +import java.io.IOException; +import java.util.Set; + +/** + * A Query that matches documents containing a term. This may be combined with + * other terms with a {@link BooleanQuery}. 
+ */ public class TermQuery extends Query { private final Term term; private final int docFreq; private final TermContext perReaderTermState; - + final class TermWeight extends Weight { private final Similarity similarity; private final Similarity.SimWeight stats; private final TermContext termStates; - + public TermWeight(IndexSearcher searcher, TermContext termStates) - throws IOException { + throws IOException { assert termStates != null : "TermContext must not be null"; this.termStates = termStates; this.similarity = searcher.getSimilarity(); - this.stats = similarity.computeWeight( - getBoost(), - searcher.collectionStatistics(term.field()), + this.stats = similarity.computeWeight(getBoost(), + searcher.collectionStatistics(term.field()), searcher.termStatistics(term, termStates)); } - + @Override - public String toString() { return "weight(" + TermQuery.this + ")"; } - + public String toString() { + return "weight(" + TermQuery.this + ")"; + } + @Override - public Query getQuery() { return TermQuery.this; } - + public Query getQuery() { + return TermQuery.this; + } + @Override public float getValueForNormalization() { return stats.getValueForNormalization(); } - + @Override public void normalize(float queryNorm, float topLevelBoost) { stats.normalize(queryNorm, topLevelBoost); } - + @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); final TermsEnum termsEnum = getTermsEnum(context); if (termsEnum == null) { return null; } - DocsEnum docs = termsEnum.docs(acceptDocs, null); + DocsEnum docs = termsEnum.docs(acceptDocs, null, flags); assert docs != null; return new 
TermScorer(this, docs, similarity.simScorer(stats, context)); } @@ -96,90 +100,100 @@ public class TermQuery extends Query { assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term; return null; } - //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null")); - final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null); + // System.out.println("LD=" + reader.getLiveDocs() + " set?=" + + // (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null")); + final TermsEnum termsEnum = context.reader().terms(term.field()) + .iterator(null); termsEnum.seekExact(term.bytes(), state); return termsEnum; } private boolean termNotInReader(AtomicReader reader, Term term) throws IOException { // only called from assert - //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString()); + // System.out.println("TQ.termNotInReader reader=" + reader + " term=" + + // field + ":" + bytes.utf8ToString()); return reader.docFreq(term) == 0; } @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, context.reader().getLiveDocs()); + Scorer scorer = scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { float freq = scorer.freq(); SimScorer docScorer = similarity.simScorer(stats, context); ComplexExplanation result = new ComplexExplanation(); - result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); - Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq)); + result.setDescription("weight(" + getQuery() + " in " + doc + ") [" + + similarity.getClass().getSimpleName() + "], result of:"); + Explanation scoreExplanation = 
docScorer.explain(doc, + new Explanation(freq, "termFreq=" + freq)); result.addDetail(scoreExplanation); result.setValue(scoreExplanation.getValue()); result.setMatch(true); return result; } } - return new ComplexExplanation(false, 0.0f, "no matching term"); + return new ComplexExplanation(false, 0.0f, "no matching term"); } } - + /** Constructs a query for the term t. */ public TermQuery(Term t) { this(t, -1); } - - /** Expert: constructs a TermQuery that will use the - * provided docFreq instead of looking up the docFreq - * against the searcher. */ + + /** + * Expert: constructs a TermQuery that will use the provided docFreq instead + * of looking up the docFreq against the searcher. + */ public TermQuery(Term t, int docFreq) { term = t; this.docFreq = docFreq; perReaderTermState = null; } - /** Expert: constructs a TermQuery that will use the - * provided docFreq instead of looking up the docFreq - * against the searcher. */ + /** + * Expert: constructs a TermQuery that will use the provided docFreq instead + * of looking up the docFreq against the searcher. + */ public TermQuery(Term t, TermContext states) { assert states != null; term = t; docFreq = states.docFreq(); perReaderTermState = states; } - + /** Returns the term of this query. */ - public Term getTerm() { return term; } - + public Term getTerm() { + return term; + } + @Override public Weight createWeight(IndexSearcher searcher) throws IOException { final IndexReaderContext context = searcher.getTopReaderContext(); final TermContext termState; - if (perReaderTermState == null || perReaderTermState.topReaderContext != context) { - // make TermQuery single-pass if we don't have a PRTS or if the context differs! + if (perReaderTermState == null + || perReaderTermState.topReaderContext != context) { + // make TermQuery single-pass if we don't have a PRTS or if the context + // differs! 
termState = TermContext.build(context, term); } else { - // PRTS was pre-build for this IS - termState = this.perReaderTermState; + // PRTS was pre-build for this IS + termState = this.perReaderTermState; } - + // we must not ignore the given docFreq - if set use the given value (lie) - if (docFreq != -1) - termState.setDocFreq(docFreq); + if (docFreq != -1) termState.setDocFreq(docFreq); return new TermWeight(searcher, termState); } - + @Override public void extractTerms(Set terms) { terms.add(getTerm()); } - + /** Prints a user-readable version of this query. */ @Override public String toString(String field) { @@ -192,21 +206,20 @@ public class TermQuery extends Query { buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); } - + /** Returns true iff o is equal to this. */ @Override public boolean equals(Object o) { - if (!(o instanceof TermQuery)) - return false; - TermQuery other = (TermQuery)o; + if (!(o instanceof TermQuery)) return false; + TermQuery other = (TermQuery) o; return (this.getBoost() == other.getBoost()) - && this.term.equals(other.term); + && this.term.equals(other.term); } - - /** Returns a hash code value for this object.*/ + + /** Returns a hash code value for this object. */ @Override public int hashCode() { return Float.floatToIntBits(getBoost()) ^ term.hashCode(); } - + } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 6697524..71f15ee 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -17,10 +17,11 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.DocsEnum; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; /** Expert: A Scorer for documents matching a Term. 
*/ @@ -64,6 +65,36 @@ final class TermScorer extends Scorer { public int nextDoc() throws IOException { return docsEnum.nextDoc(); } + + @Override + public int nextPosition() throws IOException { + return docsEnum.nextPosition(); + } + + @Override + public int startPosition() throws IOException { + return docsEnum.startPosition(); + } + + @Override + public int endPosition() throws IOException { + return docsEnum.endPosition(); + } + + @Override + public int startOffset() throws IOException { + return docsEnum.startOffset(); + } + + @Override + public int endOffset() throws IOException { + return docsEnum.endOffset(); + } + + @Override + public BytesRef getPayload() throws IOException { + return docsEnum.getPayload(); + } @Override public float score() throws IOException { @@ -92,5 +123,16 @@ final class TermScorer extends Scorer { /** Returns a string representation of this TermScorer. */ @Override - public String toString() { return "scorer(" + weight + ")"; } + public String toString() { + return "scorer(" + weight + ")[" + super.toString() + "]"; + } + + // TODO: benchmark if the specialized conjunction really benefits + // from this, or if instead its from sorting by docFreq, or both + + DocsEnum getDocsEnum() { + return docsEnum; + } + + } diff --git a/lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java b/lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java index 9a08a2b..9baf702 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java @@ -156,7 +156,12 @@ public class TimeLimitingCollector implements Collector { }; } - + + @Override + public int postingFeatures() { + return collector.postingFeatures(); + } + /** * This is so the same timer can be used with a multi-phase search process such as grouping. 
* We don't want to create a new TimeLimitingCollector for each phase because that would diff --git a/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java b/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java index 4fc5be6..188d06c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java @@ -17,6 +17,7 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.DocsEnum; /** * Just counts the total number of hits. @@ -36,6 +37,12 @@ public class TotalHitCountCollector extends SimpleCollector { } @Override + public int postingFeatures() { + // we don't need frequencies here + return DocsEnum.FLAG_NONE; + } + + @Override public boolean acceptsDocsOutOfOrder() { return true; } diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 8398157..8122279 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -17,14 +17,14 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - -import org.apache.lucene.index.AtomicReader; // javadocs +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReaderContext; // javadocs +import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; +import java.io.IOException; + /** * Expert: Calculate query weights and build query scorers. *

@@ -35,7 +35,7 @@ import org.apache.lucene.util.Bits; * {@link AtomicReader} dependent state should reside in the {@link Scorer}. *

* Since {@link Weight} creates {@link Scorer} instances for a given - * {@link AtomicReaderContext} ({@link #scorer(AtomicReaderContext, Bits)}) + * {@link AtomicReaderContext} ({@link #scorer(AtomicReaderContext, int, Bits)}) * callers must maintain the relationship between the searcher's top-level * {@link IndexReaderContext} and the context used to create a {@link Scorer}. *

@@ -50,7 +50,7 @@ import org.apache.lucene.util.Bits; *

  • The query normalization factor is passed to {@link #normalize(float, float)}. At * this point the weighting is complete. *
  • A Scorer is constructed by - * {@link #scorer(AtomicReaderContext, Bits)}. + * {@link #scorer(AtomicReaderContext, int, Bits)}. * * * @since 2.9 @@ -97,7 +97,7 @@ public abstract class Weight { * @return a {@link Scorer} which scores documents in/out-of order. * @throws IOException if there is a low-level I/O error */ - public abstract Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException; + public abstract Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException; /** * Optional method, to return a {@link BulkScorer} to @@ -126,9 +126,9 @@ public abstract class Weight { * passes them to a collector. * @throws IOException if there is a low-level I/O error */ - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, int flags, Bits acceptDocs) throws IOException { - Scorer scorer = scorer(context, acceptDocs); + Scorer scorer = scorer(context, flags, acceptDocs); if (scorer == null) { // No docs match return null; @@ -199,14 +199,13 @@ public abstract class Weight { * Returns true iff this implementation scores docs only out of order. This * method is used in conjunction with {@link Collector}'s * {@link LeafCollector#acceptsDocsOutOfOrder() acceptsDocsOutOfOrder} and - * {@link #bulkScorer(AtomicReaderContext, boolean, Bits)} to + * {@link #bulkScorer(AtomicReaderContext, boolean, int, Bits)} to * create a matching {@link Scorer} instance for a given {@link Collector}, or * vice versa. *

    * NOTE: the default implementation returns false, i.e. * the Scorer scores documents in-order. */ - public boolean scoresDocsOutOfOrder() { - return false; - } + public boolean scoresDocsOutOfOrder() { return false; } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/package.html b/lucene/core/src/java/org/apache/lucene/search/package.html index 1be51fb..1bba734 100644 --- a/lucene/core/src/java/org/apache/lucene/search/package.html +++ b/lucene/core/src/java/org/apache/lucene/search/package.html @@ -436,8 +436,8 @@ on the built-in available scoring models and extending or changing Similarity. that scores via a {@link org.apache.lucene.search.similarities.Similarity Similarity} will just defer to the Similarity's implementation: {@link org.apache.lucene.search.similarities.Similarity.SimWeight#normalize SimWeight#normalize(float,float)}.

  • - {@link org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.AtomicReaderContext, org.apache.lucene.util.Bits) - scorer(AtomicReaderContext context, Bits acceptDocs)} — + {@link org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.AtomicReaderContext, PostingFeatures flags, org.apache.lucene.util.Bits) + scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs)} — Construct a new {@link org.apache.lucene.search.Scorer Scorer} for this Weight. See The Scorer Class below for help defining a Scorer. As the name implies, the Scorer is responsible for doing the actual scoring of documents given the Query. diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java index d2e924e..bf8eab4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.payloads; */ import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.search.ComplexExplanation; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Scorer; @@ -148,14 +149,14 @@ public class PayloadNearQuery extends SpanNearQuery { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity, similarity.simScorer(stats, context)); } @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - PayloadNearSpanScorer scorer = (PayloadNearSpanScorer) scorer(context, context.reader().getLiveDocs()); + PayloadNearSpanScorer scorer = (PayloadNearSpanScorer) 
scorer(context, DocsEnum.FLAG_PAYLOADS, context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java index 04ecd80..67aeee2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java @@ -19,7 +19,7 @@ package org.apache.lucene.search.payloads; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; @@ -79,7 +79,7 @@ public class PayloadTermQuery extends SpanTermQuery { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), this, similarity.simScorer(stats, context)); } @@ -120,7 +120,7 @@ public class PayloadTermQuery extends SpanTermQuery { protected void processPayload(Similarity similarity) throws IOException { if (termSpans.isPayloadAvailable()) { - final DocsAndPositionsEnum postings = termSpans.getPostings(); + final DocsEnum postings = termSpans.getPostings(); payload = postings.getPayload(); if (payload != null) { payloadScore = function.currentScore(doc, term.field(), @@ -176,7 +176,7 @@ public class PayloadTermQuery extends SpanTermQuery { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, context.reader().getLiveDocs()); + 
PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, DocsEnum.FLAG_POSITIONS, context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/posfilter/BlockPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/posfilter/BlockPhraseScorer.java new file mode 100644 index 0000000..a9d6e19 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/posfilter/BlockPhraseScorer.java @@ -0,0 +1,70 @@ +package org.apache.lucene.search.posfilter; + +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.similarities.Similarity; + +import java.io.IOException; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class BlockPhraseScorer extends PositionFilteredScorer { + + private final Interval[] subIntervals; + + public BlockPhraseScorer(Scorer filteredScorer, Similarity.SimScorer simScorer) { + super(filteredScorer, simScorer); + subIntervals = new Interval[subScorers.length]; + for (int i = 0; i < subScorers.length; i++) { + subIntervals[i] = new Interval(); + } + } + + @Override + public void reset(int doc) throws IOException { + super.reset(doc); + for (int i = 0; i < subScorers.length; i++) { + subIntervals[i].reset(); + } + } + + @Override + protected int doNextPosition() throws IOException { + if (subScorers[0].nextPosition() == NO_MORE_POSITIONS) + return NO_MORE_POSITIONS; + subIntervals[0].update(subScorers[0]); + int i = 1; + while (i < subScorers.length) { + while (subIntervals[i].begin <= subIntervals[i - 1].end) { + if (subScorers[i].nextPosition() == NO_MORE_POSITIONS) + return NO_MORE_POSITIONS; + subIntervals[i].update(subScorers[i]); + } + if (subIntervals[i].begin == subIntervals[i - 1].end + 1) { + i++; + } + else { + if (subScorers[0].nextPosition() == NO_MORE_POSITIONS) + return NO_MORE_POSITIONS; + subIntervals[0].update(subScorers[0]); + i = 1; + } + } + current.update(subIntervals[0], subIntervals[subScorers.length - 1]); + return subScorers[0].startPosition(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/posfilter/Interval.java b/lucene/core/src/java/org/apache/lucene/search/posfilter/Interval.java new file mode 100644 index 0000000..b29f85c --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/posfilter/Interval.java @@ -0,0 +1,200 @@ +package org.apache.lucene.search.posfilter; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocsEnum; + +import java.io.IOException; + +/** + * Represents a section of a document that matches a query + */ +public class Interval implements Cloneable { + + /** The position of the start of this Interval */ + public int begin; + + /** The position of the end of this Interval */ + public int end; + + /** The offset of the start of this Interval */ + public int offsetBegin; + + /** The offset of the end of this Interval */ + public int offsetEnd; + + /** An interval that will always compare as less than any other interval */ + public static final Interval INFINITE_INTERVAL = new Interval(); + + /** + * Constructs a new Interval + * @param begin the start position + * @param end the end position + * @param offsetBegin the start offset + * @param offsetEnd the end offset + */ + public Interval(int begin, int end, int offsetBegin, int offsetEnd) { + this.begin = begin; + this.end = end; + this.offsetBegin = offsetBegin; + this.offsetEnd = offsetEnd; + } + + /** + * Constructs a new Interval with no initial values. This + * will always compare as less than any other Interval. 
+ */ + public Interval() { + this(Integer.MIN_VALUE, Integer.MIN_VALUE, -1, -1); + } + + public Interval(DocsEnum docsEnum) throws IOException { + this.begin = docsEnum.startPosition(); + this.end = docsEnum.endPosition(); + this.offsetBegin = docsEnum.startOffset(); + this.offsetEnd = docsEnum.endOffset(); + } + + /** + * Update to span the range defined by two other Intervals. + * @param start the first Interval + * @param end the second Interval + */ + public void update(Interval start, Interval end) { + this.begin = start.begin; + this.offsetBegin = start.offsetBegin; + this.end = end.end; + this.offsetEnd = end.offsetEnd; + } + + /** + * Compare with another Interval. + * @param other the comparator + * @return true if both start and end positions are less than + * the comparator. + */ + public boolean lessThanExclusive(Interval other) { + return begin < other.begin && end < other.end; + } + + /** + * Compare with another Interval. + * @param other the comparator + * @return true if both start and end positions are less than + * or equal to the comparator's. + */ + public boolean lessThan(Interval other) { + return begin <= other.begin && end <= other.end; + } + + /** + * Compare with another Interval + * @param other the comparator + * @return true if both start and end positions are greater then + * the comparator's. + */ + public boolean greaterThanExclusive(Interval other) { + return begin > other.begin && end > other.end; + } + + /** + * Compare with another Interval + * @param other the comparator + * @return true if both start and end positions are greater then + * of equal to the comparator's. 
+ */ + public boolean greaterThan(Interval other) { + return begin >= other.begin && end >= other.end; + } + + /** + * Compare with another Interval + * @param other the comparator + * @return true if this Interval contains the comparator + */ + public boolean contains(Interval other) { + return begin <= other.begin && other.end <= end; + } + + /** + * Compare with another Interval to find overlaps + * @param other + * @return true if the two intervals overlap + */ + public boolean overlaps(Interval other) { + return this.contains(other) || other.contains(this); + } + + /** + * Set all values of this Interval to be equal to another's + * @param other the Interval to copy + */ + public void copy(Interval other) { + begin = other.begin; + end = other.end; + offsetBegin = other.offsetBegin; + offsetEnd = other.offsetEnd; + } + + /** + * Set to a state that will always compare as less than any + * other Interval. + */ + public void reset() { + offsetBegin = offsetEnd = -1; + begin = end = Integer.MIN_VALUE; + } + + /** + * Set to a state that will always compare as more than any + * other Interval. 
+ */ + public void setMaximum() { + offsetBegin = offsetEnd = -1; + begin = end = Integer.MAX_VALUE; + } + + @Override + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(); // should not happen + } + } + + @Override + public String toString() { + return "Interval [begin=" + begin + "(" + offsetBegin + "), end=" + + end + "(" + offsetEnd + ")]"; + } + + public void update(DocsEnum docsEnum) throws IOException { + offsetBegin = docsEnum.startOffset(); + offsetEnd = docsEnum.endOffset(); + begin = docsEnum.startPosition(); + end = docsEnum.endPosition(); + } + + public void update(Interval interval) { + this.begin = interval.begin; + this.end = interval.end; + this.offsetBegin = interval.offsetBegin; + this.offsetEnd = interval.offsetEnd; + } + +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/posfilter/NonOverlappingQuery.java b/lucene/core/src/java/org/apache/lucene/search/posfilter/NonOverlappingQuery.java new file mode 100644 index 0000000..a6f340f --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/posfilter/NonOverlappingQuery.java @@ -0,0 +1,204 @@ +package org.apache.lucene.search.posfilter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.Bits; + +import java.io.IOException; +import java.util.Set; + +/** + * A Query that matches documents containing an interval (the minuend) that + * does not contain another interval (the subtrahend). + * + * As an example, given the following {@link org.apache.lucene.search.BooleanQuery}: + *
    + *   BooleanQuery bq = new BooleanQuery();
    + *   bq.add(new TermQuery(new Term(field, "quick")), BooleanQuery.Occur.MUST);
    + *   bq.add(new TermQuery(new Term(field, "fox")), BooleanQuery.Occur.MUST);
    + * 
    + * + * The document "the quick brown fox" will be matched by this query. But + * create a NonOverlappingQuery using this query as a minuend: + *
    + *   NonOverlappingQuery brq = new NonOverlappingQuery(bq, new TermQuery(new Term(field, "brown")));
    + * 
    + * + * This query will not match "the quick brown fox", because "brown" is found + * within the interval of the boolean query for "quick" and "fox. The query + * will match "the quick fox is brown", because here "brown" is outside + * the minuend's interval. + * + * N.B. Positions must be included in the index for this query to work + * + * Implements the Brouwerian operator as defined in "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics" + * + * @lucene.experimental + */ +public final class NonOverlappingQuery extends PositionFilterQuery { + + private Query subtrahend; + + /** + * Constructs a Query that matches documents containing intervals of the minuend + * that are not subtended by the subtrahend + * @param minuend the minuend Query + * @param subtrahend the subtrahend Query + */ + public NonOverlappingQuery(Query minuend, Query subtrahend) { + super(minuend, new BrouwerianScorerFactory(subtrahend)); + this.subtrahend = subtrahend; + } + + @Override + public void extractTerms(Set terms) { + super.extractTerms(terms); + subtrahend.extractTerms(terms); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Query rewrittenMinuend = innerQuery.rewrite(reader); + Query rewrittenSubtrahend = subtrahend.rewrite(reader); + if (rewrittenMinuend != innerQuery || rewrittenSubtrahend != subtrahend) { + return new NonOverlappingQuery(rewrittenMinuend, rewrittenSubtrahend); + } + return this; + } + + private static class BrouwerianScorerFactory implements ScorerFilterFactory { + + private final Query subtrahend; + + BrouwerianScorerFactory(Query subtrahend) { + this.subtrahend = subtrahend; + } + + @Override + public Scorer scorer(Scorer filteredScorer, Similarity.SimScorer simScorer) { + throw new UnsupportedOperationException(); + } + + @Override + public String getName() { + return "NonOverlapping[" + subtrahend.toString() + "]/"; + } + } + + @Override + public Weight createWeight(IndexSearcher searcher) throws 
IOException { + return new BrouwerianWeight(innerQuery.createWeight(searcher), + subtrahend.createWeight(searcher), searcher); + } + + class BrouwerianWeight extends ScorerFilterWeight { + + private final Weight subtrahendWeight; + + public BrouwerianWeight(Weight minuendWeight, Weight subtrahendWeight, IndexSearcher searcher) + throws IOException { + super(minuendWeight, searcher); + this.subtrahendWeight = subtrahendWeight; + } + + @Override + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { + return new BrouwerianScorer(innerWeight.scorer(context, flags, acceptDocs), + subtrahendWeight.scorer(context, flags, acceptDocs), + similarity.simScorer(stats, context)); + } + } + + static class BrouwerianScorer extends PositionFilteredScorer { + + private final Scorer subtrahend; + private Interval subtInterval = new Interval(); + private int subtPosition = -1; + + BrouwerianScorer(Scorer minuend, Scorer subtrahend, Similarity.SimScorer simScorer) { + super(minuend, simScorer); + this.subtrahend = subtrahend; + } + + @Override + protected void reset(int doc) throws IOException { + super.reset(doc); + if (this.subtrahend == null || this.subtrahend.advance(doc) != doc) + subtPosition = NO_MORE_POSITIONS; + else + subtPosition = -1; + this.subtInterval.reset(); + } + + @Override + protected int doNextPosition() throws IOException { + if (subtPosition == NO_MORE_POSITIONS) { + int pos = child.nextPosition(); + if (pos != NO_MORE_POSITIONS) + current.update(child); + return pos; + } + while (child.nextPosition() != NO_MORE_POSITIONS) { + current.update(child); + while (subtInterval.lessThanExclusive(current) && + (subtPosition = subtrahend.nextPosition()) != NO_MORE_POSITIONS) { + subtInterval.update(subtrahend); + } + if (subtPosition == NO_MORE_POSITIONS || !current.overlaps(subtInterval)) + return current.begin; + } + return NO_MORE_POSITIONS; + } + } + + @Override + public int hashCode() { + final int prime = 31; + int 
result = super.hashCode(); + result = prime * result + ((innerQuery == null) ? 0 : innerQuery.hashCode()); + result = prime * result + + ((subtrahend == null) ? 0 : subtrahend.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!super.equals(obj)) return false; + if (getClass() != obj.getClass()) return false; + NonOverlappingQuery other = (NonOverlappingQuery) obj; + if (innerQuery == null) { + if (other.innerQuery != null) return false; + } else if (!innerQuery.equals(other.innerQuery)) return false; + if (subtrahend == null) { + if (other.subtrahend != null) return false; + } else if (!subtrahend.equals(other.subtrahend)) return false; + return true; + } + +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/posfilter/OrderedNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/posfilter/OrderedNearQuery.java new file mode 100644 index 0000000..27d3348 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/posfilter/OrderedNearQuery.java @@ -0,0 +1,139 @@ +package org.apache.lucene.search.posfilter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.similarities.Similarity; + +import java.io.IOException; + +/** + * A query that matches if a set of subqueries also match, and are within + * a given distance of each other within the document. The subqueries + * must appear in the document in order. + * + * N.B. Positions must be included in the index for this query to work + * + * Implements the AND< operator as defined in "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics" + * + * @lucene.experimental + */ + +public class OrderedNearQuery extends PositionFilterQuery { + + /** + * Constructs an OrderedNearQuery + * @param slop the maximum distance between the subquery matches + * @param subqueries the subqueries to match. + */ + public OrderedNearQuery(int slop, Query... subqueries) { + super(buildBooleanQuery(subqueries), new OrderedNearScorerFactory(slop)); + } + + private static class OrderedNearScorerFactory implements ScorerFilterFactory { + + private final int slop; + + public OrderedNearScorerFactory(int slop) { + this.slop = slop; + } + + @Override + public Scorer scorer(Scorer filteredScorer, Similarity.SimScorer simScorer) { + return new WithinFilteredScorer(new OrderedNearScorer(filteredScorer, simScorer), slop, simScorer); + } + + @Override + public String getName() { + return "OrderedNear/" + slop; + } + } + + private static class OrderedNearScorer extends PositionFilteredScorer { + + private final int lastiter; + + private int index = 1; + private Interval[] intervals; + + public OrderedNearScorer(Scorer filteredScorer, Similarity.SimScorer simScorer) { + super(filteredScorer, simScorer); + intervals = new Interval[subScorers.length]; + for (int i = 0; i < subScorers.length; i++) { + intervals[i] = new Interval(); + } + lastiter = intervals.length - 1; + } + + @Override + public int freq() throws IOException { + return 1; // nocommit + } + + @Override + protected 
void reset(int doc) throws IOException { + for (int i = 0; i < subScorers.length; i++) { + assert subScorers[i].docID() == doc; + intervals[i].update(Interval.INFINITE_INTERVAL); + } + if (subScorers[0].nextPosition() == NO_MORE_POSITIONS) + intervals[0].setMaximum(); + else + intervals[0].update(subScorers[0]); + index = 1; + } + + @Override + protected int doNextPosition() throws IOException { + if (intervals[0].begin == NO_MORE_POSITIONS) + return NO_MORE_POSITIONS; + current.setMaximum(); + int b = Integer.MAX_VALUE; + while (true) { + while (true) { + final Interval previous = intervals[index - 1]; + if (previous.end >= b) { + return current.begin; + } + if (index == intervals.length || intervals[index].begin > previous.end) + break; + Interval scratch = intervals[index]; + do { + if (scratch.end >= b || subScorers[index].nextPosition() == NO_MORE_POSITIONS) + return current.begin; + intervals[index].update(subScorers[index]); + scratch = intervals[index]; + } while (scratch.begin <= previous.end); + index++; + } + current.update(intervals[0], intervals[lastiter]); + matchDistance = (intervals[lastiter].begin - lastiter) - intervals[0].end; + b = intervals[lastiter].begin; + index = 1; + if (subScorers[0].nextPosition() == NO_MORE_POSITIONS) { + intervals[0].setMaximum(); + return current.begin; + } + intervals[0].update(subScorers[0]); + } + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/posfilter/PositionFilterQuery.java b/lucene/core/src/java/org/apache/lucene/search/posfilter/PositionFilterQuery.java new file mode 100644 index 0000000..0f93b3a --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/posfilter/PositionFilterQuery.java @@ -0,0 +1,164 @@ +package org.apache.lucene.search.posfilter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ComplexExplanation; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.Bits; + +import java.io.IOException; +import java.util.Set; +import java.util.TreeSet; + +public class PositionFilterQuery extends Query { + + protected final Query innerQuery; + protected final ScorerFilterFactory scorerFilterFactory; + + public PositionFilterQuery(Query innerQuery, ScorerFilterFactory scorerFilterFactory) { + this.innerQuery = innerQuery; + this.scorerFilterFactory = scorerFilterFactory; + } + + protected static BooleanQuery buildBooleanQuery(Query... 
queries) { + BooleanQuery bq = new BooleanQuery(); + for (Query q : queries) { + bq.add(q, BooleanClause.Occur.MUST); + } + return bq; + } + + @Override + public void extractTerms(Set terms) { + innerQuery.extractTerms(terms); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Query rewritten = innerQuery.rewrite(reader); + if (rewritten != innerQuery) { + return new PositionFilterQuery(rewritten, scorerFilterFactory); + } + return this; + } + + @Override + public Weight createWeight(IndexSearcher searcher) throws IOException { + return new ScorerFilterWeight(innerQuery.createWeight(searcher), searcher); + } + + @Override + public String toString(String field) { + return scorerFilterFactory.getName() + "[" + innerQuery.toString() + "]"; + } + + public class ScorerFilterWeight extends Weight { + + protected final Weight innerWeight; + protected final Similarity similarity; + protected final Similarity.SimWeight stats; + + public ScorerFilterWeight(Weight innerWeight, IndexSearcher searcher) throws IOException { + this.innerWeight = innerWeight; + this.similarity = searcher.getSimilarity(); + this.stats = getSimWeight(innerWeight.getQuery(), searcher); + } + + private Similarity.SimWeight getSimWeight(Query query, IndexSearcher searcher) throws IOException { + TreeSet terms = new TreeSet(); + query.extractTerms(terms); + if (terms.size() == 0) + return null; + int i = 0; + TermStatistics[] termStats = new TermStatistics[terms.size()]; + for (Term term : terms) { + TermContext state = TermContext.build(searcher.getTopReaderContext(), term); + termStats[i] = searcher.termStatistics(term, state); + i++; + } + final String field = terms.first().field(); // nocommit - should we be checking all filtered terms + // are on the same field? 
+ return similarity.computeWeight(query.getBoost(), searcher.collectionStatistics(field), termStats); + } + + @Override + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + Scorer scorer = scorer(context, DocsEnum.FLAG_POSITIONS, context.reader().getLiveDocs()); + if (scorer != null) { + int newDoc = scorer.advance(doc); + if (newDoc == doc) { + float freq = scorer.freq(); + Similarity.SimScorer docScorer = similarity.simScorer(stats, context); + ComplexExplanation result = new ComplexExplanation(); + result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); + Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); + result.addDetail(scoreExplanation); + result.setValue(scoreExplanation.getValue()); + result.setMatch(true); + return result; + } + } + return new ComplexExplanation(false, 0.0f, + "No matching term within position filter"); + } + + @Override + public Query getQuery() { + return PositionFilterQuery.this; + } + + @Override + public float getValueForNormalization() throws IOException { + return stats == null ? 1.0f : stats.getValueForNormalization(); + } + + @Override + public void normalize(float norm, float topLevelBoost) { + if (stats != null) + stats.normalize(norm, topLevelBoost); + } + + @Override + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { + Scorer filteredScorer = innerWeight.scorer(context, flags | DocsEnum.FLAG_POSITIONS, acceptDocs); + return filteredScorer == null ? 
null + : scorerFilterFactory.scorer(filteredScorer, similarity.simScorer(stats, context)); + } + } + + public static interface ScorerFilterFactory { + + public Scorer scorer(Scorer filteredScorer, Similarity.SimScorer simScorer); + + public String getName(); + + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/posfilter/PositionFilteredScorer.java b/lucene/core/src/java/org/apache/lucene/search/posfilter/PositionFilteredScorer.java new file mode 100644 index 0000000..8dfbb43 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/posfilter/PositionFilteredScorer.java @@ -0,0 +1,137 @@ +package org.apache.lucene.search.posfilter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.similarities.Similarity; + +import java.io.IOException; + +public abstract class PositionFilteredScorer extends Scorer { + + protected final Scorer[] subScorers; + protected final Scorer child; + protected final Interval current = new Interval(); + protected final Similarity.SimScorer simScorer; + protected int matchDistance; + + private boolean buffered; + + public PositionFilteredScorer(Scorer filteredScorer, Similarity.SimScorer simScorer) { + super(filteredScorer.getWeight()); + this.simScorer = simScorer; + child = filteredScorer; + subScorers = new Scorer[filteredScorer.getChildren().size()]; + int i = 0; + for (ChildScorer subScorer : filteredScorer.getChildren()) { + subScorers[i++] = subScorer.child; + } + } + + @Override + public float score() throws IOException { + return this.simScorer.score(docID(), freq()); + } + + @Override + public int docID() { + return child.docID(); + } + + @Override + public int freq() throws IOException { + int freq = 0; + while (nextPosition() != NO_MORE_POSITIONS) { + freq++; + } + return freq; + } + + @Override + public int nextDoc() throws IOException { + while (child.nextDoc() != NO_MORE_DOCS) { + reset(child.docID()); + if (nextPosition() != NO_MORE_POSITIONS) { + buffered = true; + return child.docID(); + } + } + return NO_MORE_DOCS; + } + + @Override + public int advance(int target) throws IOException { + if (child.advance(target) == NO_MORE_DOCS) + return NO_MORE_DOCS; + do { + reset(child.docID()); + if (nextPosition() != NO_MORE_POSITIONS) { + buffered = true; + return child.docID(); + } + } while (child.nextDoc() != NO_MORE_DOCS); + return NO_MORE_DOCS; + } + + @Override + public int nextPosition() throws IOException { + if (buffered) { + //System.out.println(this.hashCode() + ": returning buffered nextPos"); + buffered = false; + return current.begin; + } + //System.out.println(this.hashCode() + ": returning unbuffered nextPos"); + 
return doNextPosition(); + } + + protected abstract int doNextPosition() throws IOException; + + protected void reset(int doc) throws IOException { + buffered = false; + }; + + public int getMatchDistance() { + return matchDistance; + } + + @Override + public int startPosition() throws IOException { + return current.begin; + } + + @Override + public int endPosition() throws IOException { + return current.end; + } + + @Override + public int startOffset() throws IOException { + return current.offsetBegin; + } + + @Override + public int endOffset() throws IOException { + return current.offsetEnd; + } + + @Override + public long cost() { + return child.cost(); + } +// nocommit Payloads - need to add these to Interval? +} diff --git a/lucene/core/src/java/org/apache/lucene/search/posfilter/RangeFilterQuery.java b/lucene/core/src/java/org/apache/lucene/search/posfilter/RangeFilterQuery.java new file mode 100644 index 0000000..63b0d1d --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/posfilter/RangeFilterQuery.java @@ -0,0 +1,82 @@ +package org.apache.lucene.search.posfilter; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.similarities.Similarity; + +import java.io.IOException; + +/** + * Copyright (c) 2012 Lemur Consulting Ltd. + *

    + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class RangeFilterQuery extends PositionFilterQuery { + + public RangeFilterQuery(int start, int end, Query innerQuery) { + super(innerQuery, new RangeFilterScorerFactory(start, end)); + } + + public RangeFilterQuery(int end, Query innerQuery) { + this(0, end, innerQuery); + } + + private static class RangeFilterScorerFactory implements ScorerFilterFactory { + + private final int start; + private final int end; + + public RangeFilterScorerFactory(int start, int end) { + this.start = start; + this.end = end; + } + + @Override + public Scorer scorer(Scorer filteredScorer, Similarity.SimScorer simScorer) { + return new RangeFilterScorer(start, end, filteredScorer, simScorer); + } + + @Override + public String getName() { + return "RangeFilter(" + start + "," + end + ")"; + } + } + + private static class RangeFilterScorer extends PositionFilteredScorer { + + private final int start; + private final int end; + + public RangeFilterScorer(int start, int end, Scorer filteredScorer, Similarity.SimScorer simScorer) { + super(filteredScorer, simScorer); + this.start = start; + this.end = end; + } + + @Override + protected int doNextPosition() throws IOException { + int position; + while ((position = child.nextPosition()) != NO_MORE_POSITIONS) { + if (position > end) + return NO_MORE_POSITIONS; + if (position >= start) { + current.update(child); + return position; + } + } + return NO_MORE_POSITIONS; + } + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/posfilter/UnorderedNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/posfilter/UnorderedNearQuery.java new file mode 100644 index 0000000..e6944bb --- 
/dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/posfilter/UnorderedNearQuery.java @@ -0,0 +1,187 @@ +package org.apache.lucene.search.posfilter; + +/** + * Copyright (c) 2012 Lemur Consulting Ltd. + *

    + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.PositionQueue; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.similarities.Similarity; + +import java.io.IOException; + +/** + * A query that matches if a set of subqueries also match, and are within + * a given distance of each other within the document. The subqueries + * may appear in the document in any order. + * + * N.B. Positions must be included in the index for this query to work + * + * Implements the LOWPASSk operator as defined in "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics" + * + * @lucene.experimental + */ + +public class UnorderedNearQuery extends PositionFilterQuery { + + /** + * Constructs an UnorderedNearQuery + * @param slop the maximum distance between the subquery matches + * @param subqueries the subqueries to match. + */ + public UnorderedNearQuery(int slop, Query... 
subqueries) { + super(buildBooleanQuery(subqueries), new UnorderedNearScorerFactory(slop)); + } + + private static class UnorderedNearScorerFactory implements ScorerFilterFactory { + + private final int slop; + + UnorderedNearScorerFactory(int slop) { + this.slop = slop; + } + + @Override + public Scorer scorer(Scorer filteredScorer, Similarity.SimScorer simScorer) { + return new WithinFilteredScorer(new UnorderedNearScorer(filteredScorer, simScorer), slop, simScorer); + } + + @Override + public String getName() { + return "UnorderedNear/" + slop; + } + } + + private static class UnorderedNearScorer extends PositionFilteredScorer { + + SpanningPositionQueue posQueue; + + public UnorderedNearScorer(Scorer filteredScorer, Similarity.SimScorer simScorer) { + super(filteredScorer, simScorer); + posQueue = new SpanningPositionQueue(subScorers); + } + + @Override + protected int doNextPosition() throws IOException { + while (posQueue.isFull() && posQueue.span.begin == current.begin) { + posQueue.nextPosition(); + } + if (!posQueue.isFull()) + return NO_MORE_POSITIONS; + do { + //current.update(posQueue.top().interval, posQueue.span); + posQueue.updateCurrent(current); + if (current.equals(posQueue.top().interval)) + return current.begin; + matchDistance = posQueue.getMatchDistance(); + posQueue.nextPosition(); + } while (posQueue.isFull() && current.end == posQueue.span.end); + return current.begin; + } + + @Override + protected void reset(int doc) throws IOException { + super.reset(doc); + current.reset(); + posQueue.advanceTo(doc); + } + + } + + private static class SpanningPositionQueue extends PositionQueue { + + Interval span = new Interval(); + int scorerCount; + int firstIntervalEnd; + int lastIntervalBegin; + + public SpanningPositionQueue(Scorer[] subScorers) { + super(subScorers); + scorerCount = subScorers.length; + } + + public int getMatchDistance() { + return lastIntervalBegin - firstIntervalEnd - scorerCount + 1; + } + + public boolean isFull() { + return 
queuesize == scorerCount; + } + + public void updateCurrent(Interval current) { + final Interval top = this.top().interval; + current.update(top, span); + this.firstIntervalEnd = top.end; + } + + private void updateRightExtreme(Interval newRight) { + if (span.end <= newRight.end) { + span.update(span, newRight); + this.lastIntervalBegin = newRight.begin; + } + } + + protected void updateInternalIntervals() { + updateRightExtreme(top().interval); + } + + @Override + public int nextPosition() throws IOException { + int position; + if ((position = super.nextPosition()) == DocsEnum.NO_MORE_POSITIONS) { + return DocsEnum.NO_MORE_POSITIONS; + } + span.update(top().interval, span); + return position; + } + + @Override + protected void init() throws IOException { + super.init(); + for (Object docsEnumRef : getHeapArray()) { + if (docsEnumRef != null) { + final Interval i = ((DocsEnumRef) docsEnumRef).interval; + updateRightExtreme(i); + } + } + } + + @Override + public void advanceTo(int doc) { + super.advanceTo(doc); + span.reset(); + firstIntervalEnd = lastIntervalBegin = span.begin; + } + + @Override + protected boolean lessThan(DocsEnumRef left, DocsEnumRef right) { + final Interval a = left.interval; + final Interval b = right.interval; + return a.begin < b.begin || (a.begin == b.begin && a.end > b.end); + } + + @Override + public String toString() { + return top().interval.toString(); + } + } + + +} + diff --git a/lucene/core/src/java/org/apache/lucene/search/posfilter/WithinFilteredScorer.java b/lucene/core/src/java/org/apache/lucene/search/posfilter/WithinFilteredScorer.java new file mode 100644 index 0000000..6b85f8c --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/posfilter/WithinFilteredScorer.java @@ -0,0 +1,47 @@ +package org.apache.lucene.search.posfilter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.similarities.Similarity; + +import java.io.IOException; + +public class WithinFilteredScorer extends PositionFilteredScorer { + + private final int slop; + private final PositionFilteredScorer wrappedScorer; + + public WithinFilteredScorer(PositionFilteredScorer wrappedScorer, int slop, Similarity.SimScorer simScorer) { + super(wrappedScorer, simScorer); + this.slop = slop; + this.wrappedScorer = wrappedScorer; + } + + @Override + protected int doNextPosition() throws IOException { + int position; + while ((position = wrappedScorer.nextPosition()) != NO_MORE_POSITIONS) { + if (wrappedScorer.getMatchDistance() <= slop) { + current.update(wrappedScorer); + return position; + } + } + return NO_MORE_POSITIONS; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java index b4ff8bb..0703287 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java @@ -17,9 +17,7 @@ package org.apache.lucene.search.similarities; * limitations under the License. 
*/ -import java.io.IOException; - -import org.apache.lucene.index.AtomicReader; // javadoc +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.search.BooleanQuery; @@ -29,9 +27,11 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermStatistics; -import org.apache.lucene.search.spans.SpanQuery; // javadoc +import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.SmallFloat; // javadoc +import org.apache.lucene.util.SmallFloat; + +import java.io.IOException; /** * Similarity defines the components of Lucene scoring. diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java index 74a098d..e78a8ff 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -17,12 +17,12 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; - -import org.apache.lucene.search.Weight; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; import org.apache.lucene.search.similarities.Similarity; +import java.io.IOException; + /** * Public for extension only. */ @@ -96,16 +96,22 @@ public class SpanScorer extends Scorer { public int freq() throws IOException { return numMatches; } - + + @Override + public int nextPosition() throws IOException { + return -1; // nocommit maybe I can coerce this into working? + } + /** Returns the intermediate "sloppy freq" adjusted for edit distance * @lucene.internal */ // only public so .payloads can see it. 
public float sloppyFreq() throws IOException { return freq; } - + @Override public long cost() { return spans.cost(); } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index f0a27c4..385f62b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -20,7 +20,7 @@ package org.apache.lucene.search.spans; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.Fields; import org.apache.lucene.index.Term; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; @@ -120,7 +120,7 @@ public class SpanTermQuery extends SpanQuery { final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null); termsEnum.seekExact(term.bytes(), state); - final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, DocsAndPositionsEnum.FLAG_PAYLOADS); + final DocsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, DocsEnum.FLAG_PAYLOADS); if (postings != null) { return new TermSpans(postings, term); diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index 0b20cdb..55b3714 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.spans; */ import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; @@ -81,7 +82,7 @@ public class 
SpanWeight extends Weight { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { if (stats == null) { return null; } else { @@ -91,7 +92,7 @@ public class SpanWeight extends Weight { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - SpanScorer scorer = (SpanScorer) scorer(context, context.reader().getLiveDocs()); + SpanScorer scorer = (SpanScorer) scorer(context, DocsEnum.FLAG_POSITIONS, context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java index d4974a5..39f72eb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java @@ -17,7 +17,7 @@ package org.apache.lucene.search.spans; import org.apache.lucene.index.Term; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.BytesRef; @@ -30,7 +30,7 @@ import java.util.Collection; * Public for extension only */ public class TermSpans extends Spans { - protected final DocsAndPositionsEnum postings; + protected final DocsEnum postings; protected final Term term; protected int doc; protected int freq; @@ -38,7 +38,7 @@ public class TermSpans extends Spans { protected int position; protected boolean readPayload; - public TermSpans(DocsAndPositionsEnum postings, Term term) { + public TermSpans(DocsEnum postings, Term term) { this.postings = postings; this.term = term; doc = -1; @@ -132,7 +132,7 @@ public class TermSpans extends Spans { (doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? 
"END" : doc + "-" + position); } - public DocsAndPositionsEnum getPostings() { + public DocsEnum getPostings() { return postings; } diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java b/lucene/core/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java index af307bb..c421033 100644 --- a/lucene/core/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java +++ b/lucene/core/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java @@ -26,7 +26,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; @@ -72,7 +72,7 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase { writer.addDocument(doc); IndexReader reader = writer.getReader(); - DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, + DocsEnum termPositions = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "preanalyzed", new BytesRef("term1")); diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java b/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java index 957b4d1..a001e9d 100644 --- a/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java +++ b/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java @@ -17,16 +17,11 @@ package org.apache.lucene.analysis; * limitations under the License. 
*/ -import java.io.Reader; -import java.io.StringReader; -import java.util.Arrays; -import java.util.Random; - import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.Fields; import org.apache.lucene.index.RandomIndexWriter; @@ -34,12 +29,17 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.TestUtil; -import org.apache.lucene.util.automaton.AutomatonTestUtil; import org.apache.lucene.util.automaton.Automata; -import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.AutomatonTestUtil; import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.RegExp; +import java.io.Reader; +import java.io.StringReader; +import java.util.Arrays; +import java.util.Random; + public class TestMockAnalyzer extends BaseTokenStreamTestCase { /** Test a configuration that behaves a lot like WhitespaceAnalyzer */ @@ -319,7 +319,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { final Terms terms = fields.terms("f"); final TermsEnum te = terms.iterator(null); assertEquals(new BytesRef("a"), te.next()); - final DocsAndPositionsEnum dpe = te.docsAndPositions(null, null); + final DocsEnum dpe = te.docsAndPositions(null, null); assertEquals(0, dpe.nextDoc()); assertEquals(2, dpe.freq()); assertEquals(0, dpe.nextPosition()); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java index cf92df3..7a55406 100644 --- 
a/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java @@ -38,7 +38,6 @@ import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.TermsEnum.SeekStatus; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -283,8 +282,8 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase { public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep) throws Exception { BytesRef term; Bits randomBits = new RandomBits(MAXDOC, random().nextDouble(), random()); - DocsAndPositionsEnum leftPositions = null; - DocsAndPositionsEnum rightPositions = null; + DocsEnum leftPositions = null; + DocsEnum rightPositions = null; DocsEnum leftDocs = null; DocsEnum rightDocs = null; @@ -305,30 +304,30 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase { leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions)); // with payloads only - assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), - rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS)); - assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), - rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS)); + assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsEnum.FLAG_PAYLOADS), + rightPositions = 
rightTermsEnum.docsAndPositions(null, rightPositions, DocsEnum.FLAG_PAYLOADS)); + assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_PAYLOADS), + rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_PAYLOADS)); assertPositionsSkipping(leftTermsEnum.docFreq(), - leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), - rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS)); + leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsEnum.FLAG_PAYLOADS), + rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsEnum.FLAG_PAYLOADS)); assertPositionsSkipping(leftTermsEnum.docFreq(), - leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS), - rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS)); + leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_PAYLOADS), + rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_PAYLOADS)); // with offsets only - assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), - rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS)); - assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), - rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS)); + assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsEnum.FLAG_OFFSETS), + rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsEnum.FLAG_OFFSETS)); + 
assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_OFFSETS), + rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_OFFSETS)); assertPositionsSkipping(leftTermsEnum.docFreq(), - leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), - rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS)); + leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsEnum.FLAG_OFFSETS), + rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsEnum.FLAG_OFFSETS)); assertPositionsSkipping(leftTermsEnum.docFreq(), - leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS), - rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS)); + leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_OFFSETS), + rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_OFFSETS)); // with positions only assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE), @@ -388,7 +387,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase { /** * checks docs + freqs + positions + payloads, sequentially */ - public void assertDocsAndPositionsEnum(DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) throws Exception { + public void assertDocsAndPositionsEnum(DocsEnum leftDocs, DocsEnum rightDocs) throws Exception { if (leftDocs == null || rightDocs == null) { assertNull(leftDocs); assertNull(rightDocs); @@ -461,7 +460,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase { /** * checks advancing docs + positions */ - public void assertPositionsSkipping(int docFreq, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum 
rightDocs) throws Exception { + public void assertPositionsSkipping(int docFreq, DocsEnum leftDocs, DocsEnum rightDocs) throws Exception { if (leftDocs == null || rightDocs == null) { assertNull(leftDocs); assertNull(rightDocs); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java index e0a0a88..6be01f8 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java @@ -17,12 +17,6 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.io.IOException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Random; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldsConsumer; @@ -47,6 +41,12 @@ import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.Version; import org.junit.BeforeClass; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Random; + // TODO: test multiple codecs here? 
// TODO @@ -370,7 +370,7 @@ public class TestCodecs extends LuceneTestCase { assertTrue(doc != DocIdSetIterator.NO_MORE_DOCS); assertEquals(docs[i], doc); if (doPos) { - this.verifyPositions(positions[i], ((DocsAndPositionsEnum) docsEnum)); + this.verifyPositions(positions[i], docsEnum); } } assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc()); @@ -378,7 +378,7 @@ public class TestCodecs extends LuceneTestCase { byte[] data = new byte[10]; - private void verifyPositions(final PositionData[] positions, final DocsAndPositionsEnum posEnum) throws Throwable { + private void verifyPositions(final PositionData[] positions, final DocsEnum posEnum) throws Throwable { for(int i=0;i hits = new ArrayList<>(); @@ -252,7 +253,7 @@ public class TestBooleanQuery extends LuceneTestCase { for(int iter2=0;iter2<10;iter2++) { weight = s.createNormalizedWeight(q); - scorer = weight.scorer(s.leafContexts.get(0), null); + scorer = weight.scorer(s.leafContexts.get(0), DocsEnum.FLAG_FREQS, null); if (VERBOSE) { System.out.println(" iter2=" + iter2); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java index 15e42fe..e953c94 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java @@ -17,20 +17,13 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.TextField; import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; @@ -40,6 +33,14 @@ import org.apache.lucene.search.Scorer.ChildScorer; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + // TODO: refactor to a base class, that collects freqs from the scorer tree // and test all queries with it public class TestBooleanQueryVisitSubscorers extends LuceneTestCase { @@ -254,6 +255,11 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase { }; } + @Override + public int postingFeatures() { + return DocsEnum.FLAG_NONE; + } + private static void summarizeScorer(final StringBuilder builder, final Scorer scorer, final int indent) { builder.append(scorer.getClass().getSimpleName()); if (scorer instanceof TermScorer) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java index f1ed5bc..f8fd5f3 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -17,12 +17,6 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; @@ -35,6 +29,12 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + public class TestBooleanScorer extends LuceneTestCase { private static final String FIELD = "category"; @@ -207,12 +207,12 @@ public class TestBooleanScorer extends LuceneTestCase { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) { throw new UnsupportedOperationException(); } @Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, int flags, Bits acceptDocs) { return new BulkScorer() { @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java index e842909..470de1d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java @@ -17,10 +17,11 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; - +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; + public class TestCachingCollector extends LuceneTestCase { private static final double ONE_BYTE = 1.0 / (1024 * 1024); // 1 byte out of MB @@ -38,6 +39,11 @@ public class TestCachingCollector extends LuceneTestCase { public int freq() throws IOException { return 0; } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int docID() { return 0; } @Override @@ -45,7 +51,7 @@ public class TestCachingCollector extends LuceneTestCase { @Override public int advance(int target) throws IOException { return 0; } - + @Override public long cost() { return 1; @@ -170,7 +176,7 @@ public class TestCachingCollector extends LuceneTestCase { public void testNoWrappedCollector() throws Exception { for (boolean cacheScores : new boolean[] { false, true }) { // create w/ null wrapped collector, and test that the methods work - CachingCollector cc = CachingCollector.create(true, cacheScores, 50 * ONE_BYTE); + CachingCollector cc = CachingCollector.create(true, DocsEnum.FLAG_NONE, cacheScores, 50 * ONE_BYTE); LeafCollector acc = cc.getLeafCollector(null); acc.setScorer(new MockScorer()); acc.collect(0); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java index 7c7aaa6..93e2454 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java @@ -17,21 +17,21 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import org.apache.lucene.document.Field; -import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.SlowCompositeReaderWrapper; -import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.StoredDocument; import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.DefaultSimilarity; @@ -39,11 +39,12 @@ import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.util.Locale; -import java.io.IOException; /** * Test of the DisjunctionMaxQuery. 
@@ -180,7 +181,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { assertTrue(s.getTopReaderContext() instanceof AtomicReaderContext); final Weight dw = s.createNormalizedWeight(dq); AtomicReaderContext context = (AtomicReaderContext)s.getTopReaderContext(); - final Scorer ds = dw.scorer(context, context.reader().getLiveDocs()); + final Scorer ds = dw.scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs()); final boolean skipOk = ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS; if (skipOk) { fail("firsttime skipTo found a match? ... " @@ -196,7 +197,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { QueryUtils.check(random(), dq, s); final Weight dw = s.createNormalizedWeight(dq); AtomicReaderContext context = (AtomicReaderContext)s.getTopReaderContext(); - final Scorer ds = dw.scorer(context, context.reader().getLiveDocs()); + final Scorer ds = dw.scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs()); assertTrue("firsttime skipTo found no match", ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS); assertEquals("found wrong docid", "d4", r.document(ds.docID()).get("id")); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java index d34f495..ed9a1fb 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java @@ -17,20 +17,13 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; @@ -46,6 +39,14 @@ import org.apache.lucene.util.TestUtil; import org.junit.AfterClass; import org.junit.BeforeClass; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + /** tests BooleanScorer2's minShouldMatch */ public class TestMinShouldMatch2 extends LuceneTestCase { static Directory dir; @@ -124,7 +125,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase { if (slow) { return new SlowMinShouldMatchScorer(weight, reader, searcher); } else { - return weight.scorer(reader.getContext(), null); + return weight.scorer(reader.getContext(), DocsEnum.FLAG_FREQS, null); } } @@ -315,6 +316,11 @@ public class TestMinShouldMatch2 extends LuceneTestCase { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int docID() { return currentDoc; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java index bd512e8..496dadd 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -31,7 +31,7 @@ import 
org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.MultiFields; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowCompositeReaderWrapper; @@ -100,7 +100,7 @@ public class TestPositionIncrement extends LuceneTestCase { IndexSearcher searcher = newSearcher(reader); - DocsAndPositionsEnum pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(), + DocsEnum pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(), MultiFields.getLiveDocs(searcher.getIndexReader()), "field", new BytesRef("1")); @@ -212,7 +212,7 @@ public class TestPositionIncrement extends LuceneTestCase { final IndexReader readerFromWriter = writer.getReader(); AtomicReader r = SlowCompositeReaderWrapper.wrap(readerFromWriter); - DocsAndPositionsEnum tp = r.termPositionsEnum(new Term("content", "a")); + DocsEnum tp = r.termPositionsEnum(new Term("content", "a")); int count = 0; assertTrue(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java index 4a51978..ca889e7 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java @@ -24,6 +24,8 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.document.Document; +import java.io.IOException; + public class TestPositiveScoresOnlyCollector extends LuceneTestCase { private static final class SimpleScorer extends Scorer { @@ -41,6 +43,11 @@ public class TestPositiveScoresOnlyCollector extends LuceneTestCase { return 
1; } + @Override + public int nextPosition() throws IOException { + return -1; + } + @Override public int docID() { return idx; } @Override public int nextDoc() { @@ -51,7 +58,7 @@ public class TestPositiveScoresOnlyCollector extends LuceneTestCase { idx = target; return idx < scores.length ? idx : NO_MORE_DOCS; } - + @Override public long cost() { return scores.length; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java index 63b3075..be68e66 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java @@ -17,11 +17,6 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.Arrays; -import java.util.Comparator; -import java.util.Set; - import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericDocValuesField; @@ -39,6 +34,11 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; +import java.io.IOException; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Set; + public class TestQueryRescorer extends LuceneTestCase { private IndexSearcher getSearcher(IndexReader r) { @@ -443,7 +443,7 @@ public class TestQueryRescorer extends LuceneTestCase { } @Override - public Scorer scorer(final AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(final AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { return new Scorer(null) { int docID = -1; @@ -459,6 +459,11 @@ public class TestQueryRescorer extends LuceneTestCase { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public long cost() { return 1; } diff --git 
a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java index b082da9..44c7e32 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java @@ -17,14 +17,14 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; + public class TestScoreCachingWrappingScorer extends LuceneTestCase { private static final class SimpleScorer extends Scorer { @@ -47,6 +47,11 @@ public class TestScoreCachingWrappingScorer extends LuceneTestCase { return 1; } + @Override + public int nextPosition() throws IOException { + return -1; + } + @Override public int docID() { return doc; } @Override public int nextDoc() { @@ -57,7 +62,7 @@ public class TestScoreCachingWrappingScorer extends LuceneTestCase { doc = target; return doc < scores.length ? doc : NO_MORE_DOCS; } - + @Override public long cost() { return scores.length; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java index c33e35a..3a01583 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java @@ -17,14 +17,11 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowCompositeReaderWrapper; @@ -33,6 +30,10 @@ import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + public class TestTermScorer extends LuceneTestCase { protected Directory directory; private static final String FIELD = "field"; @@ -78,7 +79,7 @@ public class TestTermScorer extends LuceneTestCase { Weight weight = indexSearcher.createNormalizedWeight(termQuery); assertTrue(indexSearcher.getTopReaderContext() instanceof AtomicReaderContext); AtomicReaderContext context = (AtomicReaderContext)indexSearcher.getTopReaderContext(); - BulkScorer ts = weight.bulkScorer(context, true, context.reader().getLiveDocs()); + BulkScorer ts = weight.bulkScorer(context, true, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs()); // we have 2 documents with the term all in them, one document for all the // other values final List docs = new ArrayList<>(); @@ -140,7 +141,7 @@ public class TestTermScorer extends LuceneTestCase { Weight weight = indexSearcher.createNormalizedWeight(termQuery); assertTrue(indexSearcher.getTopReaderContext() instanceof AtomicReaderContext); AtomicReaderContext context = (AtomicReaderContext) indexSearcher.getTopReaderContext(); - Scorer ts = weight.scorer(context, context.reader().getLiveDocs()); + Scorer ts = weight.scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs()); assertTrue("next did not return a doc", ts.nextDoc() != 
DocIdSetIterator.NO_MORE_DOCS); assertTrue("score is not correct", ts.score() == 1.6931472f); @@ -159,7 +160,7 @@ public class TestTermScorer extends LuceneTestCase { Weight weight = indexSearcher.createNormalizedWeight(termQuery); assertTrue(indexSearcher.getTopReaderContext() instanceof AtomicReaderContext); AtomicReaderContext context = (AtomicReaderContext) indexSearcher.getTopReaderContext(); - Scorer ts = weight.scorer(context, context.reader().getLiveDocs()); + Scorer ts = weight.scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs()); assertTrue("Didn't skip", ts.advance(3) != DocIdSetIterator.NO_MORE_DOCS); // The next doc should be doc 5 assertTrue("doc should be number 5", ts.docID() == 5); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java b/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java index bac1e39..4ca9c34 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java @@ -17,8 +17,6 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; @@ -36,6 +34,8 @@ import org.apache.lucene.util.LuceneTestCase; import org.junit.AfterClass; import org.junit.BeforeClass; +import java.io.IOException; + public class TestTermVectors extends LuceneTestCase { private static IndexReader reader; private static Directory directory; diff --git a/lucene/core/src/test/org/apache/lucene/search/posfilter/IntervalTestBase.java b/lucene/core/src/test/org/apache/lucene/search/posfilter/IntervalTestBase.java new file mode 100644 index 0000000..886cdca --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/posfilter/IntervalTestBase.java @@ -0,0 +1,259 @@ +package org.apache.lucene.search.posfilter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PositionsCollector; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; + +import java.io.IOException; + +public abstract class IntervalTestBase extends LuceneTestCase { + + protected Directory directory; + protected IndexReader reader; + protected IndexSearcher searcher; + + public static class AssertingPositionsCollector extends PositionsCollector { + + enum AssertionType { POSITIONS, OFFSETS } + + private final int[][] expectedResults; + private final AssertionType type; + + private int docUpto = -1; + private int posUpto = -1; + + private int currentDoc = -1; + private int posRemaining = 0; + + public AssertingPositionsCollector(int[][] expectedResults, AssertionType type) { + this.expectedResults = expectedResults; + this.type = type; + } + + @Override + public int postingFeatures() { + if (type == AssertionType.POSITIONS) + return DocsEnum.FLAG_POSITIONS; + else + return DocsEnum.FLAG_OFFSETS; + } + + @Override + protected void collectPosition(int doc, Interval interval) { + + if (doc != currentDoc) { + if (posRemaining > 0) { + int missingPos = expectedResults[docUpto].length - (posRemaining * 2); + fail("Missing expected hit in document " + expectedResults[docUpto][0] + ": [" + + expectedResults[docUpto][missingPos] + ", " + 
expectedResults[docUpto][missingPos + 1] + "]"); + } + docUpto++; + if (docUpto > expectedResults.length - 1) + fail("Unexpected hit in document " + doc + ": " + interval.toString()); + + currentDoc = expectedResults[docUpto][0]; + posUpto = -1; + posRemaining = (expectedResults[docUpto].length - 1) / 2; + } + + if (doc != currentDoc) + fail("Expected next hit in document " + currentDoc + " but was in " + doc + ": " + interval.toString()); + + posUpto++; + posRemaining--; + + if (posUpto > ((expectedResults[docUpto].length - 1) / 2) - 1) + fail("Unexpected hit in document " + doc + ": " + interval.toString()); + + if (type == AssertionType.POSITIONS) { + if (expectedResults[docUpto][posUpto * 2 + 1] != interval.begin || + expectedResults[docUpto][posUpto * 2 + 2] != interval.end) + fail("Expected next position in document to be [" + expectedResults[docUpto][posUpto * 2 + 1] + ", " + + expectedResults[docUpto][posUpto * 2 + 2] + "] but was [" + interval.begin + ", " + interval.end + "]"); + } + else { + // check offsets + if (expectedResults[docUpto][posUpto * 2 + 1] != interval.offsetBegin || + expectedResults[docUpto][posUpto * 2 + 2] != interval.offsetEnd) + fail("Expected next offset in document to be [" + expectedResults[docUpto][posUpto * 2 + 1] + ", " + + expectedResults[docUpto][posUpto * 2 + 2] + "] but was [" + interval.offsetBegin + ", " + interval.offsetEnd + "]"); + } + } + + public void assertAllMatched() { + if (docUpto < expectedResults.length - 1) { + fail("Expected a hit in document " + expectedResults[docUpto + 1][0]); + } + } + } + + /** + * Run a query against a searcher, and check that the collected intervals from the query match + * the expected results. + * @param q the query + * @param searcher the searcher + * @param expectedResults an int[][] detailing the expected results, in the format + * { { docid1, startoffset1, endoffset1, startoffset2, endoffset2, ... }, + * { docid2, startoffset1, endoffset1, startoffset2, endoffset2, ...}, ... 
} + * @throws IOException + */ + public static void checkIntervalOffsets(Query q, IndexSearcher searcher, int[][] expectedResults) throws IOException { + + //MatchCollector m = new MatchCollector(); + AssertingPositionsCollector c = new AssertingPositionsCollector(expectedResults, AssertingPositionsCollector.AssertionType.OFFSETS); + searcher.search(q, c); + c.assertAllMatched(); + + } + + /** + * Run a query against a searcher, and check that the collected intervals from the query match + * the expected results. + * @param q the query + * @param searcher the searcher + * @param expectedResults an int[][] detailing the expected results, in the format + * { { docid1, startpos1, endpos1, startpos2, endpos2, ... }, + * { docid2, startpos1, endpos1, startpos2, endpos2, ...}, ... } + * @throws IOException + */ + public static void checkIntervals(Query q, IndexSearcher searcher, int[][] expectedResults) throws IOException { + + AssertingPositionsCollector c = new AssertingPositionsCollector(expectedResults, AssertingPositionsCollector.AssertionType.POSITIONS); + searcher.search(q, c); + c.assertAllMatched(); + + } + + public static void checkScores(Query q, IndexSearcher searcher, int... 
expectedDocs) throws IOException { + TopDocs hits = searcher.search(q, 1000); + Assert.assertEquals("Wrong number of hits", expectedDocs.length, hits.totalHits); + for (int i = 0; i < expectedDocs.length; i++) { + Assert.assertEquals("Docs not scored in order", expectedDocs[i], hits.scoreDocs[i].doc); + } + CheckHits.checkExplanations(q, "field", searcher); + } + + protected abstract void addDocs(RandomIndexWriter writer) throws IOException; + + @Before + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random())); + //config.setCodec(Codec.forName("SimpleText")); + //config.setCodec(Codec.forName("Asserting")); + RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config); + addDocs(writer); + reader = writer.getReader(); + writer.close(); + searcher = new IndexSearcher(reader); + } + + @After + public void tearDown() throws Exception { + reader.close(); + directory.close(); + super.tearDown(); + } + + public TermQuery makeTermQuery(String text) { + return new TermQuery(new Term(TestBasicIntervals.field, text)); + } + + protected Query makeOrQuery(Query... queries) { + BooleanQuery q = new BooleanQuery(); + for (Query subquery : queries) { + q.add(subquery, BooleanClause.Occur.SHOULD); + } + return q; + } + + protected Query makeAndQuery(Query... queries) { + BooleanQuery q = new BooleanQuery(); + for (Query subquery : queries) { + q.add(subquery, BooleanClause.Occur.MUST); + } + return q; + } + + protected Query makeBooleanQuery(BooleanClause... 
clauses) { + BooleanQuery q = new BooleanQuery(); + for (BooleanClause clause : clauses) { + q.add(clause); + } + return q; + } + + protected BooleanClause makeBooleanClause(String text, BooleanClause.Occur occur) { + return new BooleanClause(makeTermQuery(text), occur); + } + + public static class Match implements Comparable { + + public final int docid; + public final int start; + public final int end; + public final int startOffset; + public final int endOffset; + public final boolean composite; + + public Match(int docid, Interval interval, boolean composite) { + this.docid = docid; + this.start = interval.begin; + this.end = interval.end; + this.startOffset = interval.offsetBegin; + this.endOffset = interval.offsetEnd; + this.composite = composite; + } + + @Override + public int compareTo(Match o) { + if (this.docid != o.docid) + return this.docid - o.docid; + if (this.start != o.start) + return this.start - o.start; + return o.end - this.end; + } + + @Override + public String toString() { + return String.format("%d:%d[%d]->%d[%d]%s", + docid, start, startOffset, end, endOffset, composite ? "C" : ""); + } + } + + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/posfilter/TestBasicIntervals.java b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestBasicIntervals.java new file mode 100644 index 0000000..793f6d9 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestBasicIntervals.java @@ -0,0 +1,191 @@ +package org.apache.lucene.search.posfilter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; + +import java.io.IOException; + +public class TestBasicIntervals extends IntervalTestBase { + + public static final String field = "field"; + + @Override + protected void addDocs(RandomIndexWriter writer) throws IOException { + for (String content : docFields) { + Document doc = new Document(); + doc.add(newField(field, content, TextField.TYPE_NOT_STORED)); + writer.addDocument(doc); + } + } + + private String[] docFields = { + "w1 w2 w3 w4 w5", //0 + "w1 w3 w2 w3",//1 + "w1 xx w2 yy w3",//2 + "w1 w3 xx w2 yy w3",//3 + "u2 u2 u1", //4 + "u2 xx u2 u1",//5 + "u2 u2 xx u1", //6 + "u2 xx u2 yy u1", //7 + "u2 xx u1 u2",//8 + "u1 u2 xx u2",//9 + "u2 u1 xx u2",//10 + "t1 t2 t1 t3 t2 t3",//11 + "v1 v2 v3",//12 + "v1 v3 v2 v3 v4",//13 + "v4 v2 v2 v4",//14 + "v3 v4 v3"};//15 + + public void testSimpleConjunction() throws IOException { + Query q = makeAndQuery(makeTermQuery("v2"), makeTermQuery("v4")); + checkIntervals(q, searcher, new int[][]{ + { 13, 2, 2, 4, 4 }, + { 14, 0, 0, 1, 1, 2, 2, 3, 3 } + }); + } + + public void testExclusion() throws IOException { + Query q = makeBooleanQuery(makeBooleanClause("v2", BooleanClause.Occur.MUST), + makeBooleanClause("v3", BooleanClause.Occur.MUST_NOT)); + checkIntervals(q, searcher, new int[][]{ + { 14, 1, 1, 2, 2 } + }); + } + + public void 
testOptExclusion() throws IOException { + Query q = makeBooleanQuery(makeBooleanClause("w2", BooleanClause.Occur.SHOULD), + makeBooleanClause("w3", BooleanClause.Occur.SHOULD), + makeBooleanClause("xx", BooleanClause.Occur.MUST_NOT)); + checkIntervals(q, searcher, new int[][]{ + { 0, 1, 1, 2, 2 }, + { 1, 1, 1, 2, 2, 3, 3 } + }); + } + + public void testNestedConjunctions() throws IOException { + Query q = makeAndQuery(makeTermQuery("v2"), makeOrQuery(makeTermQuery("v3"), makeTermQuery("v4"))); + checkIntervals(q, searcher, new int[][]{ + { 12, 1, 1, 2, 2 }, + { 13, 1, 1, 2, 2, 3, 3, 4, 4 }, + { 14, 0, 0, 1, 1, 2, 2, 3, 3 } + }); + } + + public void testSingleRequiredManyOptional() throws IOException { + Query q = makeBooleanQuery(makeBooleanClause("v2", BooleanClause.Occur.MUST), + makeBooleanClause("v3", BooleanClause.Occur.SHOULD), + makeBooleanClause("v4", BooleanClause.Occur.SHOULD)); + checkIntervals(q, searcher, new int[][]{ + { 12, 1, 1, 2, 2 }, + { 13, 1, 1, 2, 2, 3, 3, 4, 4 }, + { 14, 0, 0, 1, 1, 2, 2, 3, 3 } + }); + } + + public void testSimpleTerm() throws IOException { + Query q = makeTermQuery("u2"); + checkIntervals(q, searcher, new int[][]{ + { 4, 0, 0, 1, 1 }, + { 5, 0, 0, 2, 2 }, + { 6, 0, 0, 1, 1 }, + { 7, 0, 0, 2, 2 }, + { 8, 0, 0, 3, 3 }, + { 9, 1, 1, 3, 3 }, + { 10, 0, 0, 3, 3 } + }); + } + + public void testBasicDisjunction() throws IOException { + Query q = makeOrQuery(makeTermQuery("v3"), makeTermQuery("v2")); + checkIntervals(q, searcher, new int[][]{ + { 12, 1, 1, 2, 2 }, + { 13, 1, 1, 2, 2, 3, 3 }, + { 14, 1, 1, 2, 2 }, + { 15, 0, 0, 2, 2 } + }); + } + + + + public void testOrSingle() throws Exception { + Query q = makeOrQuery(makeTermQuery("w5")); + checkIntervals(q, searcher, new int[][]{ + { 0, 4, 4 } + }); + } + + public void testOrPartialMatch() throws Exception { + Query q = makeOrQuery(makeTermQuery("w5"), makeTermQuery("xx")); + checkIntervals(q, searcher, new int[][]{ + { 0, 4, 4 }, + { 2, 1, 1 }, + { 3, 2, 2 }, + { 5, 1, 1 }, + 
{ 6, 2, 2 }, + { 7, 1, 1 }, + { 8, 1, 1 }, + { 9, 2, 2 }, + { 10, 2, 2 }, + }); + } + + public void testOrDisjunctionMatch() throws Exception { + Query q = makeOrQuery(makeTermQuery("w5"), makeTermQuery("yy")); + checkIntervals(q, searcher, new int[][]{ + { 0, 4, 4 }, + { 2, 3, 3 }, + { 3, 4, 4 }, + { 7, 3, 3 } + }); + } + + // "t1 t2 t1 t3 t2 t3" + // ----------- + // -------- + // -------- + public void testOrSingleDocument() throws Exception { + Query q = makeOrQuery(makeTermQuery("t1"), makeTermQuery("t2"), makeTermQuery("t3")); + checkIntervals(q, searcher, new int[][]{ + { 11, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5 } + }); + } + + // andnot(andnot(w1, or(w2, flurble)), or(foo, bar)) + public void testConjunctionExclusionQuery() throws IOException { + BooleanQuery andnotinner = new BooleanQuery(); + andnotinner.add(makeTermQuery("w1"), BooleanClause.Occur.MUST); + BooleanQuery andnotinneror = new BooleanQuery(); + andnotinneror.add(makeTermQuery("w2"), BooleanClause.Occur.SHOULD); + andnotinneror.add(makeTermQuery("flurble"), BooleanClause.Occur.SHOULD); + andnotinner.add(andnotinneror, BooleanClause.Occur.MUST_NOT); + BooleanQuery outer = new BooleanQuery(); + outer.add(andnotinner, BooleanClause.Occur.MUST); + BooleanQuery andnotouteror = new BooleanQuery(); + andnotouteror.add(makeTermQuery("foo"), BooleanClause.Occur.SHOULD); + andnotouteror.add(makeTermQuery("bar"), BooleanClause.Occur.SHOULD); + outer.add(andnotouteror, BooleanClause.Occur.MUST_NOT); + checkIntervals(outer, searcher, new int[][]{}); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/posfilter/TestBrouwerianQuery.java b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestBrouwerianQuery.java new file mode 100644 index 0000000..5acc039 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestBrouwerianQuery.java @@ -0,0 +1,106 @@ +package org.apache.lucene.search.posfilter; +/** + * Licensed to the Apache Software Foundation (ASF) under one or 
more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.posfilter.NonOverlappingQuery; +import org.apache.lucene.search.posfilter.OrderedNearQuery; +import org.apache.lucene.search.posfilter.UnorderedNearQuery; + +import java.io.IOException; + +public class TestBrouwerianQuery extends IntervalTestBase { + + protected void addDocs(RandomIndexWriter writer) throws IOException { + { + Document doc = new Document(); + doc.add(newField( + "field", + "The quick brown fox jumps over the lazy dog", + TextField.TYPE_STORED)); + writer.addDocument(doc); + } + + { + Document doc = new Document(); + doc.add(newField( + "field", + "The quick brown duck jumps over the lazy dog with the quick brown fox jumps", + TextField.TYPE_STORED)); + writer.addDocument(doc); + } + } + + public void testBrouwerianBooleanQuery() throws IOException { + + Query query = new OrderedNearQuery(2, makeTermQuery("the"), + makeTermQuery("quick"), makeTermQuery("jumps")); + Query sub = makeTermQuery("fox"); + NonOverlappingQuery q = new NonOverlappingQuery(query, sub); + + checkIntervals(q, searcher, new int[][]{ + { 
1, 0, 4 } + }); + } + + public void testBrouwerianBooleanQueryExcludedDoesNotExist() throws IOException { + + Query query = new OrderedNearQuery(2, makeTermQuery("the"), + makeTermQuery("quick"), makeTermQuery("jumps")); + Query sub = makeTermQuery("blox"); + NonOverlappingQuery q = new NonOverlappingQuery(query, sub); + + checkIntervals(q, searcher, new int[][]{ + { 0, 0, 4 }, + { 1, 0, 4, 10, 14 } + }); + } + + public void testBrouwerianOverlapQuery() throws IOException { + // We want to find 'jumps NOT WITHIN 2 positions of fox' + Query sub = new UnorderedNearQuery(2, makeTermQuery("jumps"), makeTermQuery("fox")); + Query query = makeTermQuery("jumps"); + NonOverlappingQuery q = new NonOverlappingQuery(query, sub); + + checkIntervals(q, searcher, new int[][]{ + { 1, 4, 4 } + }); + } + + public void testBrouwerianNonExistentOverlapQuery() throws IOException { + Query sub = new UnorderedNearQuery(2, makeTermQuery("dog"), makeTermQuery("over")); + Query query = makeTermQuery("dog"); + NonOverlappingQuery q = new NonOverlappingQuery(query, sub); + + checkIntervals(q, searcher, new int[][]{}); + } + + public void testBrouwerianExistentOverlapQuery() throws IOException { + Query sub = new UnorderedNearQuery(1, makeTermQuery("dog"), makeTermQuery("over")); + Query query = makeTermQuery("dog"); + NonOverlappingQuery q = new NonOverlappingQuery(query, sub); + + checkIntervals(q, searcher, new int[][]{ + { 0, 8, 8 }, + { 1, 8, 8 } + }); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/posfilter/TestIntervalScoring.java b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestIntervalScoring.java new file mode 100644 index 0000000..1659ac6 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestIntervalScoring.java @@ -0,0 +1,63 @@ +package org.apache.lucene.search.posfilter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.posfilter.OrderedNearQuery; +import org.junit.Assert; + +import java.io.IOException; + +public class TestIntervalScoring extends IntervalTestBase { + + @Override + protected void addDocs(RandomIndexWriter writer) throws IOException { + for (String content : docFields) { + Document doc = new Document(); + doc.add(newField("field", content, TextField.TYPE_NOT_STORED)); + writer.addDocument(doc); + } + } + + private String[] docFields = { + "Should we, could we, would we?", + "It should - would it?", + "It shouldn't", + "Should we, should we, should we" + }; + + public void testOrderedNearQueryScoring() throws IOException { + OrderedNearQuery q = new OrderedNearQuery(10, makeTermQuery("should"), + makeTermQuery("would")); + checkScores(q, searcher, 1, 0); + } + + public void testEmptyMultiTermQueryScoring() throws IOException { + OrderedNearQuery q = new OrderedNearQuery(10, new RegexpQuery(new Term("field", "bar.*")), + new RegexpQuery(new Term("field", "foo.*"))); + TopDocs docs = 
searcher.search(q, 10); + assertEquals(docs.totalHits, 0); + } + + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/posfilter/TestNestedPositionFilterQueries.java b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestNestedPositionFilterQueries.java new file mode 100644 index 0000000..c93ad5b --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestNestedPositionFilterQueries.java @@ -0,0 +1,122 @@ +package org.apache.lucene.search.posfilter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; + +import java.io.IOException; + +public class TestNestedPositionFilterQueries extends IntervalTestBase { + + @Override + protected void addDocs(RandomIndexWriter writer) throws IOException { + for (int i = 0; i < docFields.length; i++) { + Document doc = new Document(); + doc.add(newField("field", docFields[i], TextField.TYPE_STORED)); + writer.addDocument(doc); + } + } + + private String[] docFields = { + "w1 w2 w3 w4 w5 w6 w7 w8 w9 w10 w11 w12", //0 + "w1 w3 w4 w5 w6 w7 w8", //1 + "w1 w3 w10 w4 w5 w6 w7 w8", //2 + "w1 w3 w2 w4 w5 w6 w7 w8", //3 + }; + + public void testOrderedDisjunctionQueries() throws IOException { + // Two phrases whose subparts appear in a document, but that do not fulfil the slop + // requirements of the parent IntervalFilterQuery + Query sentence1 = new OrderedNearQuery(0, makeTermQuery("w1"), makeTermQuery("w8"), makeTermQuery("w4")); + Query sentence2 = new OrderedNearQuery(0, makeTermQuery("w3"), makeTermQuery("w7"), makeTermQuery("w6")); + BooleanQuery bq = new BooleanQuery(); + bq.add(sentence1, BooleanClause.Occur.SHOULD); + bq.add(sentence2, BooleanClause.Occur.SHOULD); + checkIntervals(bq, searcher, new int[][]{}); + } + + public void testFilterDisjunctionQuery() throws IOException { + Query near1 = makeTermQuery("w4"); + Query near2 = new OrderedNearQuery(3, makeTermQuery("w1"), makeTermQuery("w10")); + BooleanQuery bq = new BooleanQuery(); + bq.add(near1, BooleanClause.Occur.SHOULD); + bq.add(near2, BooleanClause.Occur.SHOULD); + checkIntervals(bq, searcher, new int[][]{ + { 0, 3, 3 }, + { 1, 2, 2 }, + { 2, 0, 2, 3, 3 }, + { 3, 3, 3 } + }); + } + + // or(w1 pre/2 w2, w1 pre/3 w10) + public void testOrNearNearQuery() throws IOException { + Query near1 
= new OrderedNearQuery(2, makeTermQuery("w1"), makeTermQuery("w2")); + Query near2 = new OrderedNearQuery(3, makeTermQuery("w1"), makeTermQuery("w10")); + BooleanQuery bq = new BooleanQuery(); + bq.add(near1, BooleanClause.Occur.SHOULD); + bq.add(near2, BooleanClause.Occur.SHOULD); + checkIntervals(bq, searcher, new int[][]{ + { 0, 0, 1 }, + { 2, 0, 2 }, + { 3, 0, 2 } + }); + } + + // or(w2 within/2 w1, w10 within/3 w1) + public void testUnorderedNearNearQuery() throws IOException { + Query near1 = new UnorderedNearQuery(2, makeTermQuery("w2"), makeTermQuery("w1")); + Query near2 = new UnorderedNearQuery(3, makeTermQuery("w10"), makeTermQuery("w1")); + BooleanQuery bq = new BooleanQuery(); + bq.add(near1, BooleanClause.Occur.SHOULD); + bq.add(near2, BooleanClause.Occur.SHOULD); + checkIntervals(bq, searcher, new int[][]{ + {0, 0, 1}, + {2, 0, 2}, + {3, 0, 2} + }); + } + + // (a pre/2 b) pre/6 (c pre/2 d) + public void testNearNearNearQuery() throws IOException { + Query near1 = new OrderedNearQuery(2, makeTermQuery("w1"), makeTermQuery("w4")); + Query near2 = new OrderedNearQuery(2, makeTermQuery("w10"), makeTermQuery("w12")); + Query near3 = new OrderedNearQuery(6, near1, near2); + checkIntervals(near3, searcher, new int[][]{ + { 0, 0, 11 } + }); + } + + public void testOrNearNearNonExistentQuery() throws IOException { + Query near1 = new OrderedNearQuery(2, makeTermQuery("w1"), makeTermQuery("w12")); + Query near2 = new OrderedNearQuery(2, makeTermQuery("w3"), makeTermQuery("w8")); + BooleanQuery bq = new BooleanQuery(); + bq.add(near1, BooleanClause.Occur.SHOULD); + bq.add(near2, BooleanClause.Occur.SHOULD); + BooleanQuery wrapper = new BooleanQuery(); + wrapper.add(bq, BooleanClause.Occur.MUST); + wrapper.add(makeTermQuery("foo"), BooleanClause.Occur.MUST_NOT); + checkIntervals(wrapper, searcher, new int[][]{}); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/posfilter/TestPhraseQueryPositions.java 
b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestPhraseQueryPositions.java new file mode 100644 index 0000000..0652e52 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestPhraseQueryPositions.java @@ -0,0 +1,163 @@ +package org.apache.lucene.search.posfilter; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; + +import java.io.IOException; + +public class TestPhraseQueryPositions extends IntervalTestBase { + + protected void addDocs(RandomIndexWriter writer) throws IOException { + { + Document doc = new Document(); + doc.add(newField( + "field", + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 + "Pease porridge hot! Pease porridge cold! Pease porridge in the pot nine days old! " + // 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 + + "Some like it hot, some like it cold, Some like it in the pot nine days old! " + // 31 32 33 34 35 36 + + "Pease porridge hot! 
Pease porridge cold!", + TextField.TYPE_STORED)); + writer.addDocument(doc); + } + + { + Document doc = new Document(); + doc.add(newField( + "field", + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 + "Pease porridge cold! Pease porridge hot! Pease porridge in the pot nine days old! " + // 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 + + "Some like it cold, some like it hot, Some like it in the pot nine days old! " + // 31 32 33 34 35 36 + + "Pease porridge cold! Pease porridge hot!", + TextField.TYPE_STORED)); + writer.addDocument(doc); + } + } + + public void testOutOfOrderSloppyPhraseQuery() throws IOException { + PhraseQuery query = new PhraseQuery(); + query.add(new Term("field", "pease")); + query.add(new Term("field", "cold!")); + query.add(new Term("field", "porridge")); + query.setSlop(2); + checkIntervals(query, searcher, new int[][]{ + {0, 3, 5, 3, 7, 5, 7, 34, 36}, + {1, 0, 2, 0, 4, 2, 4, 31, 33, 31, 35, 33, 35 } + }); + } + + public void testSloppyPhraseQuery() throws IOException { + PhraseQuery query = new PhraseQuery(); + query.add(new Term("field", "pease")); + query.add(new Term("field", "hot!")); + query.setSlop(1); + checkIntervals(query, searcher, new int[][]{ + {0, 0, 2, 31, 33}, + {1, 3, 5, 34, 36} + }); + } + + public void testManyTermSloppyPhraseQuery() throws IOException { + PhraseQuery query = new PhraseQuery(); + query.add(new Term("field", "pease")); + query.add(new Term("field", "porridge")); + query.add(new Term("field", "pot")); + query.setSlop(2); + checkIntervals(query, searcher, new int[][]{ + {0, 6, 10}, + {1, 6, 10} + }); + } + + public void testMultiPhrases() throws IOException { + + MultiPhraseQuery q = new MultiPhraseQuery(); + q.add(new Term("field", "pease")); + q.add(new Term("field", "porridge")); + q.add(new Term[]{ new Term("field", "hot!"), new Term("field", "cold!") }); + + checkIntervals(q, searcher, new int[][]{ + { 0, 0, 2, 3, 5, 31, 33, 34, 36 }, + { 1, 0, 2, 3, 5, 31, 33, 34, 36 } + }); + } + + public void 
testOverlaps() throws IOException { + PhraseQuery q = new PhraseQuery(); + q.add(new Term("field", "some")); + q.add(new Term("field", "like")); + q.add(new Term("field", "it")); + q.add(new Term("field", "cold,")); + q.add(new Term("field", "some")); + q.add(new Term("field", "like")); + checkIntervals(q, searcher, new int[][]{ + {0, 18, 23}, + {1, 14, 19} + }); + } + + public void testMatching() throws IOException { + + PhraseQuery q = new PhraseQuery(); + q.add(new Term("field", "pease")); + q.add(new Term("field", "porridge")); + q.add(new Term("field", "hot!")); + + checkIntervals(q, searcher, new int[][]{ + {0, 0, 2, 31, 33}, + {1, 3, 5, 34, 36} + }); + + } + + public void testPartialMatching() throws IOException { + + PhraseQuery q = new PhraseQuery(); + q.add(new Term("field", "pease")); + q.add(new Term("field", "porridge")); + q.add(new Term("field", "hot!")); + q.add(new Term("field", "pease")); + q.add(new Term("field", "porridge")); + q.add(new Term("field", "cold!")); + + checkIntervals(q, searcher, new int[][]{ + {0, 0, 5, 31, 36}, + }); + + } + + public void testNonMatching() throws IOException { + + PhraseQuery q = new PhraseQuery(); + q.add(new Term("field", "pease")); + q.add(new Term("field", "hot!")); + + checkIntervals(q, searcher, new int[][]{}); + + } + + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/posfilter/TestPositionFilteredIntervals.java b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestPositionFilteredIntervals.java new file mode 100644 index 0000000..d410078 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestPositionFilteredIntervals.java @@ -0,0 +1,226 @@ +package org.apache.lucene.search.posfilter; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.carrotsearch.randomizedtesting.annotations.Seed; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; + +import java.io.IOException; + +@Seed("5DA7DDB9F83D92AC:D927BA1FCE8EC808") +public class TestPositionFilteredIntervals extends IntervalTestBase { + + public static final String field = "field"; + + @Override + protected void addDocs(RandomIndexWriter writer) throws IOException { + for (String content : docFields) { + Document doc = new Document(); + doc.add(newField(field, content, TextField.TYPE_NOT_STORED)); + writer.addDocument(doc); + } + } + + private String[] docFields = { + "w1 w2 w3 w4 w5", //0 + "w1 w3 w2 w3",//1 + "w1 xx w2 yy w3",//2 + "w1 w3 xx w2 yy w3",//3 + "u2 u2 u1", //4 + "u2 xx u2 u1",//5 + "u2 u2 xx u1", //6 + "u2 xx u2 yy u1", //7 + "u2 xx u1 u2",//8 + "u1 u2 xx u2",//9 + "u2 u1 xx u2",//10 + "t1 t2 t1 t3 t2 t3",//11 + "v1 v2 v3",//12 + "v1 v3 v2 v3 v4",//13 + "v4 v2 v2 v4",//14 + "v3 v4 v3"};//15 + + public void testNearOrdered01() throws Exception { + Query q = new OrderedNearQuery(0, makeTermQuery("w1"), makeTermQuery("w2"), makeTermQuery("w3")); + checkIntervals(q, searcher, new int[][]{ + { 0, 0, 2 } + }); + } + + public 
void testNearOrdered02() throws Exception { + Query q = new OrderedNearQuery(1, makeTermQuery("w1"), makeTermQuery("w2"), makeTermQuery("w3")); + checkIntervals(q, searcher, new int[][]{ + { 0, 0, 2 }, + { 1, 0, 3 } + }); + } + + public void testNearOrdered03() throws Exception { + Query q = new OrderedNearQuery(2, makeTermQuery("w1"), makeTermQuery("w2"), makeTermQuery("w3")); + checkIntervals(q, searcher, new int[][]{ + { 0, 0, 2 }, + { 1, 0, 3 }, + { 2, 0, 4 } + }); + } + + public void testNearOrdered04() throws Exception { + Query q = new OrderedNearQuery(3, makeTermQuery("w1"), makeTermQuery("w2"), makeTermQuery("w3")); + checkIntervals(q, searcher, new int[][]{ + { 0, 0, 2 }, + { 1, 0, 3 }, + { 2, 0, 4 }, + { 3, 0, 5 } + }); + } + + public void testNearOrdered05() throws Exception { + Query q = new OrderedNearQuery(4, makeTermQuery("w1"), makeTermQuery("w2"), makeTermQuery("w3")); + checkIntervals(q, searcher, new int[][]{ + { 0, 0, 2 }, + { 1, 0, 3 }, + { 2, 0, 4 }, + { 3, 0, 5 } + }); + } + + public void testNearOrderedEqual01() throws Exception { + Query q = new OrderedNearQuery(0, makeTermQuery("w1"), makeTermQuery("w3"), makeTermQuery("w3")); + checkIntervals(q, searcher, new int[][]{}); + } + + public void testNearOrderedEqual02() throws Exception { + Query q = new OrderedNearQuery(1, makeTermQuery("w1"), makeTermQuery("w3"), makeTermQuery("w3")); + checkIntervals(q, searcher, new int[][]{ + { 1, 0, 3 } + }); + } + + public void testNearOrderedEqual03() throws Exception { + Query q = new OrderedNearQuery(2, makeTermQuery("w1"), makeTermQuery("w3"), makeTermQuery("w3")); + checkIntervals(q, searcher, new int[][]{ + { 1, 0, 3 } + }); + } + + public void testNearOrderedEqual04() throws Exception { + Query q = new OrderedNearQuery(3, makeTermQuery("w1"), makeTermQuery("w3"), makeTermQuery("w3")); + checkIntervals(q, searcher, new int[][]{ + { 1, 0, 3 }, + { 3, 0, 5 } + }); + } + + public void testNearOrderedEqual11() throws Exception { + Query q = new 
OrderedNearQuery(0, makeTermQuery("u2"), makeTermQuery("u2"), makeTermQuery("u1")); + checkIntervals(q, searcher, new int[][]{ + { 4, 0, 2 } + }); + } + + public void testNearOrderedEqual13() throws Exception { + Query q = new OrderedNearQuery(1, makeTermQuery("u2"), makeTermQuery("u2"), makeTermQuery("u1")); + checkIntervals(q, searcher, new int[][]{ + { 4, 0, 2 }, + { 5, 0, 3 }, + { 6, 0, 3 } + }); + } + + public void testNearOrderedEqual14() throws Exception { + Query q = new OrderedNearQuery(2, makeTermQuery("u2"), makeTermQuery("u2"), makeTermQuery("u1")); + checkIntervals(q, searcher, new int[][]{ + { 4, 0, 2 }, + { 5, 0, 3 }, + { 6, 0, 3 }, + { 7, 0, 4 } + }); + } + + public void testNearOrderedEqual15() throws Exception { + Query q = new OrderedNearQuery(3, makeTermQuery("u2"), makeTermQuery("u2"), makeTermQuery("u1")); + checkIntervals(q, searcher, new int[][]{ + { 4, 0, 2 }, + { 5, 0, 3 }, + { 6, 0, 3 }, + { 7, 0, 4 } + }); + } + + public void testNearOrderedOverlap() throws Exception { + Query q = new OrderedNearQuery(3, makeTermQuery("t1"), makeTermQuery("t2"), makeTermQuery("t3")); + checkIntervals(q, searcher, new int[][]{ + { 11, 0, 3, 2, 5 } + }); + } + + public void testNearUnordered() throws Exception { + Query q = new UnorderedNearQuery(0, makeTermQuery("u1"), makeTermQuery("u2")); + checkIntervals(q, searcher, new int[][]{ + { 4, 1, 2 }, + { 5, 2, 3 }, + { 8, 2, 3 }, + { 9, 0, 1 }, + { 10, 0, 1 } + }); + } + + public void testMultipleNearUnordered() throws Exception { + Query q = new UnorderedNearQuery(1, makeTermQuery("w1"), makeTermQuery("w2"), makeTermQuery("w3")); + checkIntervals(q, searcher, new int[][]{ + { 0, 0, 2 }, + { 1, 0, 2 }, + { 3, 0, 3 } + }); + } + /* + "w1 w2 w3 w4 w5", //0 + "w1 w3 w2 w3",//1 + "w1 xx w2 yy w3",//2 + "w1 w3 xx w2 yy w3",//3 + "u2 u2 u1", //4 + "u2 xx u2 u1",//5 + "u2 u2 xx u1", //6 + "u2 xx u2 yy u1", //7 + "u2 xx u1 u2",//8 + "u1 u2 xx u2",//9 + "u2 u1 xx u2",//10 + "t1 t2 t1 t3 t2 t3"};//11 + */ + + // ((u1 
near u2) and xx) + public void testNestedNear() throws Exception { + + Query q = new UnorderedNearQuery(0, makeTermQuery("u1"), makeTermQuery("u2")); + BooleanQuery topq = new BooleanQuery(); + topq.add(q, BooleanClause.Occur.MUST); + topq.add(makeTermQuery("xx"), BooleanClause.Occur.MUST); + + checkIntervals(topq, searcher, new int[][]{ + { 5, 1, 1, 2, 3 }, + { 8, 1, 1, 2, 3 }, + { 9, 0, 1, 2, 2 }, + { 10, 0, 1, 2, 2 } + }); + + } +} diff --git a/lucene/core/src/test/org/apache/lucene/search/posfilter/TestPositionsAndOffsets.java b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestPositionsAndOffsets.java new file mode 100644 index 0000000..d75663b5 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestPositionsAndOffsets.java @@ -0,0 +1,68 @@ +package org.apache.lucene.search.posfilter; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; + +import java.io.IOException; + +// We need to store offsets here, so don't use the following Codecs, which don't +// support them. +@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"}) +public class TestPositionsAndOffsets extends IntervalTestBase { + + protected void addDocs(RandomIndexWriter writer) throws IOException { + FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED); + fieldType.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + Document doc = new Document(); + doc.add(newField( + "field", + "Pease porridge hot! Pease porridge cold! Pease porridge in the pot nine days old! Some like it hot, some" + + " like it cold, Some like it in the pot nine days old! Pease porridge hot! 
Pease porridge cold!", + fieldType)); + writer.addDocument(doc); + } + + public void testTermQueryOffsets() throws IOException { + Query query = new TermQuery(new Term("field", "porridge")); + checkIntervalOffsets(query, searcher, new int[][]{ + { 0, 6, 14, 26, 34, 47, 55, 164, 172, 184, 192 } + }); + } + + public void testBooleanQueryOffsets() throws IOException { + BooleanQuery query = new BooleanQuery(); + query.add(new BooleanClause(new TermQuery(new Term("field", "porridge")), + BooleanClause.Occur.MUST)); + query.add(new BooleanClause(new TermQuery(new Term("field", "nine")), + BooleanClause.Occur.MUST)); + checkIntervalOffsets(query, searcher, new int[][]{ + { 0, 6, 14, 26, 34, 47, 55, 67, 71, 143, 147, 164, 172, 184, 192 } + }); + } + +} \ No newline at end of file diff --git a/lucene/core/src/test/org/apache/lucene/search/posfilter/TestRangeFilterQuery.java b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestRangeFilterQuery.java new file mode 100644 index 0000000..4b5dde0 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/posfilter/TestRangeFilterQuery.java @@ -0,0 +1,72 @@ +package org.apache.lucene.search.posfilter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.posfilter.OrderedNearQuery; +import org.apache.lucene.search.posfilter.RangeFilterQuery; +import org.junit.Test; + +import java.io.IOException; + +public class TestRangeFilterQuery extends IntervalTestBase { + + @Override + protected void addDocs(RandomIndexWriter writer) throws IOException { + for (int i = 0; i < docFields.length; i++) { + Document doc = new Document(); + doc.add(newField("field", docFields[i], TextField.TYPE_STORED)); + writer.addDocument(doc); + } + } + + private String[] docFields = { + "w1 w2 w3 w4 w5 w6 w7 w8 w9 w10 w11 w12", //0 + "w1 w3 w4 w5 w6 w7 w8 w4", //1 + "w1 w3 w10 w4 w5 w6 w7 w8", //2 + "w1 w3 w2 w4 w10 w5 w6 w7 w8", //3 + }; + + @Test + public void testSimpleTermRangeFilter() throws IOException { + Query q = new RangeFilterQuery(2, makeTermQuery("w4")); + checkIntervals(q, searcher, new int[][]{ + { 1, 2, 2 } + }); + } + + @Test + public void testStartEndTermRangeFilter() throws IOException { + Query q = new RangeFilterQuery(2, 4, makeTermQuery("w3")); + checkIntervals(q, searcher, new int[][]{ + { 0, 2, 2 } + }); + } + + public void testRangeFilteredPositionFilter() throws IOException { + Query q = new OrderedNearQuery(0, makeTermQuery("w4"), makeTermQuery("w5")); + q = new RangeFilterQuery(3, 10, q); + checkIntervals(q, searcher, new int[][]{ + { 0, 3, 4 }, + { 2, 3, 4 } + }); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index 475fb28..3e6b950 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -21,15 +21,16 @@ import 
org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.CheckHits; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Weight; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -167,7 +168,7 @@ public class TestNearSpansOrdered extends LuceneTestCase { Weight w = searcher.createNormalizedWeight(q); IndexReaderContext topReaderContext = searcher.getTopReaderContext(); AtomicReaderContext leave = topReaderContext.leaves().get(0); - Scorer s = w.scorer(leave, leave.reader().getLiveDocs()); + Scorer s = w.scorer(leave, DocsEnum.FLAG_POSITIONS, leave.reader().getLiveDocs()); assertEquals(1, s.advance(1)); } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java index de0038c..a0c51bb 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -17,14 +17,12 @@ package org.apache.lucene.search.spans; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.List; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexWriter; @@ -43,6 +41,9 @@ import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.List; + public class TestSpans extends LuceneTestCase { private IndexSearcher searcher; private IndexReader reader; @@ -429,7 +430,7 @@ public class TestSpans extends LuceneTestCase { slop, ordered); - spanScorer = searcher.createNormalizedWeight(snq).scorer(ctx, ctx.reader().getLiveDocs()); + spanScorer = searcher.createNormalizedWeight(snq).scorer(ctx, DocsEnum.FLAG_POSITIONS, ctx.reader().getLiveDocs()); } finally { searcher.setSimilarity(oldSim); } diff --git a/lucene/expressions/src/java/org/apache/lucene/expressions/ExpressionRescorer.java b/lucene/expressions/src/java/org/apache/lucene/expressions/ExpressionRescorer.java index 4314c27..352184d 100644 --- a/lucene/expressions/src/java/org/apache/lucene/expressions/ExpressionRescorer.java +++ b/lucene/expressions/src/java/org/apache/lucene/expressions/ExpressionRescorer.java @@ -17,12 +17,6 @@ package org.apache.lucene.expressions; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.queries.function.ValueSource; @@ -34,6 +28,12 @@ import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortRescorer; import org.apache.lucene.search.Weight; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + /** * A {@link Rescorer} that uses an expression to re-score * first pass hits. Functionally this is the same as {@link @@ -83,6 +83,11 @@ class ExpressionRescorer extends SortRescorer { } @Override + public int nextPosition() throws IOException { + throw new UnsupportedOperationException("FakeScorer doesn't support nextPosition()"); + } + + @Override public int nextDoc() { throw new UnsupportedOperationException("FakeScorer doesn't support nextDoc()"); } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java index 972df94..07cd667 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java @@ -16,11 +16,9 @@ package org.apache.lucene.facet; * See the License for the specific language governing permissions and * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Arrays; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Collector; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; @@ -29,10 +27,11 @@ import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.Arrays; /** Only purpose is to punch through and return a * DrillSidewaysScorer */ @@ -118,17 +117,17 @@ class DrillSidewaysQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { // We can only run as a top scorer: throw new UnsupportedOperationException(); } @Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, int flags, Bits acceptDocs) throws IOException { // TODO: it could be better if we take acceptDocs // into account instead of baseScorer? 
- Scorer baseScorer = baseWeight.scorer(context, acceptDocs); + Scorer baseScorer = baseWeight.scorer(context, flags, acceptDocs); DrillSidewaysScorer.DocsAndCost[] dims = new DrillSidewaysScorer.DocsAndCost[drillDowns.length]; int nullCount = 0; @@ -173,7 +172,7 @@ class DrillSidewaysQuery extends Query { dims[dim].disi = disi; } } else { - DocIdSetIterator disi = ((Weight) drillDowns[dim]).scorer(context, null); + DocIdSetIterator disi = ((Weight) drillDowns[dim]).scorer(context, flags, null); if (disi == null) { nullCount++; continue; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java index 273b6b1..82b6805 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java @@ -17,21 +17,21 @@ package org.apache.lucene.facet; * limitations under the License. */ -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.search.LeafCollector; +import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Collector; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; + class DrillSidewaysScorer extends BulkScorer { //private static boolean DEBUG = false; @@ -645,6 +645,11 @@ class DrillSidewaysScorer extends BulkScorer { } @Override + public int nextPosition() throws IOException { + throw new UnsupportedOperationException("FakeScorer doesn't support nextPosition()"); + } + 
+ @Override public int nextDoc() { throw new UnsupportedOperationException("FakeScorer doesn't support nextDoc()"); } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetSumValueSource.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetSumValueSource.java index 6514a3c..5d08d77 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetSumValueSource.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetSumValueSource.java @@ -17,14 +17,8 @@ package org.apache.lucene.facet.taxonomy; * limitations under the License. */ -import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.lucene.facet.FacetsCollector.MatchingDocs; import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsCollector.MatchingDocs; import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.queries.function.FunctionValues; @@ -35,6 +29,12 @@ import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; import org.apache.lucene.util.IntsRef; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + /** Aggregates sum of values from {@link * FunctionValues#doubleVal}, for each facet label. 
* @@ -68,6 +68,12 @@ public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets { FakeScorer() { super(null); } @Override public float score() throws IOException { return score; } @Override public int freq() throws IOException { throw new UnsupportedOperationException(); } + + @Override + public int nextPosition() throws IOException { + throw new UnsupportedOperationException(); + } + @Override public int docID() { return docID; } @Override public int nextDoc() throws IOException { throw new UnsupportedOperationException(); } @Override public int advance(int target) throws IOException { throw new UnsupportedOperationException(); } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java index a06bca3..d1a8b77 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java @@ -1,16 +1,16 @@ package org.apache.lucene.facet.taxonomy.directory; -import java.io.IOException; - import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.ArrayUtil; +import java.io.IOException; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -129,9 +129,9 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays { // it's ok to use MultiFields because we only iterate on one posting list. 
// breaking it to loop over the leaves() only complicates code for no // apparent gain. - DocsAndPositionsEnum positions = MultiFields.getTermPositionsEnum(reader, null, + DocsEnum positions = MultiFields.getTermPositionsEnum(reader, null, Consts.FIELD_PAYLOADS, Consts.PAYLOAD_PARENT_BYTES_REF, - DocsAndPositionsEnum.FLAG_PAYLOADS); + DocsEnum.FLAG_PAYLOADS); // shouldn't really happen, if it does, something's wrong if (positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) { diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java index f40c2a7..0e1b860 100644 --- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java @@ -18,15 +18,26 @@ package org.apache.lucene.search.grouping; */ -import java.io.IOException; -import java.util.Collection; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.search.*; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopDocsCollector; +import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.search.Weight; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.PriorityQueue; +import java.io.IOException; +import java.util.Collection; + // TODO: this sentence is too long for the class summary. 
/** BlockGroupingCollector performs grouping with a * single pass collector, as long as you are grouping by a @@ -105,6 +116,11 @@ public class BlockGroupingCollector extends SimpleCollector { } @Override + public int nextPosition() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public int docID() { return doc; } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java index 68433a9..130b575 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.StoredDocument; @@ -213,7 +213,7 @@ public class TokenSources { ArrayList unsortedTokens = null; termsEnum = tpv.iterator(null); BytesRef text; - DocsAndPositionsEnum dpEnum = null; + DocsEnum dpEnum = null; while ((text = termsEnum.next()) != null) { dpEnum = termsEnum.docsAndPositions(null, dpEnum); diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java index bd87206..bed9505 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java +++ 
b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; @@ -67,7 +67,7 @@ public final class TokenStreamFromTermPositionVector extends TokenStream { final boolean hasPayloads = vector.hasPayloads(); final TermsEnum termsEnum = vector.iterator(null); BytesRef text; - DocsAndPositionsEnum dpEnum = null; + DocsEnum dpEnum = null; while((text = termsEnum.next()) != null) { dpEnum = termsEnum.docsAndPositions(null, dpEnum); assert dpEnum != null; // presumably checked by TokenSources.hasPositions earlier diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/DocAndPositions.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/DocAndPositions.java new file mode 100644 index 0000000..f5a21a1 --- /dev/null +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/DocAndPositions.java @@ -0,0 +1,66 @@ +package org.apache.lucene.search.highlight.positions; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.posfilter.Interval; +import org.apache.lucene.util.ArrayUtil; + +import java.util.Comparator; + +/** Used to accumulate position intervals while scoring + * @lucene.experimental + */ +public final class DocAndPositions extends ScoreDoc { + + public int posCount = 0; + public Interval[] positions; + + public DocAndPositions(int doc) { + super(doc, 0); + positions = new Interval[32]; + } + + public void storePosition (Interval pos) { + ensureStorage(); + positions[posCount++] = (Interval) pos.clone(); + } + + private void ensureStorage () { + if (posCount >= positions.length) { + Interval temp[] = new Interval[positions.length * 2]; + System.arraycopy(positions, 0, temp, 0, positions.length); + positions = temp; + } + } + + public Interval[] sortedPositions() { + ArrayUtil.timSort(positions, 0, posCount, new Comparator() { + public int compare(Interval o1, Interval o2) { + return + o1.begin < o2.begin ? -1 : + (o1.begin > o2.begin ? 1 : + (o1.end < o2.end ? -1 : + (o1.end > o2.end ? 
1 : + 0))); + } + + }); + return positions; + } +} diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/HighlightingIntervalCollector.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/HighlightingIntervalCollector.java new file mode 100644 index 0000000..93b0682 --- /dev/null +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/HighlightingIntervalCollector.java @@ -0,0 +1,54 @@ +package org.apache.lucene.search.highlight.positions; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.PositionsCollector; +import org.apache.lucene.search.posfilter.Interval; + +/** + * Collects the first maxDocs docs and their positions matching the query + * + * @lucene.experimental + */ + +public class HighlightingIntervalCollector extends PositionsCollector { + + int count; + DocAndPositions docs[]; + + public HighlightingIntervalCollector (int maxDocs) { + docs = new DocAndPositions[maxDocs]; + } + + @Override + protected void collectPosition(int doc, Interval interval) { + if (count > docs.length) + return; // TODO can we indicate collection has finished somehow? 
+ if (count <= 0 || docs[count - 1].doc != doc) { + DocAndPositions spdoc = new DocAndPositions(doc); + docs[count++] = spdoc; + } + docs[count - 1].storePosition(interval); + } + + public DocAndPositions[] getDocs () { + DocAndPositions ret[] = new DocAndPositions[count]; + System.arraycopy(docs, 0, ret, 0, count); + return ret; + } + +} diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/IntervalTokenStream.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/IntervalTokenStream.java new file mode 100644 index 0000000..e74959c --- /dev/null +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/IntervalTokenStream.java @@ -0,0 +1,73 @@ +package org.apache.lucene.search.highlight.positions; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.search.posfilter.Interval; + +import java.io.IOException; + +/** + * A TokenStream constructed from a stream of positions and their offsets. + * The document is segmented into tokens at the start and end offset of each interval. The intervals + * are assumed to be non-overlapping. + * + * TODO: abstract the dependency on the current PositionOffsetMapper impl; + * allow for implementations of position->offset maps that don't rely on term vectors. + * + * @lucene.experimental + */ +public class IntervalTokenStream extends TokenStream { + + //this tokenizer generates four attributes: + // term, offset, positionIncrement? and type? + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + //private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final String text; + private final Interval[] positions; + + // the index of the current position interval + private int pos = -1; + + public IntervalTokenStream (String text, Interval[] positions) { + this.text = text; + this.positions = positions; + } + + @Override + public final boolean incrementToken() throws IOException { + if (++pos >= positions.length) + return false; + if (positions[pos] == null) + return false; + int b, e; + b = positions[pos].offsetBegin; + e = positions[pos].offsetEnd; + assert b >=0; + termAtt.append(text, b, e); + offsetAtt.setOffset(b, e); + posIncrAtt.setPositionIncrement(1); + return true; + } + +} diff --git 
a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java index bf2f1d2..903c47e 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java @@ -17,16 +17,10 @@ package org.apache.lucene.search.postingshighlight; * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; - import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.search.AutomatonQuery; import org.apache.lucene.search.BooleanClause; @@ -47,10 +41,16 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.automaton.Automata; -import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.LevenshteinAutomata; -import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.Operations; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; /** * Support for highlighting multiterm queries in PostingsHighlighter. @@ -197,7 +197,7 @@ class MultiTermHighlighting { *

    * This is solely used internally by PostingsHighlighter: DO NOT USE THIS METHOD! */ - static DocsAndPositionsEnum getDocsEnum(final TokenStream ts, final CharacterRunAutomaton[] matchers) throws IOException { + static DocsEnum getDocsEnum(final TokenStream ts, final CharacterRunAutomaton[] matchers) throws IOException { final CharTermAttribute charTermAtt = ts.addAttribute(CharTermAttribute.class); final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); ts.reset(); @@ -207,7 +207,7 @@ class MultiTermHighlighting { // would only serve to make this method less bogus. // instead, we always return freq() = Integer.MAX_VALUE and let PH terminate based on offset... - return new DocsAndPositionsEnum() { + return new DocsEnum() { int currentDoc = -1; int currentMatch = -1; int currentStartOffset = -1; diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java index e55ae36..de69b31 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java @@ -17,23 +17,10 @@ package org.apache.lucene.search.postingshighlight; * limitations under the License. 
*/ -import java.io.IOException; -import java.text.BreakIterator; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.PriorityQueue; -import java.util.SortedSet; -import java.util.TreeSet; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.IndexReader; @@ -53,6 +40,19 @@ import org.apache.lucene.util.InPlaceMergeSorter; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import java.io.IOException; +import java.text.BreakIterator; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.SortedSet; +import java.util.TreeSet; + /** * Simple highlighter that does not analyze fields nor use * term vectors. 
Instead it requires @@ -455,7 +455,7 @@ public class PostingsHighlighter { private Map highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List leaves, int maxPassages, Query query) throws IOException { Map highlights = new HashMap<>(); - + PassageFormatter fieldFormatter = getFormatter(field); if (fieldFormatter == null) { throw new NullPointerException("PassageFormatter cannot be null"); @@ -477,7 +477,7 @@ public class PostingsHighlighter { // we are processing in increasing docid order, so we only need to reinitialize stuff on segment changes // otherwise, we will just advance() existing enums to the new document in the same segment. - DocsAndPositionsEnum postings[] = null; + DocsEnum postings[] = null; TermsEnum termsEnum = null; int lastLeaf = -1; @@ -499,7 +499,7 @@ public class PostingsHighlighter { Terms t = r.terms(field); if (t != null) { termsEnum = t.iterator(null); - postings = new DocsAndPositionsEnum[terms.length]; + postings = new DocsEnum[terms.length]; } } if (termsEnum == null) { @@ -508,7 +508,7 @@ public class PostingsHighlighter { // if there are multi-term matches, we have to initialize the "fake" enum for each document if (automata.length > 0) { - DocsAndPositionsEnum dp = MultiTermHighlighting.getDocsEnum(analyzer.tokenStream(field, content), automata); + DocsEnum dp = MultiTermHighlighting.getDocsEnum(analyzer.tokenStream(field, content), automata); dp.advance(doc - subContext.docBase); postings[terms.length-1] = dp; // last term is the multiterm matcher } @@ -534,7 +534,7 @@ public class PostingsHighlighter { // we can intersect these with the postings lists via BreakIterator.preceding(offset),s // score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq)) private Passage[] highlightDoc(String field, BytesRef terms[], int contentLength, BreakIterator bi, int doc, - TermsEnum termsEnum, DocsAndPositionsEnum[] postings, int n) throws IOException { + TermsEnum termsEnum, 
DocsEnum[] postings, int n) throws IOException { PassageScorer scorer = getScorer(field); if (scorer == null) { throw new NullPointerException("PassageScorer cannot be null"); @@ -543,7 +543,7 @@ public class PostingsHighlighter { float weights[] = new float[terms.length]; // initialize postings for (int i = 0; i < terms.length; i++) { - DocsAndPositionsEnum de = postings[i]; + DocsEnum de = postings[i]; int pDoc; if (de == EMPTY) { continue; @@ -552,7 +552,7 @@ public class PostingsHighlighter { if (!termsEnum.seekExact(terms[i])) { continue; // term not found } - de = postings[i] = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_OFFSETS); + de = postings[i] = termsEnum.docsAndPositions(null, null, DocsEnum.FLAG_OFFSETS); if (de == null) { // no positions available throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight"); @@ -590,7 +590,7 @@ public class PostingsHighlighter { OffsetsEnum off; while ((off = pq.poll()) != null) { - final DocsAndPositionsEnum dp = off.dp; + final DocsEnum dp = off.dp; int start = dp.startOffset(); if (start == -1) { throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight"); @@ -698,11 +698,11 @@ public class PostingsHighlighter { } private static class OffsetsEnum implements Comparable { - DocsAndPositionsEnum dp; + DocsEnum dp; int pos; int id; - OffsetsEnum(DocsAndPositionsEnum dp, int id) throws IOException { + OffsetsEnum(DocsEnum dp, int id) throws IOException { this.dp = dp; this.id = id; this.pos = 1; @@ -724,10 +724,10 @@ public class PostingsHighlighter { } } - private static final DocsAndPositionsEnum EMPTY = new DocsAndPositionsEnum() { + private static final DocsEnum EMPTY = new DocsEnum() { @Override - public int nextPosition() throws IOException { return 0; } + public int nextPosition() throws IOException { return NO_MORE_POSITIONS; } @Override public int startOffset() throws IOException { return 
Integer.MAX_VALUE; } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java index 29c307a..1f629d6 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java @@ -22,7 +22,7 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.Set; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -93,7 +93,7 @@ public class FieldTermStack { final CharsRefBuilder spare = new CharsRefBuilder(); final TermsEnum termsEnum = vector.iterator(null); - DocsAndPositionsEnum dpEnum = null; + DocsEnum dpEnum = null; BytesRef text; int numDocs = reader.maxDoc(); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java index 536259a..a52d9b7 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java @@ -16,9 +16,6 @@ package org.apache.lucene.search.highlight.custom; * See the License for the specific language governing permissions and * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Map; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; @@ -36,6 +33,9 @@ import org.apache.lucene.search.highlight.WeightedSpanTerm; import org.apache.lucene.search.highlight.WeightedSpanTermExtractor; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.Map; + /** * Tests the extensibility of {@link WeightedSpanTermExtractor} and * {@link QueryScorer} in a user defined package diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/positions/IntervalHighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/positions/IntervalHighlighterTest.java new file mode 100644 index 0000000..37cd49d --- /dev/null +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/positions/IntervalHighlighterTest.java @@ -0,0 +1,512 @@ +package org.apache.lucene.search.highlight.positions; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenFilter; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.apache.lucene.search.highlight.SimpleFragmenter; +import org.apache.lucene.search.highlight.TextFragment; +import org.apache.lucene.search.posfilter.NonOverlappingQuery; +import org.apache.lucene.search.posfilter.OrderedNearQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.junit.Ignore; + +import java.io.IOException; +import java.io.StringReader; + +/** + * TODO: FIX THIS TEST Phrase and Span Queries positions callback API + */ +@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"}) +public 
class IntervalHighlighterTest extends LuceneTestCase { + + protected final static String F = "f"; + protected Analyzer analyzer; + protected Directory dir; + protected IndexSearcher searcher; + private IndexWriterConfig iwc; + + private static final String PORRIDGE_VERSE = "Pease porridge hot! Pease porridge cold! Pease porridge in the pot nine days old! Some like it hot, some" + + " like it cold, Some like it in the pot nine days old! Pease porridge hot! Pease porridge cold!"; + + public void setUp() throws Exception { + super.setUp(); + iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.CREATE); + analyzer = iwc.getAnalyzer(); + dir = newDirectory(); + } + + public void close() throws IOException { + if (searcher != null) { + searcher.getIndexReader().close(); + searcher = null; + } + dir.close(); + } + + // make several docs + protected void insertDocs(Analyzer analyzer, String... values) + throws Exception { + IndexWriter writer = new IndexWriter(dir, iwc); + FieldType type = new FieldType(); + type.setIndexed(true); + type.setTokenized(true); + type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + type.setStored(true); + for (String value : values) { + Document doc = new Document(); + Field f = newField(F, value, type); + doc.add(f); + writer.addDocument(doc); + } + writer.close(); + if (searcher != null) { + searcher.getIndexReader().close(); + } + searcher = new IndexSearcher(DirectoryReader.open(dir)); + } + + protected static TermQuery termQuery(String term) { + return new TermQuery(new Term(F, term)); + } + + private String[] doSearch(Query q) throws IOException, + InvalidTokenOffsetsException { + return doSearch(q, 100); + } + + private class ConstantScorer implements + org.apache.lucene.search.highlight.Scorer { + + @Override + public TokenStream init(TokenStream tokenStream) throws IOException { + return tokenStream; + } + + @Override + public void 
startFragment(TextFragment newFragment) {} + + @Override + public float getTokenScore() { + return 1; + } + + @Override + public float getFragmentScore() { + return 1; + } + } + + private String getHighlight(Query q) throws IOException, InvalidTokenOffsetsException { + return doSearch(q, Integer.MAX_VALUE)[0]; + } + + private String[] doSearch(Query q, int maxFragSize) throws IOException, + InvalidTokenOffsetsException { + return doSearch(q, maxFragSize, 0); + } + private String[] doSearch(Query q, int maxFragSize, int docIndex) throws IOException, InvalidTokenOffsetsException { + return doSearch(q, maxFragSize, docIndex, false); + } + private String[] doSearch(Query q, int maxFragSize, int docIndex, boolean analyze) + throws IOException, InvalidTokenOffsetsException { + // ConstantScorer is a fragment Scorer, not a search result (document) + // Scorer + Highlighter highlighter = new Highlighter(new ConstantScorer()); + highlighter.setTextFragmenter(new SimpleFragmenter(maxFragSize)); + HighlightingIntervalCollector collector = new HighlightingIntervalCollector(10); + if (q instanceof MultiTermQuery) { + ((MultiTermQuery) q) + .setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); + } + searcher.search(q, collector); + DocAndPositions doc = collector.docs[docIndex]; + if (doc == null) return null; + String text = searcher.getIndexReader().document(doc.doc).get(F); + // FIXME: test error cases: for non-stored fields, and fields w/no term + // vectors + // searcher.getIndexReader().getTermFreqVector(doc.doc, F, pom); + final TokenStream stream; + if (analyze) { + stream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, + MockTokenFilter.EMPTY_STOPSET).tokenStream(F, + new StringReader(text)); + } else { + stream = new IntervalTokenStream(text, doc.sortedPositions()); + } + // + TextFragment[] fragTexts = highlighter.getBestTextFragments( + stream , text, false, 10); + String[] frags = new String[fragTexts.length]; + for (int i = 0; i < 
frags.length; i++) + frags[i] = fragTexts[i].toString(); + return frags; + } + + public void testTerm() throws Exception { + insertDocs(analyzer, "This is a test test"); + String frags[] = doSearch(termQuery("test")); + assertEquals("This is a test test", frags[0]); + close(); + } + + public void testSeveralSnippets() throws Exception { + String input = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " + + "Let us see what happens to long in this case."; + String gold = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " + + "Let us see what happens to long in this case."; + insertDocs(analyzer, input); + String frags[] = doSearch(termQuery("long"), input.length()); + assertEquals(gold, frags[0]); + close(); + } + + public void testBooleanAnd() throws Exception { + insertDocs(analyzer, "This is a test"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(termQuery("This"), Occur.MUST)); + bq.add(new BooleanClause(termQuery("test"), Occur.MUST)); + String frags[] = doSearch(bq); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testConstantScore() throws Exception { + insertDocs(analyzer, "This is a test"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(termQuery("This"), Occur.MUST)); + bq.add(new BooleanClause(termQuery("test"), Occur.MUST)); + String frags[] = doSearch(new ConstantScoreQuery(bq)); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testBooleanAndOtherOrder() throws Exception { + insertDocs(analyzer, "This is a test"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "test")), Occur.MUST)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "This")), Occur.MUST)); + String frags[] = doSearch(bq); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testBooleanOr() throws 
Exception { + insertDocs(analyzer, "This is a test"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "test")), Occur.SHOULD)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "This")), Occur.SHOULD)); + String frags[] = doSearch(bq); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testSingleMatchScorer() throws Exception { + insertDocs(analyzer, "This is a test"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "test")), Occur.SHOULD)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "notoccurringterm")), + Occur.SHOULD)); + String frags[] = doSearch(bq); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testBooleanNrShouldMatch() throws Exception { + insertDocs(analyzer, "a b c d e f g h i"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "a")), Occur.SHOULD)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "b")), Occur.SHOULD)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "no")), Occur.SHOULD)); + + // This generates a ConjunctionSumScorer + bq.setMinimumNumberShouldMatch(2); + String frags[] = doSearch(bq); + assertEquals("a b c d e f g h i", frags[0]); + + // This generates no scorer + bq.setMinimumNumberShouldMatch(3); + frags = doSearch(bq); + assertNull(frags); + + // This generates a DisjunctionSumScorer + bq.setMinimumNumberShouldMatch(2); + bq.add(new BooleanClause(new TermQuery(new Term(F, "c")), Occur.SHOULD)); + frags = doSearch(bq); + assertEquals("a b c d e f g h i", frags[0]); + close(); + } + + public void testPhrase() throws Exception { + insertDocs(analyzer, "is it that this is a test, is it"); + PhraseQuery pq = new PhraseQuery(); + pq.add(new Term(F, "is")); + pq.add(new Term(F, "a")); + String frags[] = doSearch(pq); + // make sure we highlight the phrase, and not the terms outside the phrase + assertEquals("is it that this is a 
test, is it", frags[0]); + close(); + } + + /* + * Failing ... PhraseQuery scorer needs positions()? + */ + //@Ignore + public void testPhraseOriginal() throws Exception { + insertDocs(analyzer, "This is a test"); + PhraseQuery pq = new PhraseQuery(); + pq.add(new Term(F, "a")); + pq.add(new Term(F, "test")); + String frags[] = doSearch(pq); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testNestedBoolean() throws Exception { + insertDocs(analyzer, "This is a test"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "test")), Occur.SHOULD)); + BooleanQuery bq2 = new BooleanQuery(); + bq2.add(new BooleanClause(new TermQuery(new Term(F, "This")), Occur.SHOULD)); + bq2.add(new BooleanClause(new TermQuery(new Term(F, "is")), Occur.SHOULD)); + bq.add(new BooleanClause(bq2, Occur.SHOULD)); + String frags[] = doSearch(bq); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testWildcard() throws Exception { + insertDocs(analyzer, "This is a test"); + String frags[] = doSearch(new WildcardQuery(new Term(F, "t*t"))); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testMixedBooleanNot() throws Exception { + insertDocs(analyzer, "this is a test", "that is an elephant"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "test")), Occur.MUST)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "that")), Occur.MUST_NOT)); + String frags[] = doSearch(bq); + assertEquals("this is a test", frags[0]); + close(); + } + + public void testMixedBooleanShould() throws Exception { + insertDocs(analyzer, "this is a test", "that is an elephant", "the other was a rhinoceros"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "is")), Occur.MUST)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "test")), Occur.SHOULD)); + String frags[] = doSearch(bq, 50, 0); + assertEquals("this 
is a test", frags[0]); + frags = doSearch(bq, 50, 1); + assertEquals("that is an elephant", frags[0]); + + bq.add(new BooleanClause(new TermQuery(new Term(F, "rhinoceros")), Occur.SHOULD)); + frags = doSearch(bq, 50, 0); + assertEquals("this is a test", frags[0]); + frags = doSearch(bq, 50, 1); + assertEquals("that is an elephant", frags[0]); + close(); + } + + public void testMultipleDocumentsAnd() throws Exception { + insertDocs(analyzer, "This document has no matches", PORRIDGE_VERSE, + "This document has some Pease porridge in it"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "Pease")), Occur.MUST)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "porridge")), Occur.MUST)); + String frags[] = doSearch(bq, 50, 0); + assertEquals( + "Pease porridge hot! Pease porridge cold! Pease", + frags[0]); + frags = doSearch(bq, 50, 1); + assertEquals("This document has some Pease porridge in it", + frags[0]); + close(); + } + + + public void testMultipleDocumentsOr() throws Exception { + insertDocs(analyzer, "This document has no matches", PORRIDGE_VERSE, + "This document has some Pease porridge in it"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "Pease")), Occur.SHOULD)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "porridge")), + Occur.SHOULD)); + String frags[] = doSearch(bq, 50, 0); + assertEquals( + "Pease porridge hot! Pease porridge cold! 
Pease", + frags[0]); + frags = doSearch(bq, 50, 1); + assertEquals("This document has some Pease porridge in it", + frags[0]); + close(); + } + + public void testBrouwerianQuery() throws Exception { + + insertDocs(analyzer, "the quick brown duck jumps over the lazy dog with the quick brown fox"); + + BooleanQuery query = new BooleanQuery(); + query.add(new BooleanClause(new TermQuery(new Term(F, "the")), Occur.MUST)); + query.add(new BooleanClause(new TermQuery(new Term(F, "quick")), Occur.MUST)); + query.add(new BooleanClause(new TermQuery(new Term(F, "jumps")), Occur.MUST)); + + assertEquals(getHighlight(query), + "the quick brown duck jumps over the lazy dog with the quick brown fox"); + + BooleanQuery sub = new BooleanQuery(); + sub.add(new BooleanClause(new TermQuery(new Term(F, "duck")), Occur.MUST)); + NonOverlappingQuery bq = new NonOverlappingQuery(query, sub); + + assertEquals(getHighlight(bq), + "the quick brown duck jumps over the lazy dog with the quick brown fox"); + + close(); + } + + @Ignore("not implemented yet - unsupported") + public void testMultiPhraseQuery() throws Exception { + MultiPhraseQuery query = new MultiPhraseQuery(); + insertDocs(analyzer, "pease porridge hot but not too hot or otherwise pease porridge cold"); + + query.add(terms(F, "pease"), 0); + query.add(terms(F, "porridge"), 1); + query.add(terms(F, "hot", "cold"), 2); + query.setSlop(1); + + String[] frags = doSearch(query, Integer.MAX_VALUE); + assertEquals("pease porridge hot but not too hot or otherwise pease porridge cold", frags[0]); + + close(); + } + + @Ignore("not implemented yet - unsupported") + public void testMultiPhraseQueryCollisions() throws Exception { + MultiPhraseQuery query = new MultiPhraseQuery(); + insertDocs(analyzer, "pease porridge hot not too hot or otherwise pease porridge porridge"); + + query.add(terms(F, "pease"), 0); + query.add(terms(F, "porridge"), 1); + query.add(terms(F, "coldasice", "porridge" ), 2); + query.setSlop(1); + + String[] frags = 
doSearch(query, Integer.MAX_VALUE); + assertEquals("pease porridge hot but not too hot or otherwise pease porridge porridge", frags[0]); + + close(); + } + + public void testNearPhraseQuery() throws Exception { + + insertDocs(analyzer, "pease porridge rather hot and pease porridge fairly cold"); + + Query firstQ = new OrderedNearQuery(4, termQuery("pease"), termQuery("porridge"), termQuery("hot")); + { + String frags[] = doSearch(firstQ, Integer.MAX_VALUE); + assertEquals("pease porridge rather hot and pease porridge fairly cold", frags[0]); + } + + // near.3(near.4(pease, porridge, hot), near.4(pease, porridge, cold)) + Query q = new OrderedNearQuery(3, + firstQ, + new OrderedNearQuery(4, termQuery("pease"), termQuery("porridge"), termQuery("cold"))); + + String frags[] = doSearch(q, Integer.MAX_VALUE); + assertEquals("pease porridge rather hot and pease porridge fairly cold", + frags[0]); + + close(); + } + + private Term[] terms(String field, String...tokens) { + Term[] terms = new Term[tokens.length]; + for (int i = 0; i < tokens.length; i++) { + terms[i] = new Term(field, tokens[i]); + } + return terms; + } + + public void testSloppyPhraseQuery() throws Exception { + assertSloppyPhrase( "a b c d a b c d e f", "a b c d a b c d e f", 2, "c", "a"); + assertSloppyPhrase( "a c e b d e f a b","a c e b d e f a b", 2, "a", "b"); + assertSloppyPhrase( "Y A X B A", "Y A X B A", 2, "X", "A", "A"); + + assertSloppyPhrase( "X A X B A","X A X B A", 2, "X", "A", "A"); // non overlapping minmal!! 
+ assertSloppyPhrase( "A A A X",null, 2, "X", "A", "A"); + assertSloppyPhrase( "A A X A", "A A X A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A X A Y B A", "A A X A Y B A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A X", null, 2, "X", "A", "A"); + assertSloppyPhrase( "A X A", null, 1, "X", "A", "A"); + + assertSloppyPhrase( "A X B A", "A X B A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A X A X B A X B B A A X B A A", "A A X A X B A X B B A A X B A A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A X A X B A X B B A A X B A A", "A A X A X B A X B B A A X B A A", 2, "X", "A", "A"); + + assertSloppyPhrase( "A A X A X B A", "A A X A X B A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A Y A X B A", "A A Y A X B A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A Y A X B A A", "A A Y A X B A A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A X A Y B A", null , 1, "X", "A", "A"); + close(); + } + + + private void assertSloppyPhrase(String doc, String expected, int slop, String...query) throws Exception { + insertDocs(analyzer, doc); + PhraseQuery pq = new PhraseQuery(); + for (String string : query) { + pq.add(new Term(F, string)); + } + + pq.setSlop(slop); +// System.out.println(doc); + String[] frags = doSearch(pq, 50); + if (expected == null) { + assertNull(frags != null ? frags[0] : "", frags); + } else { + assertEquals(expected, frags[0]); + } + } + +} diff --git a/lucene/join/src/java/org/apache/lucene/search/join/FakeScorer.java b/lucene/join/src/java/org/apache/lucene/search/join/FakeScorer.java index cbd1ff8..61be4bc 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/FakeScorer.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/FakeScorer.java @@ -17,12 +17,13 @@ package org.apache.lucene.search.join; * limitations under the License. 
*/ -import java.util.Collection; - import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; +import java.io.IOException; +import java.util.Collection; + /** Passed to {@link LeafCollector#setScorer} during join collection. */ final class FakeScorer extends Scorer { float score; @@ -49,6 +50,11 @@ final class FakeScorer extends Scorer { } @Override + public int nextPosition() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public int nextDoc() { throw new UnsupportedOperationException("FakeScorer doesn't support nextDoc()"); } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java index 220d0e1..08c926e 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java @@ -17,31 +17,30 @@ package org.apache.lucene.search.join; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Locale; -import java.util.Set; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Collector; +import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.ComplexExplanation; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.FixedBitSet; +import java.io.IOException; +import java.util.Locale; +import java.util.Set; + class TermsIncludingScoreQuery extends Query { final String field; @@ -133,7 +132,7 @@ class TermsIncludingScoreQuery extends Query { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - SVInnerScorer scorer = (SVInnerScorer) bulkScorer(context, false, null); + SVInnerScorer scorer = (SVInnerScorer) bulkScorer(context, false, DocsEnum.FLAG_FREQS, null); if (scorer != null) { return scorer.explain(doc); } @@ -163,7 +162,7 @@ class TermsIncludingScoreQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { Terms terms = context.reader().terms(field); if (terms == null) { return null; @@ -181,10 +180,10 @@ class TermsIncludingScoreQuery extends Query { } 
@Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, int flags, Bits acceptDocs) throws IOException { if (scoreDocsInOrder) { - return super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); } else { Terms terms = context.reader().terms(field); if (terms == null) { @@ -288,6 +287,7 @@ class TermsIncludingScoreQuery extends Query { return new ComplexExplanation(true, scores[ords[scoreUpto]], "Score based on join value " + termsEnum.term().utf8ToString()); } + } // This impl that tracks whether a docid has already been emitted. This check makes sure that docs aren't emitted @@ -361,6 +361,11 @@ class TermsIncludingScoreQuery extends Query { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int docID() { return currentDoc; } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java index c555c4a..1df59a1 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java @@ -17,11 +17,6 @@ package org.apache.lucene.search.join; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.Set; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -35,6 +30,11 @@ import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.Set; + /** * Just like {@link ToParentBlockJoinQuery}, except this * query joins in reverse: you provide a Query matching @@ -124,9 +124,9 @@ public class ToChildBlockJoinQuery extends Query { // NOTE: acceptDocs applies (and is checked) only in the // child document space @Override - public Scorer scorer(AtomicReaderContext readerContext, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext readerContext, int flags, Bits acceptDocs) throws IOException { - final Scorer parentScorer = parentWeight.scorer(readerContext, null); + final Scorer parentScorer = parentWeight.scorer(readerContext, flags, null); if (parentScorer == null) { // No matches @@ -282,6 +282,11 @@ public class ToChildBlockJoinQuery extends Query { } @Override + public int nextPosition() throws IOException { + return -1; // nocommit do positions make sense here? 
+ } + + @Override public int advance(int childTarget) throws IOException { assert childTarget >= parentBits.length() || !parentBits.get(childTarget); diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java index 65767fc..73dd219 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java @@ -18,15 +18,29 @@ package org.apache.lucene.search.join; */ import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexWriter; // javadocs -import org.apache.lucene.search.*; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.FieldValueHitQueue; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreCachingWrappingScorer; +import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer.ChildScorer; +import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopDocsCollector; +import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.search.grouping.GroupDocs; import org.apache.lucene.search.grouping.TopGroups; import org.apache.lucene.util.ArrayUtil; import java.io.IOException; -import java.util.*; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Map; +import java.util.Queue; /** Collects parent document hits for a Query containing one more more diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java index f6985e2..658b0b7 100644 --- 
a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java @@ -17,13 +17,8 @@ package org.apache.lucene.search.join; * limitations under the License. */ -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.Locale; -import java.util.Set; - import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; @@ -41,6 +36,12 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.Locale; +import java.util.Set; + /** * This query requires that you index * children and parent docs as a single block, using the @@ -158,9 +159,9 @@ public class ToParentBlockJoinQuery extends Query { // NOTE: acceptDocs applies (and is checked) only in the // parent document space @Override - public Scorer scorer(AtomicReaderContext readerContext, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext readerContext, int flags, Bits acceptDocs) throws IOException { - final Scorer childScorer = childWeight.scorer(readerContext, readerContext.reader().getLiveDocs()); + final Scorer childScorer = childWeight.scorer(readerContext, flags, readerContext.reader().getLiveDocs()); if (childScorer == null) { // No matches return null; @@ -192,7 +193,7 @@ public class ToParentBlockJoinQuery extends Query { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, context.reader().getLiveDocs()); + BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, DocsEnum.FLAG_FREQS, 
context.reader().getLiveDocs()); if (scorer != null && scorer.advance(doc) == doc) { return scorer.explain(context.docBase); } @@ -379,6 +380,11 @@ public class ToParentBlockJoinQuery extends Query { } @Override + public int nextPosition() throws IOException { + return -1; // nocommit do positions make sense here? + } + + @Override public int advance(int parentTarget) throws IOException { //System.out.println("Q.advance parentTarget=" + parentTarget); diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java index 8642d3c..e094b37 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java @@ -17,6 +17,57 @@ package org.apache.lucene.search.join; * limitations under the License. */ +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LogDocMergePolicy; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.ReaderUtil; +import org.apache.lucene.index.StoredDocument; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import 
org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.NumericRangeQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryUtils; +import org.apache.lucene.search.QueryWrapperFilter; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.grouping.GroupDocs; +import org.apache.lucene.search.grouping.TopGroups; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.TestUtil; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -24,16 +75,6 @@ import java.util.Collections; import java.util.List; import java.util.Locale; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.*; -import org.apache.lucene.index.*; -import org.apache.lucene.search.*; -import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.grouping.GroupDocs; -import org.apache.lucene.search.grouping.TopGroups; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.*; - public class TestBlockJoin extends LuceneTestCase { // One resume... 
@@ -1148,7 +1189,7 @@ public class TestBlockJoin extends LuceneTestCase { ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg); Weight weight = s.createNormalizedWeight(q); - DocIdSetIterator disi = weight.scorer(s.getIndexReader().leaves().get(0), null); + DocIdSetIterator disi = weight.scorer(s.getIndexReader().leaves().get(0), DocsEnum.FLAG_FREQS, null); assertEquals(1, disi.advance(1)); r.close(); dir.close(); @@ -1182,7 +1223,7 @@ public class TestBlockJoin extends LuceneTestCase { ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg); Weight weight = s.createNormalizedWeight(q); - DocIdSetIterator disi = weight.scorer(s.getIndexReader().leaves().get(0), null); + DocIdSetIterator disi = weight.scorer(s.getIndexReader().leaves().get(0), DocsEnum.FLAG_FREQS, null); assertEquals(2, disi.advance(0)); r.close(); dir.close(); diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index f7cdf60..9f58883 100644 --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -17,15 +17,6 @@ package org.apache.lucene.index.memory; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.NoSuchElementException; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -34,10 +25,9 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.Fields; @@ -55,21 +45,29 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.store.RAMDirectory; // for javadocs +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; import org.apache.lucene.util.Counter; +import org.apache.lucene.util.IntBlockPool; import org.apache.lucene.util.IntBlockPool.SliceReader; import org.apache.lucene.util.IntBlockPool.SliceWriter; -import org.apache.lucene.util.IntBlockPool; import org.apache.lucene.util.RamUsageEstimator; import 
org.apache.lucene.util.RecyclingByteBlockAllocator; import org.apache.lucene.util.RecyclingIntBlockAllocator; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; /** * High-performance single-document main memory Apache Lucene fulltext search index. @@ -990,19 +988,16 @@ public class MemoryIndex { @Override public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) { - if (reuse == null || !(reuse instanceof MemoryDocsEnum)) { - reuse = new MemoryDocsEnum(); - } - return ((MemoryDocsEnum) reuse).reset(liveDocs, info.sliceArray.freq[info.sortedTerms[termUpto]]); + return docsAndPositions(liveDocs, reuse, flags); } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { - if (reuse == null || !(reuse instanceof MemoryDocsAndPositionsEnum)) { - reuse = new MemoryDocsAndPositionsEnum(); + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) { + if (reuse == null || !(reuse instanceof MemoryDocsEnum)) { + reuse = new MemoryDocsEnum(); } final int ord = info.sortedTerms[termUpto]; - return ((MemoryDocsAndPositionsEnum) reuse).reset(liveDocs, info.sliceArray.start[ord], info.sliceArray.end[ord], info.sliceArray.freq[ord]); + return ((MemoryDocsEnum) reuse).reset(liveDocs, info.sliceArray.start[ord], info.sliceArray.end[ord], info.sliceArray.freq[ord]); } @Override @@ -1020,51 +1015,6 @@ public class MemoryIndex { } private class MemoryDocsEnum extends DocsEnum { - private boolean hasNext; - private Bits liveDocs; - private int doc = -1; - private int freq; - - public DocsEnum reset(Bits liveDocs, int freq) { - this.liveDocs = liveDocs; - hasNext = true; - doc = -1; - this.freq = freq; - return this; - } - - @Override - public int docID() { - return doc; - } - - @Override - public int nextDoc() { - if (hasNext 
&& (liveDocs == null || liveDocs.get(0))) { - hasNext = false; - return doc = 0; - } else { - return doc = NO_MORE_DOCS; - } - } - - @Override - public int advance(int target) throws IOException { - return slowAdvance(target); - } - - @Override - public int freq() throws IOException { - return freq; - } - - @Override - public long cost() { - return 1; - } - } - - private class MemoryDocsAndPositionsEnum extends DocsAndPositionsEnum { private int posUpto; // for assert private boolean hasNext; private Bits liveDocs; @@ -1074,11 +1024,11 @@ public class MemoryIndex { private int startOffset; private int endOffset; - public MemoryDocsAndPositionsEnum() { + public MemoryDocsEnum() { this.sliceReader = new SliceReader(intBlockPool); } - public DocsAndPositionsEnum reset(Bits liveDocs, int start, int end, int freq) { + public DocsEnum reset(Bits liveDocs, int start, int end, int freq) { this.liveDocs = liveDocs; this.sliceReader.reset(start, end); posUpto = 0; // for assert @@ -1116,7 +1066,9 @@ public class MemoryIndex { @Override public int nextPosition() { - assert posUpto++ < freq; + //assert posUpto++ < freq; + if (posUpto++ >= freq) + return NO_MORE_POSITIONS; assert !sliceReader.endOfSlice() : " stores offsets : " + startOffset; if (storeOffsets) { int pos = sliceReader.readInt(); diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java index 0de8e72..a0e7bc1 100644 --- a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java +++ b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java @@ -44,7 +44,7 @@ import org.apache.lucene.document.TextField; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.CompositeReader; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import 
org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; @@ -198,9 +198,9 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase { while(iwTermsIter.next() != null) { assertNotNull(memTermsIter.next()); assertEquals(iwTermsIter.term(), memTermsIter.term()); - DocsAndPositionsEnum iwDocsAndPos = iwTermsIter.docsAndPositions(null, null); - DocsAndPositionsEnum memDocsAndPos = memTermsIter.docsAndPositions(null, null); - while(iwDocsAndPos.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) { + DocsEnum iwDocsAndPos = iwTermsIter.docsAndPositions(null, null); + DocsEnum memDocsAndPos = memTermsIter.docsAndPositions(null, null); + while(iwDocsAndPos.nextDoc() != DocsEnum.NO_MORE_DOCS) { assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc()); assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq()); for (int i = 0; i < iwDocsAndPos.freq(); i++) { @@ -219,7 +219,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase { assertEquals(iwTermsIter.term(), memTermsIter.term()); DocsEnum iwDocsAndPos = iwTermsIter.docs(null, null); DocsEnum memDocsAndPos = memTermsIter.docs(null, null); - while(iwDocsAndPos.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) { + while(iwDocsAndPos.nextDoc() != DocsEnum.NO_MORE_DOCS) { assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc()); assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq()); } @@ -345,7 +345,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase { memory.addField("foo", "bar", analyzer); AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader(); assertEquals(1, reader.terms("foo").getSumTotalTermFreq()); - DocsAndPositionsEnum disi = reader.termPositionsEnum(new Term("foo", "bar")); + DocsEnum disi = reader.termPositionsEnum(new Term("foo", "bar")); int docid = disi.docID(); assertEquals(-1, docid); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); diff --git 
a/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java index 1926d7b..b8a5685 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java @@ -17,9 +17,6 @@ package org.apache.lucene.index.sorter; * limitations under the License. */ -import java.io.IOException; -import java.util.Comparator; - import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldComparator; @@ -30,6 +27,9 @@ import org.apache.lucene.util.TimSorter; import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedLongValues; +import java.io.IOException; +import java.util.Comparator; + /** * Sorts documents of a given index by returning a permutation on the document * IDs. @@ -270,6 +270,11 @@ final class Sorter { public int freq() throws IOException { throw new UnsupportedOperationException(); } @Override + public int nextPosition() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public int docID() { throw new UnsupportedOperationException(); } @Override diff --git a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java index e81dea7..6f9ba39 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java @@ -17,12 +17,8 @@ package org.apache.lucene.index.sorter; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Arrays; - import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfos; @@ -49,6 +45,9 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.TimSorter; import org.apache.lucene.util.automaton.CompiledAutomaton; +import java.io.IOException; +import java.util.Arrays; + /** * An {@link AtomicReader} which supports sorting documents by a given * {@link Sort}. You can use this class to sort an index as follows: @@ -164,30 +163,8 @@ public class SortingAtomicReader extends FilterAtomicReader { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, final int flags) throws IOException { - final DocsAndPositionsEnum inReuse; - final SortingDocsAndPositionsEnum wrapReuse; - if (reuse != null && reuse instanceof SortingDocsAndPositionsEnum) { - // if we're asked to reuse the given DocsEnum and it is Sorting, return - // the wrapped one, since some Codecs expect it. - wrapReuse = (SortingDocsAndPositionsEnum) reuse; - inReuse = wrapReuse.getWrapped(); - } else { - wrapReuse = null; - inReuse = reuse; - } - - final DocsAndPositionsEnum inDocsAndPositions = in.docsAndPositions(newToOld(liveDocs), inReuse, flags); - if (inDocsAndPositions == null) { - return null; - } - - // we ignore the fact that offsets may be stored but not asked for, - // since this code is expected to be used during addIndexes which will - // ask for everything. if that assumption changes in the future, we can - // factor in whether 'flags' says offsets are not required. 
- final boolean storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; - return new SortingDocsAndPositionsEnum(docMap.size(), wrapReuse, inDocsAndPositions, docMap, storeOffsets); + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, final int flags) throws IOException { + return docs(liveDocs, reuse, flags); } } @@ -505,8 +482,9 @@ public class SortingAtomicReader extends FilterAtomicReader { return in; } } - - static class SortingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum { + + // nocommit - merge this into SortingDocsEnum? + static class SortingDocsAndPositionsEnum extends FilterDocsEnum { /** * A {@link TimSorter} which sorts two parallel arrays of doc IDs and @@ -589,7 +567,7 @@ public class SortingAtomicReader extends FilterAtomicReader { private final RAMFile file; - SortingDocsAndPositionsEnum(int maxDoc, SortingDocsAndPositionsEnum reuse, final DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException { + SortingDocsAndPositionsEnum(int maxDoc, SortingDocsAndPositionsEnum reuse, final DocsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws IOException { super(in); this.maxDoc = maxDoc; this.storeOffsets = storeOffsets; @@ -632,14 +610,14 @@ public class SortingAtomicReader extends FilterAtomicReader { } // for testing - boolean reused(DocsAndPositionsEnum other) { + boolean reused(DocsEnum other) { if (other == null || !(other instanceof SortingDocsAndPositionsEnum)) { return false; } return docs == ((SortingDocsAndPositionsEnum) other).docs; } - private void addPositions(final DocsAndPositionsEnum in, final IndexOutput out) throws IOException { + private void addPositions(final DocsEnum in, final IndexOutput out) throws IOException { int freq = in.freq(); out.writeVInt(freq); int previousPosition = 0; @@ -730,8 +708,8 @@ public class SortingAtomicReader extends FilterAtomicReader { return startOffset; } - /** Returns the wrapped {@link 
DocsAndPositionsEnum}. */ - DocsAndPositionsEnum getWrapped() { + /** Returns the wrapped {@link org.apache.lucene.index.DocsEnum}. */ + DocsEnum getWrapped() { return in; } } diff --git a/lucene/misc/src/java/org/apache/lucene/uninverting/DocTermOrds.java b/lucene/misc/src/java/org/apache/lucene/uninverting/DocTermOrds.java index 3c1acd1..fed973c 100644 --- a/lucene/misc/src/java/org/apache/lucene/uninverting/DocTermOrds.java +++ b/lucene/misc/src/java/org/apache/lucene/uninverting/DocTermOrds.java @@ -17,16 +17,9 @@ package org.apache.lucene.uninverting; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -import org.apache.lucene.codecs.PostingsFormat; // javadocs +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Fields; @@ -40,6 +33,12 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.StringHelper; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + /** * This class enables fast access to multiple term ords for * a specified field across all docIDs. 
@@ -633,7 +632,7 @@ public class DocTermOrds implements Accountable { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { return termsEnum.docsAndPositions(liveDocs, reuse, flags); } diff --git a/lucene/misc/src/java/org/apache/lucene/uninverting/FieldCacheImpl.java b/lucene/misc/src/java/org/apache/lucene/uninverting/FieldCacheImpl.java index 8856292..27d9e7e 100644 --- a/lucene/misc/src/java/org/apache/lucene/uninverting/FieldCacheImpl.java +++ b/lucene/misc/src/java/org/apache/lucene/uninverting/FieldCacheImpl.java @@ -17,15 +17,6 @@ package org.apache.lucene.uninverting; * limitations under the License. */ -import java.io.IOException; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.WeakHashMap; - import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValues; @@ -49,6 +40,15 @@ import org.apache.lucene.util.packed.GrowableWriter; import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedLongValues; +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.WeakHashMap; + /** * Expert: The default cache implementation, storing all values in memory. * A WeakHashMap is used for storage. 
diff --git a/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java b/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java index 0a43802..b29dccd 100644 --- a/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java +++ b/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java @@ -17,13 +17,6 @@ package org.apache.lucene.index.sorter; * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Random; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -44,7 +37,6 @@ import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInvertState; @@ -59,7 +51,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum.SeekStatus; -import org.apache.lucene.index.sorter.SortingAtomicReader.SortingDocsAndPositionsEnum; import org.apache.lucene.index.sorter.SortingAtomicReader.SortingDocsEnum; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.DocIdSetIterator; @@ -75,6 +66,13 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Random; + public abstract class SorterTestBase extends LuceneTestCase { static final class 
NormsSimilarity extends Similarity { @@ -252,7 +250,7 @@ public abstract class SorterTestBase extends LuceneTestCase { public void testDocsAndPositionsEnum() throws Exception { TermsEnum termsEnum = reader.terms(DOC_POSITIONS_FIELD).iterator(null); assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOC_POSITIONS_TERM))); - DocsAndPositionsEnum sortedPositions = termsEnum.docsAndPositions(null, null); + DocsEnum sortedPositions = termsEnum.docsAndPositions(null, null); int doc; // test nextDoc() @@ -268,10 +266,10 @@ public abstract class SorterTestBase extends LuceneTestCase { } // test advance() - final DocsAndPositionsEnum reuse = sortedPositions; + final DocsEnum reuse = sortedPositions; sortedPositions = termsEnum.docsAndPositions(null, reuse); - if (sortedPositions instanceof SortingDocsAndPositionsEnum) { - assertTrue(((SortingDocsAndPositionsEnum) sortedPositions).reused(reuse)); // make sure reuse worked + if (sortedPositions instanceof SortingDocsEnum) { + assertTrue(((SortingDocsEnum) sortedPositions).reused(reuse)); // make sure reuse worked } doc = 0; while ((doc = sortedPositions.advance(doc + TestUtil.nextInt(random(), 1, 5))) != DocIdSetIterator.NO_MORE_DOCS) { diff --git a/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java index 9861617..c81051f 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java @@ -17,12 +17,6 @@ package org.apache.lucene.queries; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.Set; -import java.util.Arrays; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -30,13 +24,19 @@ import org.apache.lucene.queries.function.FunctionQuery; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.ComplexExplanation; import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Weight; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Set; + /** * Query that sets document score as a programmatic function of several (sub) scores: *

      @@ -234,14 +234,14 @@ public class CustomScoreQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { - Scorer subQueryScorer = subQueryWeight.scorer(context, acceptDocs); + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { + Scorer subQueryScorer = subQueryWeight.scorer(context, flags, acceptDocs); if (subQueryScorer == null) { return null; } Scorer[] valSrcScorers = new Scorer[valSrcWeights.length]; for(int i = 0; i < valSrcScorers.length; i++) { - valSrcScorers[i] = valSrcWeights[i].scorer(context, acceptDocs); + valSrcScorers[i] = valSrcWeights[i].scorer(context, flags, acceptDocs); } return new CustomScorer(CustomScoreQuery.this.getCustomScoreProvider(context), this, queryWeight, subQueryScorer, valSrcScorers); } @@ -333,6 +333,11 @@ public class CustomScoreQuery extends Query { } @Override + public int nextPosition() throws IOException { + return subQueryScorer.nextPosition(); + } + + @Override public Collection getChildren() { return Collections.singleton(new ChildScorer(subQueryScorer, "CUSTOM")); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java index 7ba5f4f..18794a4 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java @@ -17,18 +17,23 @@ package org.apache.lucene.queries.function; * limitations under the License. 
*/ -import org.apache.lucene.search.*; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.search.ComplexExplanation; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; import java.util.Collection; import java.util.Collections; -import java.util.Set; import java.util.Map; +import java.util.Set; /** * Query that is boosted by a ValueSource @@ -97,8 +102,8 @@ public class BoostedQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { - Scorer subQueryScorer = qWeight.scorer(context, acceptDocs); + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { + Scorer subQueryScorer = qWeight.scorer(context, flags, acceptDocs); if (subQueryScorer == null) { return null; } @@ -170,6 +175,11 @@ public class BoostedQuery extends Query { } @Override + public int nextPosition() throws IOException { + return scorer.nextPosition(); + } + + @Override public Collection getChildren() { return Collections.singleton(new ChildScorer(scorer, "CUSTOM")); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java index 726b97e..382fdd2 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java @@ -18,15 +18,20 @@ package org.apache.lucene.queries.function; */ import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import 
org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.search.*; -import org.apache.lucene.index.MultiFields; +import org.apache.lucene.search.ComplexExplanation; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import java.io.IOException; -import java.util.Set; import java.util.Map; +import java.util.Set; /** @@ -90,13 +95,13 @@ public class FunctionQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { return new AllScorer(context, acceptDocs, this, queryWeight); } @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - return ((AllScorer)scorer(context, context.reader().getLiveDocs())).explain(doc); + return ((AllScorer)scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs())).explain(doc); } } @@ -167,6 +172,11 @@ public class FunctionQuery extends Query { return 1; } + @Override + public int nextPosition() throws IOException { + return -1; + } + public Explanation explain(int doc) throws IOException { float sc = qWeight * vals.floatVal(doc); @@ -178,6 +188,7 @@ public class FunctionQuery extends Query { result.addDetail(new Explanation(weight.queryNorm,"queryNorm")); return result; } + } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java index ec8aced..6f4c073 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java @@ -93,6 +93,11 @@ public class 
ValueSourceScorer extends Scorer { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public long cost() { return maxDoc; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/QueryValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/QueryValueSource.java index 10a5f0d..e2c5529 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/QueryValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/QueryValueSource.java @@ -18,11 +18,15 @@ package org.apache.lucene.queries.function.valuesource; import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.FloatDocValues; -import org.apache.lucene.search.*; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueFloat; @@ -123,7 +127,7 @@ class QueryDocValues extends FloatDocValues { try { if (doc < lastDocRequested) { if (noMatches) return defVal; - scorer = weight.scorer(readerContext, acceptDocs); + scorer = weight.scorer(readerContext, DocsEnum.FLAG_FREQS, acceptDocs); if (scorer==null) { noMatches = true; return defVal; @@ -154,7 +158,7 @@ class QueryDocValues extends FloatDocValues { try { if (doc < lastDocRequested) { if (noMatches) return false; - scorer = weight.scorer(readerContext, acceptDocs); + scorer = weight.scorer(readerContext, DocsEnum.FLAG_FREQS, acceptDocs); scorerDoc = -1; if (scorer==null) { noMatches = true; @@ -212,7 +216,7 @@ class 
QueryDocValues extends FloatDocValues { mval.exists = false; return; } - scorer = weight.scorer(readerContext, acceptDocs); + scorer = weight.scorer(readerContext, DocsEnum.FLAG_FREQS, acceptDocs); scorerDoc = -1; if (scorer==null) { noMatches = true; diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java index 5fe1af4..9f8b6f6 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java @@ -84,6 +84,11 @@ public class TFValueSource extends TermFreqValueSource { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int docID() { return DocIdSetIterator.NO_MORE_DOCS; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java index 3fba5f8..681326e 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java @@ -77,6 +77,11 @@ public class TermFreqValueSource extends DocFreqValueSource { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int docID() { return DocIdSetIterator.NO_MORE_DOCS; } diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java index 566e48a..8a83b46 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java @@ -17,21 +17,19 
@@ package org.apache.lucene.codecs.idversion; * limitations under the License. */ -import java.io.IOException; -import java.util.Collections; - import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.PostingsReaderBase; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.BitUtil; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.Collections; + final class IDVersionPostingsReader extends PostingsReaderBase { @Override @@ -79,8 +77,8 @@ final class IDVersionPostingsReader extends PostingsReaderBase { } @Override - public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, - DocsAndPositionsEnum reuse, int flags) { + public DocsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, + DocsEnum reuse, int flags) { SingleDocsAndPositionsEnum posEnum; if (reuse instanceof SingleDocsAndPositionsEnum) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java index 68ac9fd..4c2e0f7 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java @@ -17,11 +17,7 @@ package org.apache.lucene.codecs.idversion; * limitations under the License. 
*/ -import java.io.IOException; -import java.io.PrintStream; - import org.apache.lucene.codecs.BlockTermState; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.TermState; @@ -37,6 +33,9 @@ import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.PairOutputs.Pair; import org.apache.lucene.util.fst.Util; +import java.io.IOException; +import java.io.PrintStream; + /** Iterates through terms in this field; this class is public so users * can cast it to call {@link #seekExact(BytesRef, long)} for * optimistic-concurreny, and also {@link #getVersion} to get the @@ -1010,7 +1009,7 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits skipDocs, DocsEnum reuse, int flags) throws IOException { if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { // Positions were not indexed: return null; diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/SingleDocsAndPositionsEnum.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/SingleDocsAndPositionsEnum.java index eecc700..26006e8 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/SingleDocsAndPositionsEnum.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/SingleDocsAndPositionsEnum.java @@ -17,11 +17,11 @@ package org.apache.lucene.codecs.idversion; * limitations under the License. 
*/ -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; -class SingleDocsAndPositionsEnum extends DocsAndPositionsEnum { +class SingleDocsAndPositionsEnum extends DocsEnum { private int doc; private int pos; private int singleDocID; diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/SingleDocsEnum.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/SingleDocsEnum.java index b29619c..60bea91 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/SingleDocsEnum.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/SingleDocsEnum.java @@ -20,6 +20,8 @@ package org.apache.lucene.codecs.idversion; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.util.Bits; +import java.io.IOException; + class SingleDocsEnum extends DocsEnum { private int doc; @@ -68,4 +70,9 @@ class SingleDocsEnum extends DocsEnum { public int freq() { return 1; } + + @Override + public int nextPosition() throws IOException { + return -1; + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java index 6c24bde..17ce5f1 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java @@ -17,15 +17,8 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; @@ -40,6 +33,13 @@ import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Transition; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + // TODO // - compare perf to PhraseQuery exact and sloppy // - optimize: find terms that are in fact MUST (because all paths @@ -312,7 +312,7 @@ public class TermAutomatonQuery extends Query { static class EnumAndScorer { public final int termID; - public final DocsAndPositionsEnum posEnum; + public final DocsEnum posEnum; // How many positions left in the current document: public int posLeft; @@ -320,7 +320,7 @@ public class TermAutomatonQuery extends Query { // Current position public int pos; - public EnumAndScorer(int termID, DocsAndPositionsEnum posEnum) { + public EnumAndScorer(int termID, DocsEnum posEnum) { this.termID = termID; this.posEnum = posEnum; } @@ -372,7 +372,7 @@ public class TermAutomatonQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { // Initialize the enums; null for a given slot means that term didn't appear in this reader EnumAndScorer[] enums = new EnumAndScorer[idToTerm.size()]; diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java 
b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java index b862869..f2f8b35 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java @@ -17,10 +17,6 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.Map; - -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.TermAutomatonQuery.EnumAndScorer; import org.apache.lucene.search.TermAutomatonQuery.TermAutomatonWeight; import org.apache.lucene.search.similarities.Similarity; @@ -31,6 +27,9 @@ import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.RunAutomaton; +import java.io.IOException; +import java.util.Map; + class TermAutomatonScorer extends Scorer { private final EnumAndScorer[] subs; private final EnumAndScorer[] subsOnDoc; @@ -327,6 +326,11 @@ class TermAutomatonScorer extends Scorer { } @Override + public int nextPosition() throws IOException { + return -1; // nocommit we should be able to implement this + } + + @Override public int docID() { return docID; } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java index 007b095..eed2460 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java @@ -17,18 +17,11 @@ package org.apache.lucene.search.suggest.analyzing; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.Set; -import java.util.TreeSet; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.Terms; @@ -41,6 +34,13 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Version; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; + // TODO: // - allow to use the search score @@ -254,7 +254,7 @@ public class BlendedInfixSuggester extends AnalyzingInfixSuggester { if (matchedTokens.contains(docTerm) || docTerm.startsWith(prefixToken)) { - DocsAndPositionsEnum docPosEnum = it.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_OFFSETS); + DocsEnum docPosEnum = it.docsAndPositions(null, null, DocsEnum.FLAG_OFFSETS); docPosEnum.nextDoc(); // use the first occurrence of the term diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java index de873c0..df44cf2 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java @@ -17,15 +17,11 @@ package org.apache.lucene.codecs.asserting; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Iterator; - import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.index.AssertingAtomicReader; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Fields; @@ -38,6 +34,9 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.TestUtil; +import java.io.IOException; +import java.util.Iterator; + /** * Just like {@link Lucene41PostingsFormat} but with additional asserts. */ @@ -154,7 +153,6 @@ public final class AssertingPostingsFormat extends PostingsFormat { termsEnum = terms.iterator(termsEnum); BytesRefBuilder lastTerm = null; DocsEnum docsEnum = null; - DocsAndPositionsEnum posEnum = null; boolean hasFreqs = fieldInfo.getIndexOptions().compareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS) >= 0; boolean hasPositions = fieldInfo.getIndexOptions().compareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; @@ -185,13 +183,12 @@ public final class AssertingPostingsFormat extends PostingsFormat { docsEnum = termsEnum.docs(null, docsEnum, flags); } else { if (hasPayloads) { - flags |= DocsAndPositionsEnum.FLAG_PAYLOADS; + flags |= DocsEnum.FLAG_PAYLOADS; } if (hasOffsets) { - flags = flags | DocsAndPositionsEnum.FLAG_OFFSETS; + flags = flags | DocsEnum.FLAG_OFFSETS; } - posEnum = termsEnum.docsAndPositions(null, posEnum, flags); - docsEnum = posEnum; + docsEnum = termsEnum.docsAndPositions(null, docsEnum, flags); } assert docsEnum != null : "termsEnum=" + termsEnum + " hasPositions=" + hasPositions; @@ -213,13 +210,13 @@ public final class AssertingPostingsFormat extends PostingsFormat { int lastPos = -1; int lastStartOffset = -1; for(int i=0;i= lastPos: "pos=" + 
pos + " vs lastPos=" + lastPos + " i=" + i + " freq=" + freq; lastPos = pos; if (hasOffsets) { - int startOffset = posEnum.startOffset(); - int endOffset = posEnum.endOffset(); + int startOffset = docsEnum.startOffset(); + int endOffset = docsEnum.endOffset(); assert endOffset >= startOffset; assert startOffset >= lastStartOffset; lastStartOffset = startOffset; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java index f811984..007964b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java @@ -17,26 +17,14 @@ package org.apache.lucene.codecs.ramonly; * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.concurrent.atomic.AtomicInteger; - import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.TermStats; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentReadState; @@ -53,6 +41,17 @@ import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; +import java.io.IOException; +import 
java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicInteger; + /** Stores all postings data in RAM, but writes a small * token (header + single int) to identify which "slot" the * index is using in RAM HashMap. @@ -267,7 +266,6 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat { long sumTotalTermFreq = 0; long sumDocFreq = 0; DocsEnum docsEnum = null; - DocsAndPositionsEnum posEnum = null; int enumFlags; IndexOptions indexOptions = fieldInfo.getIndexOptions(); @@ -282,15 +280,15 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat { enumFlags = DocsEnum.FLAG_FREQS; } else if (writeOffsets == false) { if (writePayloads) { - enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS; + enumFlags = DocsEnum.FLAG_PAYLOADS; } else { enumFlags = 0; } } else { if (writePayloads) { - enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS | DocsAndPositionsEnum.FLAG_OFFSETS; + enumFlags = DocsEnum.FLAG_PAYLOADS | DocsEnum.FLAG_OFFSETS; } else { - enumFlags = DocsAndPositionsEnum.FLAG_OFFSETS; + enumFlags = DocsEnum.FLAG_OFFSETS; } } @@ -300,14 +298,7 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat { break; } RAMPostingsWriterImpl postingsWriter = termsConsumer.startTerm(term); - - if (writePositions) { - posEnum = termsEnum.docsAndPositions(null, posEnum, enumFlags); - docsEnum = posEnum; - } else { - docsEnum = termsEnum.docs(null, docsEnum, enumFlags); - posEnum = null; - } + docsEnum = termsEnum.docs(null, docsEnum, enumFlags); int docFreq = 0; long totalTermFreq = 0; @@ -330,13 +321,13 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat { postingsWriter.startDoc(docID, freq); if (writePositions) { for (int i=0;i= current.positions.length) + return NO_MORE_POSITIONS; return current.positions[posUpto++]; } diff --git 
a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java index cd9b163..4e10922 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java @@ -1,14 +1,14 @@ package org.apache.lucene.index; -import java.io.IOException; -import java.util.Iterator; - import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.VirtualMethod; import org.apache.lucene.util.automaton.CompiledAutomaton; +import java.io.IOException; +import java.util.Iterator; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -144,16 +144,16 @@ public class AssertingAtomicReader extends FilterAtomicReader { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { assert state == State.POSITIONED: "docsAndPositions(...) called on unpositioned TermsEnum"; // TODO: should we give this thing a random to be super-evil, // and randomly *not* unwrap? - if (reuse instanceof AssertingDocsAndPositionsEnum) { - reuse = ((AssertingDocsAndPositionsEnum) reuse).in; + if (reuse instanceof AssertingDocsEnum) { + reuse = ((AssertingDocsEnum) reuse).in; } - DocsAndPositionsEnum docs = super.docsAndPositions(liveDocs, reuse, flags); - return docs == null ? null : new AssertingDocsAndPositionsEnum(docs); + DocsEnum docs = super.docsAndPositions(liveDocs, reuse, flags); + return docs == null ? null : new AssertingDocsEnum(docs); } // TODO: we should separately track if we are 'at the end' ? 
@@ -256,8 +256,10 @@ public class AssertingAtomicReader extends FilterAtomicReader { /** Wraps a docsenum with additional checks */ public static class AssertingDocsEnum extends FilterDocsEnum { private DocsEnumState state = DocsEnumState.START; + int positionCount = 0; + int positionMax = 0; private int doc; - + public AssertingDocsEnum(DocsEnum in) { this(in, true); } @@ -282,9 +284,12 @@ public class AssertingAtomicReader extends FilterAtomicReader { assert nextDoc > doc : "backwards nextDoc from " + doc + " to " + nextDoc + " " + in; if (nextDoc == DocIdSetIterator.NO_MORE_DOCS) { state = DocsEnumState.FINISHED; + positionMax = 0; } else { state = DocsEnumState.ITERATING; + positionMax = super.freq(); } + positionCount = 0; assert super.docID() == nextDoc; return doc = nextDoc; } @@ -297,9 +302,12 @@ public class AssertingAtomicReader extends FilterAtomicReader { assert advanced >= target : "backwards advance from: " + target + " to: " + advanced; if (advanced == DocIdSetIterator.NO_MORE_DOCS) { state = DocsEnumState.FINISHED; + positionMax = 0; } else { state = DocsEnumState.ITERATING; + positionMax = super.freq(); } + positionCount = 0; assert super.docID() == advanced; return doc = advanced; } @@ -315,18 +323,78 @@ public class AssertingAtomicReader extends FilterAtomicReader { assert state != DocsEnumState.START : "freq() called before nextDoc()/advance()"; assert state != DocsEnumState.FINISHED : "freq() called after NO_MORE_DOCS"; int freq = super.freq(); + if (freq == 0) { + System.out.println(); + } assert freq > 0; return freq; } + + @Override + public int nextPosition() throws IOException { + assert state != DocsEnumState.START : "nextPosition() called before nextDoc()/advance()"; + assert state != DocsEnumState.FINISHED : "nextPosition() called after NO_MORE_DOCS"; + int position = super.nextPosition(); + assert position >= 0 || position == -1 : "invalid position: " + position; + if (positionCount++ >= positionMax) + assert position == 
NO_MORE_POSITIONS : "nextPosition() does not return NO_MORE_POSITIONS when exhausted"; + return position; + } + + @Override + public int startOffset() throws IOException { + assert state != DocsEnumState.START : "startOffset() called before nextDoc()/advance()"; + assert state != DocsEnumState.FINISHED : "startOffset() called after NO_MORE_DOCS"; + assert positionCount > 0 : "startOffset() called before nextPosition()!"; + assert positionCount <= positionMax : "startOffset() called after NO_MORE_POSITIONS"; + return super.startOffset(); + } + + @Override + public int endOffset() throws IOException { + assert state != DocsEnumState.START : "endOffset() called before nextDoc()/advance()"; + assert state != DocsEnumState.FINISHED : "endOffset() called after NO_MORE_DOCS"; + assert positionCount > 0 : "endOffset() called before nextPosition()!"; + assert positionCount <= positionMax : "endOffset() called after NO_MORE_POSITIONS"; + return super.endOffset(); + } + + @Override + public int startPosition() throws IOException { + assert state != DocsEnumState.START : "startPosition() called before nextDoc()/advance()"; + assert state != DocsEnumState.FINISHED : "startPosition() called after NO_MORE_DOCS"; + assert positionCount > 0 : "startPosition() called before nextPosition()!"; + assert positionCount <= positionMax : "startPosition() called after NO_MORE_POSITIONS"; + return super.startPosition(); + } + + @Override + public int endPosition() throws IOException { + assert state != DocsEnumState.START : "endPosition() called before nextDoc()/advance()"; + assert state != DocsEnumState.FINISHED : "endPosition() called after NO_MORE_DOCS"; + assert positionCount > 0 : "endPosition() called before nextPosition()!"; + assert positionCount <= positionMax : "endPosition() called after NO_MORE_POSITIONS"; + return super.endPosition(); + } + + @Override + public BytesRef getPayload() throws IOException { + assert state != DocsEnumState.START : "getPayload() called before 
nextDoc()/advance()"; + assert state != DocsEnumState.FINISHED : "getPayload() called after NO_MORE_DOCS"; + assert positionCount > 0 : "getPayload() called before nextPosition()!"; + BytesRef payload = super.getPayload(); + assert payload == null || payload.length > 0 : "getPayload() returned payload with invalid length!"; + return payload; + } } - static class AssertingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum { + static class AssertingDocsAndPositionsEnum extends FilterDocsEnum { private DocsEnumState state = DocsEnumState.START; private int positionMax = 0; private int positionCount = 0; private int doc; - public AssertingDocsAndPositionsEnum(DocsAndPositionsEnum in) { + public AssertingDocsAndPositionsEnum(DocsEnum in) { super(in); int docid = in.docID(); assert docid == -1 : "invalid initial doc id: " + docid; diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java index b6ba0e1..209cc67 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java @@ -17,25 +17,6 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import java.io.File; -import java.io.IOException; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.EnumSet; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Random; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldsConsumer; @@ -62,12 +43,30 @@ import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.Version; import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings; import org.apache.lucene.util.automaton.AutomatonTestUtil; +import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings; import org.apache.lucene.util.automaton.CompiledAutomaton; import org.junit.AfterClass; import org.junit.BeforeClass; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Random; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + /** * Abstract class to do basic tests for a postings format. 
* NOTE: This test focuses on the postings @@ -122,7 +121,7 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest /** Given the same random seed this always enumerates the * same random postings */ - private static class SeedPostings extends DocsAndPositionsEnum { + private static class SeedPostings extends DocsEnum { // Used only to generate docIDs; this way if you pull w/ // or w/o positions you get the same docID sequence: private final Random docRandom; @@ -234,7 +233,9 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest posUpto = freq; return 0; } - assert posUpto < freq; + //assert posUpto < freq; + if (posUpto >= freq) + return NO_MORE_POSITIONS; if (posUpto == 0 && random.nextBoolean()) { // Sometimes index pos = 0 @@ -651,17 +652,17 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest } @Override - public final DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + public final DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { if (liveDocs != null) { throw new IllegalArgumentException("liveDocs must be null"); } if (maxAllowed.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { return null; } - if ((flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0 && maxAllowed.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) { + if ((flags & DocsEnum.FLAG_OFFSETS) == DocsEnum.FLAG_OFFSETS && maxAllowed.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) { return null; } - if ((flags & DocsAndPositionsEnum.FLAG_PAYLOADS) != 0 && allowPayloads == false) { + if ((flags & DocsEnum.FLAG_PAYLOADS) == DocsEnum.FLAG_PAYLOADS && allowPayloads == false) { return null; } return getSeedPostings(current.getKey().utf8ToString(), current.getValue().seed, false, maxAllowed, allowPayloads); @@ -749,7 +750,6 @@ public abstract class 
BasePostingsFormatTestCase extends BaseIndexFileFormatTest private static class ThreadState { // Only used with REUSE option: public DocsEnum reuseDocsEnum; - public DocsAndPositionsEnum reuseDocsAndPositionsEnum; } private void verifyEnum(ThreadState threadState, @@ -815,31 +815,29 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest DocsEnum prevDocsEnum = null; DocsEnum docsEnum; - DocsAndPositionsEnum docsAndPositionsEnum; if (!doCheckPositions) { if (allowPositions && random().nextInt(10) == 7) { // 10% of the time, even though we will not check positions, pull a DocsAndPositions enum if (options.contains(Option.REUSE_ENUMS) && random().nextInt(10) < 9) { - prevDocsEnum = threadState.reuseDocsAndPositionsEnum; + prevDocsEnum = threadState.reuseDocsEnum; } - int flags = 0; + int flags = DocsEnum.FLAG_NONE; if (alwaysTestMax || random().nextBoolean()) { - flags |= DocsAndPositionsEnum.FLAG_OFFSETS; + flags |= DocsEnum.FLAG_OFFSETS; } if (alwaysTestMax || random().nextBoolean()) { - flags |= DocsAndPositionsEnum.FLAG_PAYLOADS; + flags |= DocsEnum.FLAG_PAYLOADS; } if (VERBOSE) { - System.out.println(" get DocsAndPositionsEnum (but we won't check positions) flags=" + flags); + System.out.println(" get DocsEnum (but we won't check positions) flags=" + flags); } - threadState.reuseDocsAndPositionsEnum = termsEnum.docsAndPositions(liveDocs, (DocsAndPositionsEnum) prevDocsEnum, flags); - docsEnum = threadState.reuseDocsAndPositionsEnum; - docsAndPositionsEnum = threadState.reuseDocsAndPositionsEnum; + threadState.reuseDocsEnum = termsEnum.docsAndPositions(liveDocs, prevDocsEnum, flags); + docsEnum = threadState.reuseDocsEnum; } else { if (VERBOSE) { System.out.println(" get DocsEnum"); @@ -849,28 +847,26 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest } threadState.reuseDocsEnum = termsEnum.docs(liveDocs, prevDocsEnum, doCheckFreqs ? 
DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE); docsEnum = threadState.reuseDocsEnum; - docsAndPositionsEnum = null; } } else { if (options.contains(Option.REUSE_ENUMS) && random().nextInt(10) < 9) { - prevDocsEnum = threadState.reuseDocsAndPositionsEnum; + prevDocsEnum = threadState.reuseDocsEnum; } - int flags = 0; + int flags = DocsEnum.FLAG_NONE; if (alwaysTestMax || doCheckOffsets || random().nextInt(3) == 1) { - flags |= DocsAndPositionsEnum.FLAG_OFFSETS; + flags |= DocsEnum.FLAG_OFFSETS; } if (alwaysTestMax || doCheckPayloads|| random().nextInt(3) == 1) { - flags |= DocsAndPositionsEnum.FLAG_PAYLOADS; + flags |= DocsEnum.FLAG_PAYLOADS; } if (VERBOSE) { - System.out.println(" get DocsAndPositionsEnum flags=" + flags); + System.out.println(" get DocsEnum flags=" + flags); } - threadState.reuseDocsAndPositionsEnum = termsEnum.docsAndPositions(liveDocs, (DocsAndPositionsEnum) prevDocsEnum, flags); - docsEnum = threadState.reuseDocsAndPositionsEnum; - docsAndPositionsEnum = threadState.reuseDocsAndPositionsEnum; + threadState.reuseDocsEnum = termsEnum.docsAndPositions(liveDocs, prevDocsEnum, flags); + docsEnum = threadState.reuseDocsEnum; } assertNotNull("null DocsEnum", docsEnum); @@ -1012,7 +1008,7 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest if (VERBOSE) { System.out.println(" now nextPosition to " + pos); } - assertEquals("position is wrong", pos, docsAndPositionsEnum.nextPosition()); + assertEquals("position is wrong", pos, docsEnum.nextPosition()); if (doCheckPayloads) { BytesRef expectedPayload = expected.getPayload(); @@ -1021,9 +1017,9 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest System.out.println(" now check expectedPayload length=" + (expectedPayload == null ? 
0 : expectedPayload.length)); } if (expectedPayload == null || expectedPayload.length == 0) { - assertNull("should not have payload", docsAndPositionsEnum.getPayload()); + assertNull("should not have payload", docsEnum.getPayload()); } else { - BytesRef payload = docsAndPositionsEnum.getPayload(); + BytesRef payload = docsEnum.getPayload(); assertNotNull("should have payload but doesn't", payload); assertEquals("payload length is wrong", expectedPayload.length, payload.length); @@ -1035,7 +1031,7 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest // make a deep copy payload = BytesRef.deepCopyOf(payload); - assertEquals("2nd call to getPayload returns something different!", payload, docsAndPositionsEnum.getPayload()); + assertEquals("2nd call to getPayload returns something different!", payload, docsEnum.getPayload()); } } else { if (VERBOSE) { @@ -1049,8 +1045,8 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest if (VERBOSE) { System.out.println(" now check offsets: startOff=" + expected.startOffset() + " endOffset=" + expected.endOffset()); } - assertEquals("startOffset is wrong", expected.startOffset(), docsAndPositionsEnum.startOffset()); - assertEquals("endOffset is wrong", expected.endOffset(), docsAndPositionsEnum.endOffset()); + assertEquals("startOffset is wrong", expected.startOffset(), docsEnum.startOffset()); + assertEquals("endOffset is wrong", expected.endOffset(), docsEnum.endOffset()); } else { if (VERBOSE) { System.out.println(" skip check offsets"); @@ -1060,8 +1056,8 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest if (VERBOSE) { System.out.println(" now check offsets are -1"); } - assertEquals("startOffset isn't -1", -1, docsAndPositionsEnum.startOffset()); - assertEquals("endOffset isn't -1", -1, docsAndPositionsEnum.endOffset()); + assertEquals("startOffset isn't -1", -1, docsEnum.startOffset()); + assertEquals("endOffset isn't -1", -1, 
docsEnum.endOffset()); } } } @@ -1578,8 +1574,6 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest if (random().nextBoolean()) { docs = termsEnum.docs(null, docs, DocsEnum.FLAG_FREQS); - } else if (docs instanceof DocsAndPositionsEnum) { - docs = termsEnum.docsAndPositions(null, (DocsAndPositionsEnum) docs, 0); } else { docs = termsEnum.docsAndPositions(null, null, 0); } @@ -1588,8 +1582,8 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) { docFreq++; totalTermFreq += docs.freq(); - if (docs instanceof DocsAndPositionsEnum) { - DocsAndPositionsEnum posEnum = (DocsAndPositionsEnum) docs; + if (docs instanceof DocsEnum) { + DocsEnum posEnum = (DocsEnum) docs; int limit = TestUtil.nextInt(random(), 1, docs.freq()); for(int i=0;i termsEnum = new ThreadLocal<>(); private final ThreadLocal docsEnum = new ThreadLocal<>(); - private final ThreadLocal docsAndPositionsEnum = new ThreadLocal<>(); + private final ThreadLocal docsAndPositionsEnum = new ThreadLocal<>(); protected void assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) throws IOException { assertEquals(1, terms.getDocCount()); @@ -420,7 +419,7 @@ public abstract class BaseTermVectorsFormatTestCase extends BaseIndexFileFormatT this.docsEnum.set(docsEnum); bits.clear(0); - DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.docsAndPositions(bits, random().nextBoolean() ? null : this.docsAndPositionsEnum.get()); + DocsEnum docsAndPositionsEnum = termsEnum.docsAndPositions(bits, random().nextBoolean() ? 
null : this.docsEnum.get()); assertEquals(ft.storeTermVectorOffsets() || ft.storeTermVectorPositions(), docsAndPositionsEnum != null); if (docsAndPositionsEnum != null) { assertEquals(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); @@ -486,7 +485,7 @@ public abstract class BaseTermVectorsFormatTestCase extends BaseIndexFileFormatT } assertEquals(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); } - this.docsAndPositionsEnum.set(docsAndPositionsEnum); + this.docsEnum.set(docsAndPositionsEnum); } assertNull(termsEnum.next()); for (int i = 0; i < 5; ++i) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java index 47725db..d60380c 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java @@ -17,16 +17,17 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.List; -import java.util.Random; -import java.util.concurrent.ExecutorService; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.TestUtil; +import java.io.IOException; +import java.util.List; +import java.util.Random; +import java.util.concurrent.ExecutorService; + /** * Helper class that adds some extra checks to ensure correct * usage of {@code IndexSearcher} and {@code Weight}. 
@@ -65,6 +66,21 @@ public class AssertingIndexSearcher extends IndexSearcher { } @Override + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { + Scorer scorer = w.scorer(context, flags, acceptDocs); + if (scorer != null) { + // check that scorer obeys disi contract for docID() before next()/advance + try { + int docid = scorer.docID(); + assert docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS; + } catch (UnsupportedOperationException ignored) { + // from a top-level BS1 + } + } + return scorer; + } + + @Override public float getValueForNormalization() { throw new IllegalStateException("Weight already normalized."); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java index 705a8a0..9d184e3 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java @@ -17,6 +17,8 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.index.AssertingAtomicReader; + import java.io.IOException; import java.lang.ref.WeakReference; import java.util.Collection; @@ -25,8 +27,6 @@ import java.util.Map; import java.util.Random; import java.util.WeakHashMap; -import org.apache.lucene.index.AssertingAtomicReader; - /** Wraps a Scorer with additional checks */ public class AssertingScorer extends Scorer { @@ -111,18 +111,28 @@ public class AssertingScorer extends Scorer { } @Override + public int nextPosition() throws IOException { + assert iterating(); + return in.nextPosition(); + } + + @Override public int docID() { return in.docID(); } @Override public int nextDoc() throws IOException { - return docsEnumIn.nextDoc(); + int doc = docsEnumIn.nextDoc(); + assert in.startPosition() == -1; + return doc; } @Override public int advance(int target) throws IOException { - return docsEnumIn.advance(target); + int doc = docsEnumIn.advance(target); + assert in.startPosition() == -1; + return doc; } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java index b075247..08b254b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java @@ -17,12 +17,12 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Random; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.Random; + class AssertingWeight extends Weight { static Weight wrap(Random random, Weight other) { @@ -60,18 +60,18 @@ class AssertingWeight extends Weight { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { // if the caller asks for in-order scoring or if the weight does not support // out-of order scoring then collection will have to happen in-order. - final Scorer inScorer = in.scorer(context, acceptDocs); + final Scorer inScorer = in.scorer(context, flags, acceptDocs); return AssertingScorer.wrap(new Random(random.nextLong()), inScorer); } @Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, int flags, Bits acceptDocs) throws IOException { // if the caller asks for in-order scoring or if the weight does not support // out-of order scoring then collection will have to happen in-order. 
- BulkScorer inScorer = in.bulkScorer(context, scoreDocsInOrder, acceptDocs); + BulkScorer inScorer = in.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); if (inScorer == null) { return null; } @@ -83,7 +83,7 @@ class AssertingWeight extends Weight { } else if (random.nextBoolean()) { // Let super wrap this.scorer instead, so we use // AssertingScorer: - inScorer = super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + inScorer = super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); } if (scoreDocsInOrder == false && random.nextBoolean()) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java index 9320af9..cd6051d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java @@ -17,19 +17,15 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.List; -import java.util.Random; - import junit.framework.Assert; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.AllDeletedFilterReader; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; @@ -39,6 +35,10 @@ import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.List; +import java.util.Random; + /** * Utility class for sanity-checking queries. 
*/ @@ -264,7 +264,7 @@ public class QueryUtils { if (scorer == null) { Weight w = s.createNormalizedWeight(q); AtomicReaderContext context = readerContextArray.get(leafPtr); - scorer = w.scorer(context, context.reader().getLiveDocs()); + scorer = w.scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs()); } int op = order[(opidx[0]++) % order.length]; @@ -311,7 +311,7 @@ public class QueryUtils { indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q); AtomicReaderContext ctx = (AtomicReaderContext)indexSearcher.getTopReaderContext(); - Scorer scorer = w.scorer(ctx, ctx.reader().getLiveDocs()); + Scorer scorer = w.scorer(ctx, DocsEnum.FLAG_FREQS, ctx.reader().getLiveDocs()); if (scorer != null) { boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); @@ -338,7 +338,7 @@ public class QueryUtils { indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q); AtomicReaderContext ctx = previousReader.getContext(); - Scorer scorer = w.scorer(ctx, ctx.reader().getLiveDocs()); + Scorer scorer = w.scorer(ctx, DocsEnum.FLAG_FREQS, ctx.reader().getLiveDocs()); if (scorer != null) { boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); @@ -369,7 +369,7 @@ public class QueryUtils { long startMS = System.currentTimeMillis(); for (int i=lastDoc[0]+1; i<=doc; i++) { Weight w = s.createNormalizedWeight(q); - Scorer scorer = w.scorer(context.get(leafPtr), liveDocs); + Scorer scorer = w.scorer(context.get(leafPtr), DocsEnum.FLAG_FREQS, liveDocs); Assert.assertTrue("query collected "+doc+" but skipTo("+i+") says no more docs!",scorer.advance(i) != DocIdSetIterator.NO_MORE_DOCS); Assert.assertEquals("query 
collected "+doc+" but skipTo("+i+") got to "+scorer.docID(),doc,scorer.docID()); float skipToScore = scorer.score(); @@ -397,7 +397,7 @@ public class QueryUtils { IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader); indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q); - Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), previousReader.getLiveDocs()); + Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), DocsEnum.FLAG_FREQS, previousReader.getLiveDocs()); if (scorer != null) { boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); @@ -422,7 +422,7 @@ public class QueryUtils { IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader); indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q); - Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), previousReader.getLiveDocs()); + Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), DocsEnum.FLAG_FREQS, previousReader.getLiveDocs()); if (scorer != null) { boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index c9fc75f..715cf61 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -17,6 +17,72 @@ package org.apache.lucene.util; * limitations under the License. 
*/ +import com.carrotsearch.randomizedtesting.JUnit4MethodProvider; +import com.carrotsearch.randomizedtesting.LifecycleScope; +import com.carrotsearch.randomizedtesting.MixWithSuiteName; +import com.carrotsearch.randomizedtesting.RandomizedContext; +import com.carrotsearch.randomizedtesting.RandomizedRunner; +import com.carrotsearch.randomizedtesting.RandomizedTest; +import com.carrotsearch.randomizedtesting.annotations.Listeners; +import com.carrotsearch.randomizedtesting.annotations.SeedDecorators; +import com.carrotsearch.randomizedtesting.annotations.TestGroup; +import com.carrotsearch.randomizedtesting.annotations.TestMethodProviders; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup.Group; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence; +import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite; +import com.carrotsearch.randomizedtesting.generators.RandomPicks; +import com.carrotsearch.randomizedtesting.rules.NoClassHooksShadowingRule; +import com.carrotsearch.randomizedtesting.rules.NoInstanceHooksOverridesRule; +import com.carrotsearch.randomizedtesting.rules.StaticFieldsInvariantRule; +import com.carrotsearch.randomizedtesting.rules.SystemPropertiesInvariantRule; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; 
+import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexReader.ReaderClosedListener; +import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.search.AssertingIndexSearcher; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.QueryUtils.FCInvisibleMultiReader; +import org.apache.lucene.store.BaseDirectoryWrapper; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.FlushInfo; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IOContext.Context; +import org.apache.lucene.store.LockFactory; +import org.apache.lucene.store.MergeInfo; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.MockDirectoryWrapper.Throttling; +import org.apache.lucene.store.NRTCachingDirectory; +import org.apache.lucene.store.RateLimitedDirectoryWrapper; +import org.apache.lucene.util.automaton.AutomatonTestUtil; +import org.apache.lucene.util.automaton.CompiledAutomaton; +import org.apache.lucene.util.automaton.RegExp; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.RuleChain; +import org.junit.rules.TestRule; +import org.junit.runner.RunWith; + import java.io.Closeable; import java.io.FileNotFoundException; import java.io.IOException; @@ -55,118 +121,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.logging.Logger; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.SegmentInfoFormat; -import 
org.apache.lucene.codecs.lucene46.Lucene46SegmentInfoFormat; -import org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoFormat; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.AlcoholicMergePolicy; -import org.apache.lucene.index.AssertingAtomicReader; -import org.apache.lucene.index.AssertingDirectoryReader; -import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.CompositeReader; -import org.apache.lucene.index.ConcurrentMergeScheduler; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.FieldFilterAtomicReader; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.IndexReader.ReaderClosedListener; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.LiveIndexWriterConfig; -import org.apache.lucene.index.LogByteSizeMergePolicy; -import org.apache.lucene.index.LogDocMergePolicy; -import org.apache.lucene.index.LogMergePolicy; -import org.apache.lucene.index.MergePolicy; -import org.apache.lucene.index.MergeScheduler; -import org.apache.lucene.index.MockRandomMergePolicy; -import org.apache.lucene.index.MultiDocValues; -import org.apache.lucene.index.MultiFields; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.ParallelAtomicReader; -import org.apache.lucene.index.ParallelCompositeReader; -import org.apache.lucene.index.SegmentReader; -import 
org.apache.lucene.index.SerialMergeScheduler; -import org.apache.lucene.index.SimpleMergedSegmentWarmer; -import org.apache.lucene.index.SlowCompositeReaderWrapper; -import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.index.StorableField; -import org.apache.lucene.index.StoredDocument; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum.SeekStatus; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.TieredMergePolicy; -import org.apache.lucene.search.AssertingIndexSearcher; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.QueryUtils.FCInvisibleMultiReader; -import org.apache.lucene.store.BaseDirectoryWrapper; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.FlushInfo; -import org.apache.lucene.store.IOContext.Context; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.LockFactory; -import org.apache.lucene.store.MergeInfo; -import org.apache.lucene.store.MockDirectoryWrapper.Throttling; -import org.apache.lucene.store.MockDirectoryWrapper; -import org.apache.lucene.store.NRTCachingDirectory; -import org.apache.lucene.store.RateLimitedDirectoryWrapper; -import org.apache.lucene.util.automaton.AutomatonTestUtil; -import org.apache.lucene.util.automaton.CompiledAutomaton; -import org.apache.lucene.util.automaton.RegExp; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.RuleChain; -import org.junit.rules.TestRule; -import org.junit.runner.RunWith; - -import com.carrotsearch.randomizedtesting.JUnit4MethodProvider; -import 
com.carrotsearch.randomizedtesting.LifecycleScope; -import com.carrotsearch.randomizedtesting.MixWithSuiteName; -import com.carrotsearch.randomizedtesting.RandomizedContext; -import com.carrotsearch.randomizedtesting.RandomizedRunner; -import com.carrotsearch.randomizedtesting.RandomizedTest; -import com.carrotsearch.randomizedtesting.annotations.Listeners; -import com.carrotsearch.randomizedtesting.annotations.SeedDecorators; -import com.carrotsearch.randomizedtesting.annotations.TestGroup; -import com.carrotsearch.randomizedtesting.annotations.TestMethodProviders; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup.Group; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence; -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies; -import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite; -import com.carrotsearch.randomizedtesting.generators.RandomPicks; -import com.carrotsearch.randomizedtesting.rules.NoClassHooksShadowingRule; -import com.carrotsearch.randomizedtesting.rules.NoInstanceHooksOverridesRule; -import com.carrotsearch.randomizedtesting.rules.StaticFieldsInvariantRule; -import com.carrotsearch.randomizedtesting.rules.SystemPropertiesInvariantRule; - import static com.carrotsearch.randomizedtesting.RandomizedTest.systemPropertyAsBoolean; import static com.carrotsearch.randomizedtesting.RandomizedTest.systemPropertyAsInt; @@ -1824,8 +1778,8 @@ public abstract 
class LuceneTestCase extends Assert { public void assertTermsEnumEquals(String info, IndexReader leftReader, TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep) throws IOException { BytesRef term; Bits randomBits = new RandomBits(leftReader.maxDoc(), random().nextDouble(), random()); - DocsAndPositionsEnum leftPositions = null; - DocsAndPositionsEnum rightPositions = null; + DocsEnum leftPositions = null; + DocsEnum rightPositions = null; DocsEnum leftDocs = null; DocsEnum rightDocs = null; @@ -1889,7 +1843,7 @@ public abstract class LuceneTestCase extends Assert { /** * checks docs + freqs + positions + payloads, sequentially */ - public void assertDocsAndPositionsEnumEquals(String info, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) throws IOException { + public void assertDocsAndPositionsEnumEquals(String info, DocsEnum leftDocs, DocsEnum rightDocs) throws IOException { if (leftDocs == null || rightDocs == null) { assertNull(leftDocs); assertNull(rightDocs); @@ -1968,7 +1922,7 @@ public abstract class LuceneTestCase extends Assert { /** * checks advancing docs + positions */ - public void assertPositionsSkippingEquals(String info, IndexReader leftReader, int docFreq, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) throws IOException { + public void assertPositionsSkippingEquals(String info, IndexReader leftReader, int docFreq, DocsEnum leftDocs, DocsEnum rightDocs) throws IOException { if (leftDocs == null || rightDocs == null) { assertNull(leftDocs); assertNull(rightDocs); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java index 33b442f..7dd5fed 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java @@ -66,7 +66,6 @@ import org.apache.lucene.index.CheckIndex.Status.StoredFieldStatus; import 
org.apache.lucene.index.CheckIndex.Status.TermIndexStatus; import org.apache.lucene.index.CheckIndex.Status.TermVectorStatus; import org.apache.lucene.index.ConcurrentMergeScheduler; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.DocValuesType; import org.apache.lucene.index.FilterAtomicReader; @@ -904,13 +903,13 @@ public final class TestUtil { if (random.nextBoolean()) { final int posFlags; switch (random.nextInt(4)) { - case 0: posFlags = 0; break; - case 1: posFlags = DocsAndPositionsEnum.FLAG_OFFSETS; break; - case 2: posFlags = DocsAndPositionsEnum.FLAG_PAYLOADS; break; - default: posFlags = DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS; break; + case 0: posFlags = DocsEnum.FLAG_NONE; break; + case 1: posFlags = DocsEnum.FLAG_OFFSETS; break; + case 2: posFlags = DocsEnum.FLAG_PAYLOADS; break; + default: posFlags = DocsEnum.FLAG_OFFSETS | DocsEnum.FLAG_PAYLOADS; break; } // TODO: cast to DocsAndPositionsEnum? 
- DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, posFlags); + DocsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, posFlags); if (docsAndPositions != null) { return docsAndPositions; } diff --git a/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java b/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java index 5781036..0fc9ca0 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java @@ -22,10 +22,10 @@ import com.carrotsearch.hppc.IntObjectOpenHashMap; import com.carrotsearch.hppc.IntOpenHashSet; import com.carrotsearch.hppc.cursors.IntObjectCursor; import com.carrotsearch.hppc.cursors.ObjectCursor; - import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.search.Collector; import org.apache.lucene.search.DocIdSetIterator; @@ -39,7 +39,6 @@ import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.FixedBitSet; import org.apache.solr.common.SolrDocumentList; @@ -352,6 +351,11 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia }; } + @Override + public int postingFeatures() { + return DocsEnum.FLAG_NONE; + } + public IntObjectMap getGroups() { return groups; } diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 45d5731..f871227 100644 --- 
a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -17,22 +17,6 @@ package org.apache.solr.handler.component; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.regex.Pattern; - -import org.apache.commons.lang.StringUtils; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.ReaderUtil; @@ -54,7 +38,13 @@ import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrException; -import org.apache.solr.common.params.*; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.CursorMarkParams; +import org.apache.solr.common.params.GroupParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.MoreLikeThisParams; +import org.apache.solr.common.params.ShardParams; +import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; @@ -73,11 +63,11 @@ import org.apache.solr.search.Grouping; import org.apache.solr.search.QParser; import org.apache.solr.search.QParserPlugin; import org.apache.solr.search.QueryParsing; +import org.apache.solr.search.RankQuery; import org.apache.solr.search.ReturnFields; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrReturnFields; import org.apache.solr.search.SortSpec; -import 
org.apache.solr.search.RankQuery; import org.apache.solr.search.SyntaxError; import org.apache.solr.search.grouping.CommandHandler; import org.apache.solr.search.grouping.GroupingSpecification; @@ -99,8 +89,22 @@ import org.apache.solr.search.grouping.endresulttransformer.GroupedEndResultTran import org.apache.solr.search.grouping.endresulttransformer.MainEndResultTransformer; import org.apache.solr.search.grouping.endresulttransformer.SimpleEndResultTransformer; import org.apache.solr.util.SolrPluginUtils; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; -import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.regex.Pattern; /** * TODO! @@ -1314,6 +1318,11 @@ public class QueryComponent extends SearchComponent } @Override + public int nextPosition() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public int nextDoc() throws IOException { throw new UnsupportedOperationException(); } diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java index e807daa..602282e 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java @@ -13,7 +13,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; -import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; @@ -335,7 +335,7 @@ public class TermVectorComponent 
extends SearchComponent implements SolrCoreAwar docNL.add(field, fieldNL); BytesRef text; - DocsAndPositionsEnum dpEnum = null; + DocsEnum dpEnum = null; while((text = termsEnum.next()) != null) { String term = text.utf8ToString(); NamedList termInfo = new NamedList<>(); diff --git a/solr/core/src/java/org/apache/solr/schema/LatLonType.java b/solr/core/src/java/org/apache/solr/schema/LatLonType.java index de5bc61..34a599c 100644 --- a/solr/core/src/java/org/apache/solr/schema/LatLonType.java +++ b/solr/core/src/java/org/apache/solr/schema/LatLonType.java @@ -16,22 +16,18 @@ package org.apache.solr.schema; * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; - +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.distance.DistanceUtils; import com.spatial4j.core.shape.Point; - +import com.spatial4j.core.shape.Rectangle; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.StorableField; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.VectorValueSource; -import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ComplexExplanation; @@ -50,13 +46,14 @@ import org.apache.solr.search.ExtendedQueryBase; import org.apache.solr.search.PostFilter; import org.apache.solr.search.QParser; import org.apache.solr.search.SpatialOptions; - -import com.spatial4j.core.context.SpatialContext; -import com.spatial4j.core.distance.DistanceUtils; -import com.spatial4j.core.shape.Rectangle; - import org.apache.solr.util.SpatialUtils; +import java.io.IOException; +import 
java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; + /** * Represents a Latitude/Longitude as a 2 dimensional point. Latitude is always specified first. @@ -342,13 +339,13 @@ class SpatialDistanceQuery extends ExtendedQueryBase implements PostFilter { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { return new SpatialScorer(context, acceptDocs, this, queryWeight); } @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - return ((SpatialScorer)scorer(context, context.reader().getLiveDocs())).explain(doc); + return ((SpatialScorer)scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs())).explain(doc); } } @@ -485,6 +482,11 @@ class SpatialDistanceQuery extends ExtendedQueryBase implements PostFilter { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public long cost() { return maxDoc; } @@ -507,6 +509,7 @@ class SpatialDistanceQuery extends ExtendedQueryBase implements PostFilter { result.addDetail(new Explanation(weight.queryNorm,"queryNorm")); return result; } + } @Override diff --git a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java index 2073ffe..7c9674e 100644 --- a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java @@ -17,11 +17,10 @@ package org.apache.solr.search; -import java.io.IOException; -import java.util.Arrays; -import java.util.Iterator; -import java.util.Map; - +import com.carrotsearch.hppc.FloatArrayList; +import com.carrotsearch.hppc.IntIntOpenHashMap; +import com.carrotsearch.hppc.IntOpenHashSet; +import com.carrotsearch.hppc.cursors.IntIntCursor; import 
org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.NumericDocValues; @@ -48,10 +47,10 @@ import org.apache.solr.schema.TrieFloatField; import org.apache.solr.schema.TrieIntField; import org.apache.solr.schema.TrieLongField; -import com.carrotsearch.hppc.FloatArrayList; -import com.carrotsearch.hppc.IntOpenHashSet; -import com.carrotsearch.hppc.IntIntOpenHashMap; -import com.carrotsearch.hppc.cursors.IntIntCursor; +import java.io.IOException; +import java.util.Arrays; +import java.util.Iterator; +import java.util.Map; /** @@ -353,6 +352,11 @@ public class CollapsingQParserPlugin extends QParserPlugin { return 0; } + @Override + public int nextPosition() throws IOException { + return -1; + } + public int advance(int i) { return -1; } diff --git a/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java index a2b4958..872eded 100644 --- a/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java @@ -16,13 +16,6 @@ */ package org.apache.solr.search; -import java.io.Closeable; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Set; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Fields; @@ -57,6 +50,13 @@ import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.schema.TrieField; import org.apache.solr.util.RefCounted; +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Set; + public class JoinQParserPlugin extends QParserPlugin { public static final String NAME = "join"; @@ -232,7 +232,7 @@ class JoinQuery extends Query { @Override - public Scorer scorer(AtomicReaderContext context, Bits 
acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { if (filter == null) { boolean debug = rb != null && rb.isDebug(); long start = debug ? System.currentTimeMillis() : 0; @@ -501,7 +501,7 @@ class JoinQuery extends Query { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, context.reader().getLiveDocs()); + Scorer scorer = scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs()); boolean exists = scorer.advance(doc) == doc; ComplexExplanation result = new ComplexExplanation(); @@ -556,6 +556,11 @@ class JoinQuery extends Query { } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int advance(int target) throws IOException { return iter.advance(target); } diff --git a/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java index bf25ad9..180282e 100644 --- a/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java @@ -194,8 +194,8 @@ public class ReRankQParserPlugin extends QParserPlugin { return mainWeight.getValueForNormalization(); } - public Scorer scorer(AtomicReaderContext context, Bits bits) throws IOException { - return mainWeight.scorer(context, bits); + public Scorer scorer(AtomicReaderContext context, int flags, Bits bits) throws IOException { + return mainWeight.scorer(context, flags, bits); } public Query getQuery() { diff --git a/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java b/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java index dfdf7ad..16585ee 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java +++ b/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java @@ -1,15 +1,24 @@ package 
org.apache.solr.search; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.queries.function.ValueSource; -import org.apache.lucene.search.*; +import org.apache.lucene.search.ComplexExplanation; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.AtomicReaderContext; import org.apache.solr.common.SolrException; import java.io.IOException; -import java.util.Set; import java.util.Map; +import java.util.Set; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -119,7 +128,7 @@ public class SolrConstantScoreQuery extends ConstantScoreQuery implements Extend } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { return new ConstantScorer(context, this, queryWeight, acceptDocs); } @@ -192,6 +201,11 @@ public class SolrConstantScoreQuery extends ConstantScoreQuery implements Extend } @Override + public int nextPosition() throws IOException { + return -1; + } + + @Override public int advance(int target) throws IOException { return docIdSetIterator.advance(target); } diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index bd2110c..0e77384 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ 
-17,23 +17,6 @@ package org.apache.solr.search; -import java.io.Closeable; -import java.io.IOException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; - import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.Field; @@ -60,34 +43,7 @@ import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.FieldDoc; -import org.apache.lucene.search.Filter; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.MultiCollector; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.SimpleCollector; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TimeLimitingCollector; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TopDocsCollector; -import org.apache.lucene.search.TopFieldCollector; -import org.apache.lucene.search.TopFieldDocs; 
-import org.apache.lucene.search.TopScoreDocCollector; -import org.apache.lucene.search.TotalHitCountCollector; -import org.apache.lucene.search.Weight; +import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.uninverting.UninvertingReader; import org.apache.lucene.util.Bits; @@ -114,6 +70,23 @@ import org.apache.solr.update.SolrIndexConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.Closeable; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + /** * SolrIndexSearcher adds schema awareness and caching functionality @@ -2451,7 +2424,7 @@ class FilterImpl extends Filter { iterators.add(iter); } for (Weight w : weights) { - Scorer scorer = w.scorer(context, context.reader().getLiveDocs()); + Scorer scorer = w.scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs()); if (scorer == null) return null; iterators.add(scorer); } diff --git a/solr/core/src/java/org/apache/solr/search/join/IgnoreAcceptDocsQuery.java b/solr/core/src/java/org/apache/solr/search/join/IgnoreAcceptDocsQuery.java index d61b4ce..5cdda88 100644 --- a/solr/core/src/java/org/apache/solr/search/join/IgnoreAcceptDocsQuery.java +++ b/solr/core/src/java/org/apache/solr/search/join/IgnoreAcceptDocsQuery.java @@ -86,8 +86,8 @@ public class IgnoreAcceptDocsQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { - return w.scorer(context, null); + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { + return w.scorer(context, 
flags, null); } } diff --git a/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java b/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java index 1c1fccc..8fbaae0 100644 --- a/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java +++ b/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java @@ -17,8 +17,6 @@ package org.apache.solr.update; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; @@ -31,6 +29,8 @@ import org.apache.lucene.uninverting.UninvertingReader; import org.apache.lucene.util.Bits; import org.apache.solr.schema.IndexSchema; +import java.io.IOException; + /** * Allows access to uninverted docvalues by delete-by-queries. * this is used e.g. to implement versioning constraints in solr. @@ -82,8 +82,8 @@ final class DeleteByQueryWrapper extends Query { public void normalize(float norm, float topLevelBoost) { inner.normalize(norm, topLevelBoost); } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { - return inner.scorer(privateContext.getIndexReader().leaves().get(0), acceptDocs); + public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException { + return inner.scorer(privateContext.getIndexReader().leaves().get(0), flags, acceptDocs); } }; }