* A {@link PostingsFormat} useful for low doc-frequency fields such as primary
@@ -386,11 +385,10 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
public long totalTermFreq() throws IOException {
return delegate().totalTermFreq();
}
-
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
- DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits liveDocs,
+ DocsEnum reuse, int flags) throws IOException {
return delegate().docsAndPositions(liveDocs, reuse, flags);
}
@@ -399,6 +397,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
throws IOException {
return delegate().docs(liveDocs, reuse, flags);
}
+
}
@Override
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
index 418ae8c..22b9e05 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
@@ -17,20 +17,13 @@ package org.apache.lucene.codecs.memory;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.TreeMap;
-
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
-import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.SegmentReadState;
@@ -50,6 +43,12 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RunAutomaton;
import org.apache.lucene.util.automaton.Transition;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
// TODO:
// - build depth-N prefix hash?
// - or: longer dense skip lists than just next byte?
@@ -334,7 +333,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
BytesRef term;
DocsEnum docsEnum = null;
- DocsAndPositionsEnum docsAndPositionsEnum = null;
+ DocsEnum docsAndPositionsEnum = null;
final TermsEnum termsEnum = termsIn.iterator(null);
int termOffset = 0;
@@ -875,6 +874,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
if (terms[termOrd] instanceof LowFreqTerm) {
final int[] postings = ((LowFreqTerm) terms[termOrd]).postings;
+ final byte[] payloads = ((LowFreqTerm) terms[termOrd]).payloads;
if (hasFreq) {
if (hasPos) {
int posLen;
@@ -890,13 +890,13 @@ public final class DirectPostingsFormat extends PostingsFormat {
if (reuse instanceof LowFreqDocsEnum) {
docsEnum = (LowFreqDocsEnum) reuse;
if (!docsEnum.canReuse(liveDocs, posLen)) {
- docsEnum = new LowFreqDocsEnum(liveDocs, posLen);
+ docsEnum = new LowFreqDocsEnum(liveDocs, hasOffsets, hasPayloads);
}
} else {
- docsEnum = new LowFreqDocsEnum(liveDocs, posLen);
+ docsEnum = new LowFreqDocsEnum(liveDocs, hasOffsets, hasPayloads);
}
- return docsEnum.reset(postings);
+ return docsEnum.reset(postings, payloads);
} else {
LowFreqDocsEnumNoPos docsEnum;
if (reuse instanceof LowFreqDocsEnumNoPos) {
@@ -942,7 +942,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) {
if (!hasPos) {
return null;
}
@@ -954,7 +954,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
final int[] postings = term.postings;
final byte[] payloads = term.payloads;
- return new LowFreqDocsAndPositionsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
+ return new LowFreqDocsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
} else {
final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
return new HighFreqDocsAndPositionsEnum(liveDocs, hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads);
@@ -1473,6 +1473,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
if (terms[termOrd] instanceof LowFreqTerm) {
final int[] postings = ((LowFreqTerm) terms[termOrd]).postings;
+ final byte[] payloads = ((LowFreqTerm) terms[termOrd]).payloads;
if (hasFreq) {
if (hasPos) {
int posLen;
@@ -1484,7 +1485,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
if (hasPayloads) {
posLen++;
}
- return new LowFreqDocsEnum(liveDocs, posLen).reset(postings);
+ return new LowFreqDocsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
} else {
return new LowFreqDocsEnumNoPos(liveDocs).reset(postings);
}
@@ -1499,7 +1500,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) {
if (!hasPos) {
return null;
}
@@ -1511,7 +1512,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
final int[] postings = term.postings;
final byte[] payloads = term.payloads;
- return new LowFreqDocsAndPositionsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
+ return new LowFreqDocsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
} else {
final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
return new HighFreqDocsAndPositionsEnum(liveDocs, hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads);
@@ -1587,6 +1588,11 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
@Override
+ public int nextPosition() throws IOException {
+ return -1;
+ }
+
+ @Override
public int advance(int target) throws IOException {
// Linear scan, but this is low-freq term so it won't
// be costly:
@@ -1655,87 +1661,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
@Override
- public int advance(int target) throws IOException {
- // Linear scan, but this is low-freq term so it won't
- // be costly:
- return slowAdvance(target);
- }
-
- @Override
- public long cost() {
- return postings.length / 2;
- }
- }
-
- // Docs + freqs + positions/offets:
- private final static class LowFreqDocsEnum extends DocsEnum {
- private int[] postings;
- private final Bits liveDocs;
- private final int posMult;
- private int upto;
- private int freq;
-
- public LowFreqDocsEnum(Bits liveDocs, int posMult) {
- this.liveDocs = liveDocs;
- this.posMult = posMult;
- // if (DEBUG) {
- // System.out.println("LowFreqDE: posMult=" + posMult);
- // }
- }
-
- public boolean canReuse(Bits liveDocs, int posMult) {
- return liveDocs == this.liveDocs && posMult == this.posMult;
- }
-
- public DocsEnum reset(int[] postings) {
- this.postings = postings;
- upto = -2;
- freq = 0;
- return this;
- }
-
- // TODO: can do this w/o setting members?
- @Override
- public int nextDoc() {
- upto += 2 + freq*posMult;
- // if (DEBUG) {
- // System.out.println(" nextDoc freq=" + freq + " upto=" + upto + " vs " + postings.length);
- // }
- if (liveDocs == null) {
- if (upto < postings.length) {
- freq = postings[upto+1];
- assert freq > 0;
- return postings[upto];
- }
- } else {
- while (upto < postings.length) {
- freq = postings[upto+1];
- assert freq > 0;
- if (liveDocs.get(postings[upto])) {
- return postings[upto];
- }
- upto += 2 + freq*posMult;
- }
- }
- return NO_MORE_DOCS;
- }
-
- @Override
- public int docID() {
- // TODO: store docID member?
- if (upto < 0) {
- return -1;
- } else if (upto < postings.length) {
- return postings[upto];
- } else {
- return NO_MORE_DOCS;
- }
- }
-
- @Override
- public int freq() {
- // TODO: can I do postings[upto+1]?
- return freq;
+ public int nextPosition() throws IOException {
+ return -1;
}
@Override
@@ -1747,12 +1674,11 @@ public final class DirectPostingsFormat extends PostingsFormat {
@Override
public long cost() {
- // TODO: could do a better estimate
return postings.length / 2;
}
}
- private final static class LowFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
+ private final static class LowFreqDocsEnum extends DocsEnum {
private int[] postings;
private final Bits liveDocs;
private final int posMult;
@@ -1763,6 +1689,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
private int docID;
private int freq;
private int skipPositions;
+ private int pos;
private int startOffset;
private int endOffset;
private int lastPayloadOffset;
@@ -1770,7 +1697,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
private int payloadLength;
private byte[] payloadBytes;
- public LowFreqDocsAndPositionsEnum(Bits liveDocs, boolean hasOffsets, boolean hasPayloads) {
+ public LowFreqDocsEnum(Bits liveDocs, boolean hasOffsets, boolean hasPayloads) {
this.liveDocs = liveDocs;
this.hasOffsets = hasOffsets;
this.hasPayloads = hasPayloads;
@@ -1787,7 +1714,11 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
}
- public DocsAndPositionsEnum reset(int[] postings, byte[] payloadBytes) {
+ public boolean canReuse(Bits liveDocs, int posMult) {
+ return liveDocs == this.liveDocs && posMult == this.posMult;
+ }
+
+ public DocsEnum reset(int[] postings, byte[] payloadBytes) {
this.postings = postings;
upto = 0;
skipPositions = 0;
@@ -1841,7 +1772,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
}
}
-
+ pos = -1;
return docID = NO_MORE_DOCS;
}
@@ -1857,9 +1788,11 @@ public final class DirectPostingsFormat extends PostingsFormat {
@Override
public int nextPosition() {
- assert skipPositions > 0;
+ //assert skipPositions > 0;
+ if (skipPositions == 0)
+ return NO_MORE_POSITIONS;
skipPositions--;
- final int pos = postings[upto++];
+ pos = postings[upto++];
if (hasOffsets) {
startOffset = postings[upto++];
endOffset = postings[upto++];
@@ -1873,6 +1806,16 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
@Override
+ public int startPosition() throws IOException {
+ return pos;
+ }
+
+ @Override
+ public int endPosition() throws IOException {
+ return pos;
+ }
+
+ @Override
public int startOffset() {
return startOffset;
}
@@ -1971,6 +1914,11 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
@Override
+ public int nextPosition() throws IOException {
+ return -1;
+ }
+
+ @Override
public int advance(int target) {
/*
upto++;
@@ -2085,7 +2033,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
// TODO: specialize offsets and not
- private final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
+ private final static class HighFreqDocsAndPositionsEnum extends DocsEnum {
private int[] docIDs;
private int[] freqs;
private int[][] positions;
@@ -2120,7 +2068,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
return liveDocs;
}
- public DocsAndPositionsEnum reset(int[] docIDs, int[] freqs, int[][] positions, byte[][][] payloads) {
+ public DocsEnum reset(int[] docIDs, int[] freqs, int[][] positions, byte[][][] payloads) {
this.docIDs = docIDs;
this.freqs = freqs;
this.positions = positions;
@@ -2164,6 +2112,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
@Override
public int nextPosition() {
+ if (posUpto >= curPositions.length)
+ return NO_MORE_POSITIONS;
posUpto += posJump;
return curPositions[posUpto];
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
index 1e4e9cc..c84de8c 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
@@ -17,20 +17,14 @@ package org.apache.lucene.codecs.memory;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.TreeMap;
-
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
@@ -41,26 +35,31 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.automaton.ByteRunAutomaton;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.codecs.BlockTermState;
-import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsReaderBase;
-import org.apache.lucene.codecs.CodecUtil;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.TreeMap;
/**
* FST-based terms dictionary reader.
@@ -438,12 +437,8 @@ public class FSTOrdTermsReader extends FieldsProducer {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
- if (!hasPositions()) {
- return null;
- }
- decodeMetaData();
- return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags);
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+ return docs(liveDocs, reuse, flags);
}
// TODO: this can be achieved by making use of Util.getByOutput()
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
index 97b96c7..55fc374 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
@@ -17,19 +17,14 @@ package org.apache.lucene.codecs.memory;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.TreeMap;
-
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
@@ -39,25 +34,29 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.automaton.ByteRunAutomaton;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.codecs.BlockTermState;
-import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsReaderBase;
-import org.apache.lucene.codecs.CodecUtil;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.TreeMap;
/**
* FST-based terms dictionary reader.
@@ -305,12 +304,8 @@ public class FSTTermsReader extends FieldsProducer {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
- if (!hasPositions()) {
- return null;
- }
- decodeMetaData();
- return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags);
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+ return docs(liveDocs, reuse, flags);
}
@Override
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java
index f5ba225..6a78b89 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java
@@ -17,20 +17,11 @@ package org.apache.lucene.codecs.memory;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicLong;
-
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@@ -67,6 +58,14 @@ import org.apache.lucene.util.packed.BlockPackedReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
import org.apache.lucene.util.packed.PackedInts;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
/**
* Reader for {@link MemoryDocValuesFormat}
*/
@@ -840,7 +839,7 @@ class MemoryDocValuesProducer extends DocValuesProducer {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
index 8e58251..b78ca30 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@@ -17,19 +17,11 @@ package org.apache.lucene.codecs.memory;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
@@ -53,7 +45,6 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.Builder;
@@ -63,6 +54,13 @@ import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
// TODO: would be nice to somehow allow this to act like
// InstantiatedIndex, by never writing to disk; ie you write
// to this Codec in RAM only and then when you open a reader
@@ -317,7 +315,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
DocsEnum docsEnum = null;
- DocsAndPositionsEnum posEnum = null;
+ DocsEnum posEnum = null;
int enumFlags;
IndexOptions indexOptions = fieldInfo.getIndexOptions();
@@ -332,15 +330,15 @@ public final class MemoryPostingsFormat extends PostingsFormat {
enumFlags = DocsEnum.FLAG_FREQS;
} else if (writeOffsets == false) {
if (writePayloads) {
- enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS;
+ enumFlags = DocsEnum.FLAG_PAYLOADS;
} else {
enumFlags = 0;
}
} else {
if (writePayloads) {
- enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS | DocsAndPositionsEnum.FLAG_OFFSETS;
+ enumFlags = DocsEnum.FLAG_PAYLOADS | DocsEnum.FLAG_OFFSETS;
} else {
- enumFlags = DocsAndPositionsEnum.FLAG_OFFSETS;
+ enumFlags = DocsEnum.FLAG_OFFSETS;
}
}
@@ -539,14 +537,19 @@ public final class MemoryPostingsFormat extends PostingsFormat {
public int freq() {
return freq;
}
-
+
+ @Override
+ public int nextPosition() throws IOException {
+ return -1;
+ }
+
@Override
public long cost() {
return numDocs;
}
}
- private final static class FSTDocsAndPositionsEnum extends DocsAndPositionsEnum {
+ private final static class FSTDocsAndPositionsEnum extends DocsEnum {
private final boolean storePayloads;
private byte[] buffer = new byte[16];
private final ByteArrayDataInput in = new ByteArrayDataInput(buffer);
@@ -817,7 +820,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) {
boolean hasOffsets = field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
index 780c821..6060846 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
@@ -17,26 +17,7 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License.
*/
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
-
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.TreeMap;
-
import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
@@ -57,11 +38,9 @@ import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
@@ -69,6 +48,24 @@ import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
+
class SimpleTextFieldsReader extends FieldsProducer {
private static final long BASE_RAM_BYTES_USED =
@@ -216,22 +213,22 @@ class SimpleTextFieldsReader extends FieldsProducer {
} else {
docsEnum = new SimpleTextDocsEnum();
}
- return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS_ONLY, docFreq);
+ return docsEnum.reset(docsStart, liveDocs, indexOptions, docFreq);
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed
return null;
}
- SimpleTextDocsAndPositionsEnum docsAndPositionsEnum;
- if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
- docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse;
+ SimpleTextDocsEnum docsAndPositionsEnum;
+ if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
+ docsAndPositionsEnum = (SimpleTextDocsEnum) reuse;
} else {
- docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum();
+ docsAndPositionsEnum = new SimpleTextDocsEnum();
}
return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions, docFreq);
}
@@ -240,105 +237,6 @@ class SimpleTextFieldsReader extends FieldsProducer {
private class SimpleTextDocsEnum extends DocsEnum {
private final IndexInput inStart;
private final IndexInput in;
- private boolean omitTF;
- private int docID = -1;
- private int tf;
- private Bits liveDocs;
- private final BytesRefBuilder scratch = new BytesRefBuilder();
- private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
- private int cost;
-
- public SimpleTextDocsEnum() {
- this.inStart = SimpleTextFieldsReader.this.in;
- this.in = this.inStart.clone();
- }
-
- public boolean canReuse(IndexInput in) {
- return in == inStart;
- }
-
- public SimpleTextDocsEnum reset(long fp, Bits liveDocs, boolean omitTF, int docFreq) throws IOException {
- this.liveDocs = liveDocs;
- in.seek(fp);
- this.omitTF = omitTF;
- docID = -1;
- tf = 1;
- cost = docFreq;
- return this;
- }
-
- @Override
- public int docID() {
- return docID;
- }
-
- @Override
- public int freq() throws IOException {
- return tf;
- }
-
- @Override
- public int nextDoc() throws IOException {
- if (docID == NO_MORE_DOCS) {
- return docID;
- }
- boolean first = true;
- int termFreq = 0;
- while(true) {
- final long lineStart = in.getFilePointer();
- SimpleTextUtil.readLine(in, scratch);
- if (StringHelper.startsWith(scratch.get(), DOC)) {
- if (!first && (liveDocs == null || liveDocs.get(docID))) {
- in.seek(lineStart);
- if (!omitTF) {
- tf = termFreq;
- }
- return docID;
- }
- scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
- docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
- termFreq = 0;
- first = false;
- } else if (StringHelper.startsWith(scratch.get(), FREQ)) {
- scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
- termFreq = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
- } else if (StringHelper.startsWith(scratch.get(), POS)) {
- // skip termFreq++;
- } else if (StringHelper.startsWith(scratch.get(), START_OFFSET)) {
- // skip
- } else if (StringHelper.startsWith(scratch.get(), END_OFFSET)) {
- // skip
- } else if (StringHelper.startsWith(scratch.get(), PAYLOAD)) {
- // skip
- } else {
- assert StringHelper.startsWith(scratch.get(), TERM) || StringHelper.startsWith(scratch.get(), FIELD) || StringHelper.startsWith(scratch.get(), END): "scratch=" + scratch.get().utf8ToString();
- if (!first && (liveDocs == null || liveDocs.get(docID))) {
- in.seek(lineStart);
- if (!omitTF) {
- tf = termFreq;
- }
- return docID;
- }
- return docID = NO_MORE_DOCS;
- }
- }
- }
-
- @Override
- public int advance(int target) throws IOException {
- // Naive -- better to index skip data
- return slowAdvance(target);
- }
-
- @Override
- public long cost() {
- return cost;
- }
- }
-
- private class SimpleTextDocsAndPositionsEnum extends DocsAndPositionsEnum {
- private final IndexInput inStart;
- private final IndexInput in;
private int docID = -1;
private int tf;
private Bits liveDocs;
@@ -352,9 +250,10 @@ class SimpleTextFieldsReader extends FieldsProducer {
private boolean readPositions;
private int startOffset;
private int endOffset;
+ private int posPending;
private int cost;
- public SimpleTextDocsAndPositionsEnum() {
+ public SimpleTextDocsEnum() {
this.inStart = SimpleTextFieldsReader.this.in;
this.in = inStart.clone();
}
@@ -363,7 +262,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
return in == inStart;
}
- public SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions, int docFreq) {
+ public SimpleTextDocsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions, int docFreq) {
this.liveDocs = liveDocs;
nextDocStart = fp;
docID = -1;
@@ -405,6 +304,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
tf = 0;
+ posPending = 0;
first = false;
} else if (StringHelper.startsWith(scratch.get(), FREQ)) {
scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
@@ -439,6 +339,9 @@ class SimpleTextFieldsReader extends FieldsProducer {
@Override
public int nextPosition() throws IOException {
final int pos;
+ if (posPending == 0)
+ return NO_MORE_POSITIONS;
+
if (readPositions) {
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch.get(), POS): "got line=" + scratch.get().utf8ToString();
@@ -472,6 +375,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
payload = null;
in.seek(fp);
}
+ posPending--;
return pos;
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
index 656713d..26a012c 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
@@ -17,10 +17,7 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@@ -32,8 +29,10 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
+import java.io.IOException;
+
class SimpleTextFieldsWriter extends FieldsConsumer {
-
+
private IndexOutput out;
private final BytesRefBuilder scratch = new BytesRefBuilder();
private final SegmentWriteState writeState;
@@ -81,10 +80,10 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
if (hasPositions) {
if (hasPayloads) {
- flags = flags | DocsAndPositionsEnum.FLAG_PAYLOADS;
+ flags = flags | DocsEnum.FLAG_PAYLOADS;
}
if (hasOffsets) {
- flags = flags | DocsAndPositionsEnum.FLAG_OFFSETS;
+ flags = flags | DocsEnum.FLAG_OFFSETS;
}
} else {
if (hasFreqs) {
@@ -93,7 +92,6 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
}
TermsEnum termsEnum = terms.iterator(null);
- DocsAndPositionsEnum posEnum = null;
DocsEnum docsEnum = null;
// for each term in field
@@ -104,8 +102,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
}
if (hasPositions) {
- posEnum = termsEnum.docsAndPositions(null, posEnum, flags);
- docsEnum = posEnum;
+ docsEnum = termsEnum.docsAndPositions(null, docsEnum, flags);
} else {
docsEnum = termsEnum.docs(null, docsEnum, flags);
}
@@ -154,15 +151,15 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
           // for each pos in field+term+doc
           for(int i=0;i<freq;i++) {
-            int position = posEnum.nextPosition();
+            int position = docsEnum.nextPosition();
             if (hasOffsets) {
-              int startOffset = posEnum.startOffset();
-              int endOffset = posEnum.endOffset();
+              int startOffset = docsEnum.startOffset();
+              int endOffset = docsEnum.endOffset();
               assert endOffset >= startOffset;
               assert startOffset >= lastStartOffset: "startOffset=" + startOffset + " lastStartOffset=" + lastStartOffset;
               lastStartOffset = startOffset;
@@ -174,7 +171,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
newline();
}
- BytesRef payload = posEnum.getPayload();
+ BytesRef payload = docsEnum.getPayload();
if (payload != null && payload.length > 0) {
assert payload.length != 0;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
index d085bba..49a87db 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
@@ -17,15 +17,7 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
import org.apache.lucene.codecs.TermVectorsReader;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexFileNames;
@@ -49,7 +41,29 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
-import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.*;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.DOC;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.END;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.ENDOFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.FIELD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.FIELDNAME;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.FIELDOFFSETS;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.FIELDPAYLOADS;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.FIELDPOSITIONS;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.FIELDTERMCOUNT;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.NUMFIELDS;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.PAYLOAD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.POSITION;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.STARTOFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.TERMFREQ;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.TERMTEXT;
+import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.VECTORS_EXTENSION;
/**
* Reads plain-text term vectors.
@@ -391,73 +405,28 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
// TODO: reuse
+ SimpleTVPostings postings = current.getValue();
SimpleTVDocsEnum e = new SimpleTVDocsEnum();
- e.reset(liveDocs, (flags & DocsEnum.FLAG_FREQS) == 0 ? 1 : current.getValue().freq);
+ e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads);
return e;
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
SimpleTVPostings postings = current.getValue();
if (postings.positions == null && postings.startOffsets == null) {
return null;
}
// TODO: reuse
- SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum();
+ SimpleTVDocsEnum e = new SimpleTVDocsEnum();
e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads);
return e;
}
}
- // note: these two enum classes are exactly like the Default impl...
private static class SimpleTVDocsEnum extends DocsEnum {
private boolean didNext;
private int doc = -1;
- private int freq;
- private Bits liveDocs;
-
- @Override
- public int freq() throws IOException {
- assert freq != -1;
- return freq;
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public int nextDoc() {
- if (!didNext && (liveDocs == null || liveDocs.get(0))) {
- didNext = true;
- return (doc = 0);
- } else {
- return (doc = NO_MORE_DOCS);
- }
- }
-
- @Override
- public int advance(int target) throws IOException {
- return slowAdvance(target);
- }
-
- public void reset(Bits liveDocs, int freq) {
- this.liveDocs = liveDocs;
- this.freq = freq;
- this.doc = -1;
- didNext = false;
- }
-
- @Override
- public long cost() {
- return 1;
- }
- }
-
- private static class SimpleTVDocsAndPositionsEnum extends DocsAndPositionsEnum {
- private boolean didNext;
- private int doc = -1;
private int nextPos;
private Bits liveDocs;
private int[] positions;
@@ -513,9 +482,11 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
@Override
public int nextPosition() {
- assert (positions != null && nextPos < positions.length) ||
- startOffsets != null && nextPos < startOffsets.length;
+ //assert (positions != null && nextPos < positions.length) ||
+ // startOffsets != null && nextPos < startOffsets.length;
if (positions != null) {
+ if (nextPos >= positions.length)
+ return NO_MORE_POSITIONS;
return positions[nextPos++];
} else {
nextPos++;
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/Token.java b/lucene/core/src/java/org/apache/lucene/analysis/Token.java
index c3bfecb..230a2cd 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/Token.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/Token.java
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;
@@ -43,7 +43,7 @@ import org.apache.lucene.util.BytesRef;
with type "eos". The default token type is "word".
A Token can optionally have metadata (a.k.a. payload) in the form of a variable
- length byte array. Use {@link DocsAndPositionsEnum#getPayload()} to retrieve the
+ length byte array. Use {@link DocsEnum#getPayload()} to retrieve the
payloads from the index.
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java
index 8793c94..0029ccf 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java
@@ -17,7 +17,7 @@ package org.apache.lucene.analysis.tokenattributes;
* limitations under the License.
*/
-import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs
+import org.apache.lucene.index.DocsEnum; // javadocs
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.BytesRef;
@@ -33,7 +33,7 @@ import org.apache.lucene.util.BytesRef;
* best to use the minimum number of bytes necessary. Some codec implementations
* may optimize payload storage when all payloads have the same length.
*
- * @see DocsAndPositionsEnum
+ * @see DocsEnum
*/
public interface PayloadAttribute extends Attribute {
/**
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java
index 9afd2f9..a7a7cd9 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java
@@ -43,7 +43,7 @@ import org.apache.lucene.util.Attribute;
*
*
*
- * @see org.apache.lucene.index.DocsAndPositionsEnum
+ * @see org.apache.lucene.index.DocsEnum
*/
public interface PositionIncrementAttribute extends Attribute {
/** Set the position increment. The default value is one.
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
index 7145fe9..2aa6add 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
@@ -17,10 +17,6 @@ package org.apache.lucene.codecs;
* limitations under the License.
*/
-import java.io.Closeable;
-import java.io.IOException;
-
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.store.DataInput;
@@ -28,10 +24,13 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
+import java.io.Closeable;
+import java.io.IOException;
+
/** The core terms dictionaries (BlockTermsReader,
* BlockTreeTermsReader) interact with a single instance
- * of this class to manage creation of {@link DocsEnum} and
- * {@link DocsAndPositionsEnum} instances. It provides an
+ * of this class to manage creation of {@link DocsEnum}
+ * instances. It provides an
* IndexInput (termsIn) where this class may read any
* previously stored data that it had written in its
* corresponding {@link PostingsWriterBase} at indexing
@@ -69,9 +68,10 @@ public abstract class PostingsReaderBase implements Closeable, Accountable {
/** Must fully consume state, since after this call that
* TermState may be reused. */
- public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse,
+ public abstract DocsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse,
int flags) throws IOException;
-
+ // nocommit this still has the distinction - no need to remove this as long as we get the interface straight?
+
/**
* Checks consistency of this reader.
*
@@ -80,7 +80,7 @@ public abstract class PostingsReaderBase implements Closeable, Accountable {
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
-
+
@Override
public abstract void close() throws IOException;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
index 6e08316..fe7cd0a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
@@ -17,12 +17,8 @@ package org.apache.lucene.codecs;
* limitations under the License.
*/
-import java.io.Closeable;
-import java.io.IOException;
-
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
-import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs
-import org.apache.lucene.index.DocsEnum; // javadocs
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.DataOutput;
@@ -30,6 +26,9 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
+import java.io.Closeable;
+import java.io.IOException;
+
/**
* Class that plugs into term dictionaries, such as {@link
* BlockTreeTermsWriter}, and handles writing postings.
@@ -53,8 +52,8 @@ public abstract class PostingsWriterBase implements Closeable {
public abstract void init(IndexOutput termsOut) throws IOException;
/** Write all postings for one term; use the provided
- * {@link TermsEnum} to pull a {@link DocsEnum} or {@link
- * DocsAndPositionsEnum}. This method should not
+ * {@link TermsEnum} to pull a {@link DocsEnum}.
+ * This method should not
* re-position the {@code TermsEnum}! It is already
* positioned on the term that should be written. This
* method must set the bit in the provided {@link
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java
index a310cf6..bd25e0c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java
@@ -17,18 +17,17 @@ package org.apache.lucene.codecs;
* limitations under the License.
*/
-import java.io.IOException;
-
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
+import java.io.IOException;
+
/**
* Extension of {@link PostingsWriterBase}, adding a push
* API for writing each element of the postings. This API
@@ -45,7 +44,6 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
// Reused in writeTerm
private DocsEnum docsEnum;
- private DocsAndPositionsEnum posEnum;
private int enumFlags;
/** {@link FieldInfo} of current field being written. */
@@ -125,15 +123,15 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
enumFlags = DocsEnum.FLAG_FREQS;
} else if (writeOffsets == false) {
if (writePayloads) {
- enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS;
+ enumFlags = DocsEnum.FLAG_PAYLOADS;
} else {
enumFlags = 0;
}
} else {
if (writePayloads) {
- enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS | DocsAndPositionsEnum.FLAG_OFFSETS;
+ enumFlags = DocsEnum.FLAG_PAYLOADS | DocsEnum.FLAG_OFFSETS;
} else {
- enumFlags = DocsAndPositionsEnum.FLAG_OFFSETS;
+ enumFlags = DocsEnum.FLAG_OFFSETS;
}
}
@@ -146,8 +144,7 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
if (writePositions == false) {
docsEnum = termsEnum.docs(null, docsEnum, enumFlags);
} else {
- posEnum = termsEnum.docsAndPositions(null, posEnum, enumFlags);
- docsEnum = posEnum;
+ docsEnum = termsEnum.docsAndPositions(null, docsEnum, enumFlags);
}
assert docsEnum != null;
@@ -171,13 +168,13 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
       if (writePositions) {
         for(int i=0;i<freq;i++) {
-          assert pos >= 0;
+          assert !hasPositions || pos >= 0;
           addPosition(pos, startOffset, endOffset, payload);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
index a7a569b..7278a7b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
@@ -17,9 +17,6 @@ package org.apache.lucene.codecs.blocktree;
* limitations under the License.
*/
-import java.io.IOException;
-
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.TermState;
@@ -36,6 +33,8 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
+import java.io.IOException;
+
// NOTE: cannot seek!
final class IntersectTermsEnum extends TermsEnum {
final IndexInput in;
@@ -209,7 +208,7 @@ final class IntersectTermsEnum extends TermsEnum {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed:
return null;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
index 32566de..fa8cd2d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
@@ -17,11 +17,7 @@ package org.apache.lucene.codecs.blocktree;
* limitations under the License.
*/
-import java.io.IOException;
-import java.io.PrintStream;
-
import org.apache.lucene.codecs.BlockTermState;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.TermState;
@@ -36,6 +32,9 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
+import java.io.IOException;
+import java.io.PrintStream;
+
/** Iterates through terms in this field */
final class SegmentTermsEnum extends TermsEnum {
@@ -994,7 +993,7 @@ final class SegmentTermsEnum extends TermsEnum {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed:
return null;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
index 9cac92b..059ca65 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
@@ -39,7 +39,6 @@ import java.util.NoSuchElementException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@@ -64,6 +63,24 @@ import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.BlockPackedReaderIterator;
import org.apache.lucene.util.packed.PackedInts;
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.BLOCK_SIZE;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_DAT;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_IDX;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.FLAGS_BITS;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.OFFSETS;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.PAYLOADS;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.POSITIONS;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_EXTENSION;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CHECKSUM;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CURRENT;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_START;
+
/**
* {@link TermVectorsReader} for {@link CompressingTermVectorsFormat}.
@@ -913,17 +930,17 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
if (positions == null && startOffsets == null) {
return null;
}
// TODO: slightly sheisty
- return (DocsAndPositionsEnum) docs(liveDocs, reuse, flags);
+ return docs(liveDocs, reuse, flags);
}
}
- private static class TVDocsEnum extends DocsAndPositionsEnum {
+ private static class TVDocsEnum extends DocsEnum {
private Bits liveDocs;
private int doc = -1;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
index 26cb34b..9eefdfe 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
@@ -17,19 +17,9 @@ package org.apache.lucene.codecs.lucene41;
* limitations under the License.
*/
-import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE;
-import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE;
-import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE;
-import static org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter.IntBlockTermState;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collections;
-
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
@@ -47,6 +37,15 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+
+import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE;
+import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE;
+import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE;
+import static org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter.IntBlockTermState;
+
/**
* Concrete class that reads docId(maybe frq,pos,offset,payloads) list
* with postings format.
@@ -246,30 +245,38 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
@Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
- BlockDocsEnum docsEnum;
- if (reuse instanceof BlockDocsEnum) {
- docsEnum = (BlockDocsEnum) reuse;
- if (!docsEnum.canReuse(docIn, fieldInfo)) {
+ if ((flags & DocsEnum.FLAG_POSITIONS) != DocsEnum.FLAG_POSITIONS) {
+ BlockDocsEnum docsEnum;
+ if (reuse instanceof BlockDocsEnum) {
+ docsEnum = (BlockDocsEnum) reuse;
+ if (!docsEnum.canReuse(docIn, fieldInfo)) {
+ docsEnum = new BlockDocsEnum(fieldInfo);
+ }
+ } else {
docsEnum = new BlockDocsEnum(fieldInfo);
}
- } else {
- docsEnum = new BlockDocsEnum(fieldInfo);
+ return docsEnum.reset(liveDocs, (IntBlockTermState) termState, flags);
}
- return docsEnum.reset(liveDocs, (IntBlockTermState) termState, flags);
+
+ return docsAndPositions(fieldInfo, termState, liveDocs, reuse, flags);
}
// TODO: specialize to liveDocs vs not
@Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs,
- DocsAndPositionsEnum reuse, int flags)
+ public DocsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs,
+ DocsEnum reuse, int flags)
throws IOException {
+ boolean indexHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
boolean indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
boolean indexHasPayloads = fieldInfo.hasPayloads();
- if ((!indexHasOffsets || (flags & DocsAndPositionsEnum.FLAG_OFFSETS) == 0) &&
- (!indexHasPayloads || (flags & DocsAndPositionsEnum.FLAG_PAYLOADS) == 0)) {
+ if (!indexHasPositions)
+ return null;
+
+ if ((!indexHasOffsets || (flags & DocsEnum.FLAG_OFFSETS) == 0) &&
+ (!indexHasPayloads || (flags & DocsEnum.FLAG_PAYLOADS) == 0)) {
BlockDocsAndPositionsEnum docsAndPositionsEnum;
if (reuse instanceof BlockDocsAndPositionsEnum) {
docsAndPositionsEnum = (BlockDocsAndPositionsEnum) reuse;
@@ -391,6 +398,11 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
}
@Override
+ public int nextPosition() throws IOException {
+ return -1;
+ }
+
+ @Override
public int docID() {
return doc;
}
@@ -572,7 +584,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
}
- final class BlockDocsAndPositionsEnum extends DocsAndPositionsEnum {
+ final class BlockDocsAndPositionsEnum extends DocsEnum {
private final byte[] encoded;
@@ -650,7 +662,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
indexHasPayloads == fieldInfo.hasPayloads();
}
- public DocsAndPositionsEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException {
+ public DocsEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException {
this.liveDocs = liveDocs;
// if (DEBUG) {
// System.out.println(" FPR.reset: termState=" + termState);
@@ -942,6 +954,9 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
// if (DEBUG) {
// System.out.println(" FPR.nextPosition posPendingCount=" + posPendingCount + " posBufferUpto=" + posBufferUpto);
// }
+ if (posPendingCount == 0)
+ return NO_MORE_POSITIONS;
+
if (posPendingFP != -1) {
// if (DEBUG) {
// System.out.println(" seek to pendingFP=" + posPendingFP);
@@ -971,6 +986,16 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
}
@Override
+ public int startPosition() {
+ return position;
+ }
+
+ @Override
+ public int endPosition() {
+ return position;
+ }
+
+ @Override
public int startOffset() {
return -1;
}
@@ -992,7 +1017,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
}
// Also handles payloads + offsets
- final class EverythingEnum extends DocsAndPositionsEnum {
+ final class EverythingEnum extends DocsEnum {
private final byte[] encoded;
@@ -1141,8 +1166,8 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
}
- this.needsOffsets = (flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0;
- this.needsPayloads = (flags & DocsAndPositionsEnum.FLAG_PAYLOADS) != 0;
+ this.needsOffsets = (flags & DocsEnum.FLAG_OFFSETS) != 0;
+ this.needsPayloads = (flags & DocsEnum.FLAG_PAYLOADS) != 0;
doc = -1;
accum = 0;
@@ -1498,6 +1523,9 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
// if (DEBUG) {
// System.out.println(" FPR.nextPosition posPendingCount=" + posPendingCount + " posBufferUpto=" + posBufferUpto + " payloadByteUpto=" + payloadByteUpto)// ;
// }
+ if (posPendingCount == 0)
+ return NO_MORE_POSITIONS;
+
if (posPendingFP != -1) {
// if (DEBUG) {
// System.out.println(" seek pos to pendingFP=" + posPendingFP);
@@ -1551,6 +1579,16 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
}
@Override
+ public int startPosition() {
+ return position;
+ }
+
+ @Override
+ public int endPosition() {
+ return position;
+ }
+
+ @Override
public int startOffset() {
return startOffset;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene410/Lucene410DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene410/Lucene410DocValuesProducer.java
index 912e087..cb14e54 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene410/Lucene410DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene410/Lucene410DocValuesProducer.java
@@ -17,38 +17,11 @@ package org.apache.lucene.codecs.lucene410;
* limitations under the License.
*/
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BINARY_PREFIX_COMPRESSED;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.DELTA_COMPRESSED;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.GCD_COMPRESSED;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.MONOTONIC_COMPRESSED;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.SORTED_SINGLE_VALUED;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.SORTED_WITH_ADDRESSES;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.TABLE_COMPRESSED;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.INTERVAL_SHIFT;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.INTERVAL_COUNT;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.INTERVAL_MASK;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.REVERSE_INTERVAL_SHIFT;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.REVERSE_INTERVAL_MASK;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BLOCK_INTERVAL_SHIFT;
-import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BLOCK_INTERVAL_MASK;
-
-import java.io.Closeable; // javadocs
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicLong;
-
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@@ -74,6 +47,32 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.DirectReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BINARY_PREFIX_COMPRESSED;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BLOCK_INTERVAL_MASK;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.BLOCK_INTERVAL_SHIFT;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.DELTA_COMPRESSED;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.GCD_COMPRESSED;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.INTERVAL_COUNT;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.INTERVAL_MASK;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.INTERVAL_SHIFT;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.MONOTONIC_COMPRESSED;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.REVERSE_INTERVAL_MASK;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.REVERSE_INTERVAL_SHIFT;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.SORTED_SINGLE_VALUED;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.SORTED_WITH_ADDRESSES;
+import static org.apache.lucene.codecs.lucene410.Lucene410DocValuesConsumer.TABLE_COMPRESSED;
+
/** reader for {@link Lucene410DocValuesFormat} */
class Lucene410DocValuesProducer extends DocValuesProducer implements Closeable {
private final Map numerics;
@@ -1084,7 +1083,7 @@ class Lucene410DocValuesProducer extends DocValuesProducer implements Closeable
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
index 3f62251..a421da1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
@@ -17,11 +17,10 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.IOException;
-
-import org.apache.lucene.index.IndexReader.ReaderClosedListener;
import org.apache.lucene.util.Bits;
+import java.io.IOException;
+
/** {@code AtomicReader} is an abstract class, providing an interface for accessing an
index. Search of an index is done entirely through this abstract interface,
so that any subclass which implements it is searchable. IndexReaders implemented
@@ -238,11 +237,11 @@ public abstract class AtomicReader extends IndexReader {
return null;
}
- /** Returns {@link DocsAndPositionsEnum} for the specified
+ /** Returns {@link DocsEnum} for the specified
* term. This will return null if the
* field or term does not exist or positions weren't indexed.
- * @see TermsEnum#docsAndPositions(Bits, DocsAndPositionsEnum) */
- public final DocsAndPositionsEnum termPositionsEnum(Term term) throws IOException {
+ * @see TermsEnum#docsAndPositions(Bits, DocsEnum) */
+ public final DocsEnum termPositionsEnum(Term term) throws IOException {
assert term.field() != null;
assert term.bytes() != null;
final Fields fields = fields();
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 52178f7..37603be 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -763,7 +763,7 @@ public class CheckIndex {
DocsEnum docs = null;
DocsEnum docsAndFreqs = null;
- DocsAndPositionsEnum postings = null;
+ DocsEnum postings = null;
String lastField = null;
for (String field : fields) {
@@ -1692,11 +1692,11 @@ public class CheckIndex {
}
DocsEnum docs = null;
- DocsAndPositionsEnum postings = null;
+ DocsEnum postings = null;
// Only used if crossCheckTermVectors is true:
DocsEnum postingsDocs = null;
- DocsAndPositionsEnum postingsPostings = null;
+ DocsEnum postingsPostings = null;
final Bits liveDocs = reader.getLiveDocs();
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java b/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java
deleted file mode 100644
index 60ac2bb..0000000
--- a/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java
+++ /dev/null
@@ -1,62 +0,0 @@
-package org.apache.lucene.index;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.util.Bits; // javadocs
-import org.apache.lucene.util.BytesRef;
-
-/** Also iterates through positions. */
-public abstract class DocsAndPositionsEnum extends DocsEnum {
-
- /** Flag to pass to {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)}
- * if you require offsets in the returned enum. */
- public static final int FLAG_OFFSETS = 0x1;
-
- /** Flag to pass to {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)}
- * if you require payloads in the returned enum. */
- public static final int FLAG_PAYLOADS = 0x2;
-
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected DocsAndPositionsEnum() {
- }
-
- /** Returns the next position. You should only call this
- * up to {@link DocsEnum#freq()} times else
- * the behavior is not defined. If positions were not
- * indexed this will return -1; this only happens if
- * offsets were indexed and you passed needsOffset=true
- * when pulling the enum. */
- public abstract int nextPosition() throws IOException;
-
- /** Returns start offset for the current position, or -1
- * if offsets were not indexed. */
- public abstract int startOffset() throws IOException;
-
- /** Returns end offset for the current position, or -1 if
- * offsets were not indexed. */
- public abstract int endOffset() throws IOException;
-
- /** Returns the payload at this position, or null if no
- * payload was indexed. You should not modify anything
- * (neither members of the returned BytesRef nor bytes
- * in the byte[]). */
- public abstract BytesRef getPayload() throws IOException;
-}
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java b/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java
index fa4cf54..2bdecae 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java
@@ -17,12 +17,13 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Bits; // javadocs
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
/** Iterates through the documents and term freqs.
* NOTE: you must first call {@link #nextDoc} before using
@@ -31,9 +32,7 @@ public abstract class DocsEnum extends DocIdSetIterator {
/**
* Flag to pass to {@link TermsEnum#docs(Bits,DocsEnum,int)} if you don't
- * require term frequencies in the returned enum. When passed to
- * {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)} means
- * that no offsets and payloads will be returned.
+ * require term frequencies in the returned enum.
*/
public static final int FLAG_NONE = 0x0;
@@ -41,6 +40,20 @@ public abstract class DocsEnum extends DocIdSetIterator {
* if you require term frequencies in the returned enum. */
public static final int FLAG_FREQS = 0x1;
+ /** Flag to pass to {@link TermsEnum#docs(Bits,DocsEnum,int)}
+ * if you require term positions in the returned enum. */
+ public static final int FLAG_POSITIONS = 0x3;
+
+ /** Flag to pass to {@link TermsEnum#docs(Bits,DocsEnum,int)}
+ * if you require offsets in the returned enum. */
+ public static final int FLAG_OFFSETS = 0x7;
+
+ /** Flag to pass to {@link TermsEnum#docs(Bits,DocsEnum,int)}
+ * if you require payloads in the returned enum. */
+ public static final int FLAG_PAYLOADS = 0xB;
+
+ public static final int NO_MORE_POSITIONS = Integer.MAX_VALUE;
+
private AttributeSource atts = null;
/** Sole constructor. (For invocation by subclass
@@ -65,4 +78,40 @@ public abstract class DocsEnum extends DocIdSetIterator {
if (atts == null) atts = new AttributeSource();
return atts;
}
+
+ /** Returns the next position. You should only call this
+ * up to {@link DocsEnum#freq()} times else
+ * the behavior is not defined. If positions were not
+ * indexed this will return -1; this only happens if
+ * offsets were indexed and you passed needsOffset=true
+ * when pulling the enum. */
+ public abstract int nextPosition() throws IOException;
+
+ public int startPosition() throws IOException {
+ throw new UnsupportedOperationException("startPosition() is not implemented on " + this.getClass().getSimpleName());
+ }
+
+ public int endPosition() throws IOException {
+ throw new UnsupportedOperationException("endPosition() is not implemented on " + this.getClass().getSimpleName());
+ }
+
+ /** Returns start offset for the current position, or -1
+ * if offsets were not indexed. */
+ public int startOffset() throws IOException {
+ throw new UnsupportedOperationException("startOffset() is not implemented on " + this.getClass().getSimpleName());
+ }
+
+ /** Returns end offset for the current position, or -1 if
+ * offsets were not indexed. */
+ public int endOffset() throws IOException {
+ throw new UnsupportedOperationException("endOffset() is not implemented on " + this.getClass().getSimpleName());
+ }
+
+ /** Returns the payload at this position, or null if no
+ * payload was indexed. You should not modify anything
+ * (neither members of the returned BytesRef nor bytes
+ * in the byte[]). */
+ public BytesRef getPayload() throws IOException {
+ return null;
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
index 52f542d..e7847c4 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
@@ -204,7 +204,7 @@ public class FilterAtomicReader extends AtomicReader {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
return in.docsAndPositions(liveDocs, reuse, flags);
}
}
@@ -248,52 +248,18 @@ public class FilterAtomicReader extends AtomicReader {
}
@Override
- public long cost() {
- return in.cost();
- }
- }
-
- /** Base class for filtering {@link DocsAndPositionsEnum} implementations. */
- public static class FilterDocsAndPositionsEnum extends DocsAndPositionsEnum {
- /** The underlying DocsAndPositionsEnum instance. */
- protected final DocsAndPositionsEnum in;
-
- /**
- * Create a new FilterDocsAndPositionsEnum
- * @param in the underlying DocsAndPositionsEnum instance.
- */
- public FilterDocsAndPositionsEnum(DocsAndPositionsEnum in) {
- this.in = in;
- }
-
- @Override
- public AttributeSource attributes() {
- return in.attributes();
- }
-
- @Override
- public int docID() {
- return in.docID();
- }
-
- @Override
- public int freq() throws IOException {
- return in.freq();
- }
-
- @Override
- public int nextDoc() throws IOException {
- return in.nextDoc();
+ public int nextPosition() throws IOException {
+ return in.nextPosition();
}
@Override
- public int advance(int target) throws IOException {
- return in.advance(target);
+ public int startPosition() throws IOException {
+ return in.startPosition();
}
@Override
- public int nextPosition() throws IOException {
- return in.nextPosition();
+ public int endPosition() throws IOException {
+ return in.endPosition();
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
index b6bfcc4..df4e0d8 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
@@ -184,7 +184,7 @@ public abstract class FilteredTermsEnum extends TermsEnum {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits bits, DocsEnum reuse, int flags) throws IOException {
return tenum.docsAndPositions(bits, reuse, flags);
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
index 5d01754..e67a08c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
@@ -17,19 +17,19 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray;
-import org.apache.lucene.util.AttributeSource; // javadocs
+import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
/** Implements limited (iterators only, no stats) {@link
* Fields} interface over the in-RAM buffered
* fields/terms/postings, to flush postings through the
@@ -257,7 +257,7 @@ class FreqProxFields extends Fields {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) {
if (liveDocs != null) {
throw new IllegalArgumentException("liveDocs must be null");
}
@@ -269,7 +269,7 @@ class FreqProxFields extends Fields {
throw new IllegalArgumentException("did not index positions");
}
- if (!terms.hasOffsets && (flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0) {
+ if (!terms.hasOffsets && (flags & DocsEnum.FLAG_OFFSETS) == DocsEnum.FLAG_OFFSETS) {
// Caller wants offsets but we didn't index them;
// don't lie:
throw new IllegalArgumentException("did not index offsets");
@@ -349,6 +349,11 @@ class FreqProxFields extends Fields {
}
@Override
+ public int nextPosition() throws IOException {
+ return -1;
+ }
+
+ @Override
public int nextDoc() throws IOException {
if (reader.eof()) {
if (ended) {
@@ -390,7 +395,7 @@ class FreqProxFields extends Fields {
}
}
- private static class FreqProxDocsAndPositionsEnum extends DocsAndPositionsEnum {
+ private static class FreqProxDocsAndPositionsEnum extends DocsEnum {
final FreqProxTermsWriterPerField terms;
final FreqProxPostingsArray postingsArray;
diff --git a/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java b/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java
index 597164b..567712c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java
@@ -17,10 +17,10 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.util.Bits;
+import java.io.IOException;
+
import static org.apache.lucene.index.FilterAtomicReader.FilterFields;
import static org.apache.lucene.index.FilterAtomicReader.FilterTerms;
import static org.apache.lucene.index.FilterAtomicReader.FilterTermsEnum;
@@ -120,7 +120,7 @@ public class MappedMultiFields extends FilterFields {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
if (liveDocs != null) {
throw new IllegalArgumentException("liveDocs must be null");
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java
index bcc3735..cc16960 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java
@@ -29,12 +29,12 @@ import java.io.IOException;
* @lucene.experimental
*/
-final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
+final class MappingMultiDocsAndPositionsEnum extends DocsEnum {
private MultiDocsAndPositionsEnum.EnumWithSlice[] subs;
int numSubs;
int upto;
MergeState.DocMap currentMap;
- DocsAndPositionsEnum current;
+ DocsEnum current;
int currentBase;
int doc = -1;
private MergeState mergeState;
diff --git a/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java
index 148ea5c..1acdc9c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java
@@ -70,6 +70,11 @@ final class MappingMultiDocsEnum extends DocsEnum {
}
@Override
+ public int nextPosition() throws IOException {
+ return -1;
+ }
+
+ @Override
public int docID() {
return doc;
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
index 33e2127..434c6ae 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
@@ -28,20 +28,20 @@ import java.util.Arrays;
* @lucene.experimental
*/
-public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
+public final class MultiDocsAndPositionsEnum extends DocsEnum {
private final MultiTermsEnum parent;
- final DocsAndPositionsEnum[] subDocsAndPositionsEnum;
+ final DocsEnum[] subDocsAndPositionsEnum;
private final EnumWithSlice[] subs;
int numSubs;
int upto;
- DocsAndPositionsEnum current;
+ DocsEnum current;
int currentBase;
int doc = -1;
/** Sole constructor. */
public MultiDocsAndPositionsEnum(MultiTermsEnum parent, int subReaderCount) {
this.parent = parent;
- subDocsAndPositionsEnum = new DocsAndPositionsEnum[subReaderCount];
+ subDocsAndPositionsEnum = new DocsEnum[subReaderCount];
this.subs = new EnumWithSlice[subReaderCount];
for (int i = 0; i < subs.length; i++) {
subs[i] = new EnumWithSlice();
@@ -159,14 +159,14 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
}
// TODO: implement bulk read more efficiently than super
- /** Holds a {@link DocsAndPositionsEnum} along with the
+ /** Holds a {@link DocsEnum} along with the
* corresponding {@link ReaderSlice}. */
public final static class EnumWithSlice {
EnumWithSlice() {
}
- /** {@link DocsAndPositionsEnum} for this sub-reader. */
- public DocsAndPositionsEnum docsAndPositionsEnum;
+ /** {@link DocsEnum} for this sub-reader. */
+ public DocsEnum docsAndPositionsEnum;
/** {@link ReaderSlice} describing how this sub-reader
* fits into the composite reader. */
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java
index 082d266..d1963db 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java
@@ -18,6 +18,8 @@ package org.apache.lucene.index;
*/
+import org.apache.lucene.util.BytesRef;
+
import java.io.IOException;
import java.util.Arrays;
@@ -89,6 +91,26 @@ public final class MultiDocsEnum extends DocsEnum {
public int docID() {
return doc;
}
+
+ @Override
+ public int nextPosition() throws IOException {
+ return current.nextPosition();
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return current.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return current.endOffset();
+ }
+
+ @Override
+ public BytesRef getPayload() throws IOException {
+ return current.getPayload();
+ }
@Override
public int advance(int target) throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiFields.java b/lucene/core/src/java/org/apache/lucene/index/MultiFields.java
index c16738c..a537cc7 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiFields.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiFields.java
@@ -158,22 +158,22 @@ public final class MultiFields extends Fields {
return null;
}
- /** Returns {@link DocsAndPositionsEnum} for the specified
+ /** Returns {@link DocsEnum} for the specified
* field & term. This will return null if the field or
* term does not exist or positions were not indexed.
* @see #getTermPositionsEnum(IndexReader, Bits, String, BytesRef, int) */
- public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
- return getTermPositionsEnum(r, liveDocs, field, term, DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS);
+ public static DocsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
+ return getTermPositionsEnum(r, liveDocs, field, term, DocsEnum.FLAG_OFFSETS | DocsEnum.FLAG_PAYLOADS);
}
- /** Returns {@link DocsAndPositionsEnum} for the specified
+ /** Returns {@link DocsEnum} for the specified
* field & term, with control over whether offsets and payloads are
* required. Some codecs may be able to optimize
* their implementation when offsets and/or payloads are not
* required. This will return null if the field or term does not
* exist or positions were not indexed. See {@link
- * TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)}. */
- public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, int flags) throws IOException {
+ * TermsEnum#docs(Bits,DocsEnum,int)}. */
+ public static DocsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, int flags) throws IOException {
assert field != null;
assert term != null;
final Terms terms = getTerms(r, field);
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
index 6ae2c7c..5b71d3d 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
@@ -401,7 +401,7 @@ public final class MultiTermsEnum extends TermsEnum {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
MultiDocsAndPositionsEnum docsAndPositionsEnum;
// Can only reuse if incoming enum is also a MultiDocsAndPositionsEnum
if (reuse != null && reuse instanceof MultiDocsAndPositionsEnum) {
@@ -452,7 +452,7 @@ public final class MultiTermsEnum extends TermsEnum {
}
assert entry.index < docsAndPositionsEnum.subDocsAndPositionsEnum.length: entry.index + " vs " + docsAndPositionsEnum.subDocsAndPositionsEnum.length + "; " + subs.length;
- final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index], flags);
+ final DocsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index], flags);
if (subPostings != null) {
docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index] = subPostings;
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
index 16427cc..f12e2b8 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
@@ -17,12 +17,12 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
+import java.io.IOException;
+
/** Implements a {@link TermsEnum} wrapping a provided
* {@link SortedDocValues}. */
@@ -114,7 +114,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
index 64dba95..68658ca 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
@@ -17,12 +17,12 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
+import java.io.IOException;
+
/** Implements a {@link TermsEnum} wrapping a provided
* {@link SortedSetDocValues}. */
@@ -114,7 +114,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermContext.java b/lucene/core/src/java/org/apache/lucene/index/TermContext.java
index ac80a94..262548a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermContext.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermContext.java
@@ -17,11 +17,11 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import org.apache.lucene.util.BytesRef;
+
import java.io.IOException;
import java.util.Arrays;
-import org.apache.lucene.util.BytesRef;
-
/**
* Maintains a {@link IndexReader} {@link TermState} view over
* {@link IndexReader} instances containing a single term. The
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
index 895018b..a9b4436 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
@@ -17,18 +17,18 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
+import java.io.IOException;
+
/** Iterator to seek ({@link #seekCeil(BytesRef)}, {@link
* #seekExact(BytesRef)}) or step through ({@link
* #next} terms to obtain frequency information ({@link
* #docFreq}), {@link DocsEnum} or {@link
- * DocsAndPositionsEnum} for the current term ({@link
+ * DocsEnum} for the current term ({@link
* #docs}.
*
*
 * <p>Term enumerations are always ordered by
@@ -162,20 +162,20 @@ public abstract class TermsEnum implements BytesRefIterator {
* @see #docs(Bits, DocsEnum, int) */
public abstract DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException;
- /** Get {@link DocsAndPositionsEnum} for the current term.
+ /** Get {@link DocsEnum} for the current term.
* Do not call this when the enum is unpositioned. This
* method will return null if positions were not
* indexed.
*
* @param liveDocs unset bits are documents that should not
* be returned
- * @param reuse pass a prior DocsAndPositionsEnum for possible reuse
- * @see #docsAndPositions(Bits, DocsAndPositionsEnum, int) */
- public final DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
- return docsAndPositions(liveDocs, reuse, DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS);
+ * @param reuse pass a prior DocsEnum for possible reuse
+ **/
+ public final DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse) throws IOException {
+ return docsAndPositions(liveDocs, reuse, DocsEnum.FLAG_OFFSETS | DocsEnum.FLAG_PAYLOADS);
}
- /** Get {@link DocsAndPositionsEnum} for the current term,
+ /** Get {@link DocsEnum} for the current term,
* with control over whether offsets and payloads are
* required. Some codecs may be able to optimize their
* implementation when offsets and/or payloads are not required.
@@ -184,11 +184,11 @@ public abstract class TermsEnum implements BytesRefIterator {
* @param liveDocs unset bits are documents that should not
* be returned
- * @param reuse pass a prior DocsAndPositionsEnum for possible reuse
+ * @param reuse pass a prior DocsEnum for possible reuse
* @param flags specifies which optional per-position values you
- * require; see {@link DocsAndPositionsEnum#FLAG_OFFSETS} and
- * {@link DocsAndPositionsEnum#FLAG_PAYLOADS}. */
- public abstract DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException;
+ * require; see {@link DocsEnum#FLAG_OFFSETS} and
+ * {@link DocsEnum#FLAG_PAYLOADS}. */
+ public abstract DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags) throws IOException;
/**
* Expert: Returns the TermsEnums internal state to position the TermsEnum
@@ -250,11 +250,6 @@ public abstract class TermsEnum implements BytesRefIterator {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
- throw new IllegalStateException("this method should never be called");
- }
-
- @Override
public BytesRef next() {
return null;
}
@@ -273,5 +268,11 @@ public abstract class TermsEnum implements BytesRefIterator {
public void seekExact(BytesRef term, TermState state) {
throw new IllegalStateException("this method should never be called");
}
+
+ @Override
+ public DocsEnum docsAndPositions(Bits liveDocs, DocsEnum reuse, int flags)
+ throws IOException {
+ throw new IllegalStateException("this method should never be called");
+ }
};
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
index 4d7635d..7230cdb 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -17,14 +17,8 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -32,6 +26,13 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
/** A Query that matches documents matching boolean combinations of other
* queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
* BooleanQuerys.
@@ -242,7 +243,7 @@ public class BooleanQuery extends Query implements Iterable {
for (Iterator wIter = weights.iterator(); wIter.hasNext();) {
Weight w = wIter.next();
BooleanClause c = cIter.next();
- if (w.scorer(context, context.reader().getLiveDocs()) == null) {
+ if (w.scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs()) == null) {
if (c.isRequired()) {
fail = true;
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
@@ -305,13 +306,13 @@ public class BooleanQuery extends Query implements Iterable {
}
@Override
- public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder,
+ public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, int flags,
Bits acceptDocs) throws IOException {
if (scoreDocsInOrder || minNrShouldMatch > 1) {
// TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch
// but the same is even true of pure conjunctions...
- return super.bulkScorer(context, scoreDocsInOrder, acceptDocs);
+ return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs);
}
List prohibited = new ArrayList();
@@ -319,7 +320,7 @@ public class BooleanQuery extends Query implements Iterable {
Iterator cIter = clauses.iterator();
for (Weight w : weights) {
BooleanClause c = cIter.next();
- BulkScorer subScorer = w.bulkScorer(context, false, acceptDocs);
+ BulkScorer subScorer = w.bulkScorer(context, false, flags, acceptDocs);
if (subScorer == null) {
if (c.isRequired()) {
return null;
@@ -328,7 +329,7 @@ public class BooleanQuery extends Query implements Iterable {
// TODO: there are some cases where BooleanScorer
// would handle conjunctions faster than
// BooleanScorer2...
- return super.bulkScorer(context, scoreDocsInOrder, acceptDocs);
+ return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs);
} else if (c.isProhibited()) {
prohibited.add(subScorer);
} else {
@@ -340,7 +341,7 @@ public class BooleanQuery extends Query implements Iterable {
}
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs)
+ public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs)
throws IOException {
// initially the user provided value,
// but if minNrShouldMatch == optional.size(),
@@ -353,7 +354,7 @@ public class BooleanQuery extends Query implements Iterable {
Iterator cIter = clauses.iterator();
for (Weight w : weights) {
BooleanClause c = cIter.next();
- Scorer subScorer = w.scorer(context, acceptDocs);
+ Scorer subScorer = w.scorer(context, flags, acceptDocs);
if (subScorer == null) {
if (c.isRequired()) {
return null;
@@ -454,8 +455,17 @@ public class BooleanQuery extends Query implements Iterable {
// scorer() will return an out-of-order scorer if requested.
return true;
}
+
+ @Override
+ public String toString() {
+ StringBuffer sb = new StringBuffer("BooleanWeight[");
+ for (Weight weight : weights) {
+ sb.append(weight.toString()).append(",");
+ }
+ return sb.append("]").toString();
+ }
- private Scorer req(List required, boolean disableCoord) {
+ private Scorer req(List required, boolean disableCoord) throws IOException {
if (required.size() == 1) {
Scorer req = required.get(0);
if (!disableCoord && maxCoord > 1) {
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
index 173bb44..8034522 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
@@ -17,15 +17,12 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-
-import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.BooleanQuery.BooleanWeight;
+import java.io.IOException;
+import java.util.List;
+
/* Description from Doug Cutting (excerpted from
* LUCENE-1483):
*
@@ -102,8 +99,13 @@ final class BooleanScorer extends BulkScorer {
return true;
}
+ @Override
+ public int postingFeatures() {
+ return DocsEnum.FLAG_FREQS;
+ }
+
}
-
+
static final class Bucket {
int doc = -1; // tells if bucket is valid
double score; // incremental score
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java b/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java
index 2c49ec7..721ade2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java
@@ -21,8 +21,6 @@ import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
-import org.apache.lucene.search.Scorer.ChildScorer;
-
/** Internal document-at-a-time scorers used to deal with stupid coord() computation */
class BooleanTopLevelScorers {
@@ -61,7 +59,7 @@ class BooleanTopLevelScorers {
private final Scorer req;
private final Scorer opt;
- CoordinatingConjunctionScorer(Weight weight, float coords[], Scorer req, int reqCount, Scorer opt) {
+ CoordinatingConjunctionScorer(Weight weight, float coords[], Scorer req, int reqCount, Scorer opt) throws IOException {
super(weight, new Scorer[] { req, opt });
this.coords = coords;
this.req = req;
diff --git a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
index c5957d8..e60e37e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
@@ -74,10 +74,16 @@ public abstract class CachingCollector extends FilterCollector {
public final int freq() { throw new UnsupportedOperationException(); }
@Override
+ public int nextPosition() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public final int nextDoc() { throw new UnsupportedOperationException(); }
@Override
public long cost() { return 1; }
+
}
private static class NoScoreCachingCollector extends CachingCollector {
@@ -96,6 +102,11 @@ public abstract class CachingCollector extends FilterCollector {
docs = new ArrayList<>();
}
+ @Override
+ public int postingFeatures() {
+ return in.postingFeatures();
+ }
+
protected NoScoreCachingLeafCollector wrap(LeafCollector in, int maxDocsToCache) {
return new NoScoreCachingLeafCollector(in, maxDocsToCache);
}
@@ -304,7 +315,7 @@ public abstract class CachingCollector extends FilterCollector {
* @param acceptDocsOutOfOrder
* whether documents are allowed to be collected out-of-order
*/
- public static CachingCollector create(final boolean acceptDocsOutOfOrder, boolean cacheScores, double maxRAMMB) {
+ public static CachingCollector create(final boolean acceptDocsOutOfOrder, final int flags, boolean cacheScores, double maxRAMMB) {
Collector other = new SimpleCollector() {
@Override
public boolean acceptsDocsOutOfOrder() {
@@ -312,6 +323,11 @@ public abstract class CachingCollector extends FilterCollector {
}
@Override
+ public int postingFeatures() {
+ return flags;
+ }
+
+ @Override
public void collect(int doc) {}
};
diff --git a/lucene/core/src/java/org/apache/lucene/search/Collector.java b/lucene/core/src/java/org/apache/lucene/search/Collector.java
index bb47394..b93ee25 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Collector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Collector.java
@@ -17,10 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.index.AtomicReaderContext;
+import java.io.IOException;
+
/**
*
Expert: Collectors are primarily meant to be used to
* gather raw results from a search, and implement sorting
@@ -72,5 +72,10 @@ public interface Collector {
* next atomic reader context
*/
LeafCollector getLeafCollector(AtomicReaderContext context) throws IOException;
-
+
+ /**
+ * Returns the posting features required by this collector.
+ */
+ public int postingFeatures();
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
index 3e81187..760d755 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
@@ -17,126 +17,183 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.util.ArrayUtil;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
-import org.apache.lucene.util.ArrayUtil;
-
/** Scorer for conjunctions, sets of queries, all of which are required. */
class ConjunctionScorer extends Scorer {
- protected int lastDoc = -1;
- protected final DocsAndFreqs[] docsAndFreqs;
- private final DocsAndFreqs lead;
+
+ private final Scorer[] scorersOrdered;
+ private final Scorer[] scorers;
+ private int lastDoc = -1;
private final float coord;
+ final PositionQueue posQueue;
- ConjunctionScorer(Weight weight, Scorer[] scorers) {
+ public ConjunctionScorer(Weight weight, Scorer[] scorers) throws IOException {
this(weight, scorers, 1f);
}
- ConjunctionScorer(Weight weight, Scorer[] scorers, float coord) {
+ public ConjunctionScorer(Weight weight, Scorer[] scorers, float coord) throws IOException {
super(weight);
+ scorersOrdered = new Scorer[scorers.length];
+ System.arraycopy(scorers, 0, scorersOrdered, 0, scorers.length);
+ this.scorers = scorers;
this.coord = coord;
- this.docsAndFreqs = new DocsAndFreqs[scorers.length];
+ posQueue = new PositionQueue(scorers);
+
for (int i = 0; i < scorers.length; i++) {
- docsAndFreqs[i] = new DocsAndFreqs(scorers[i]);
+ if (scorers[i].nextDoc() == NO_MORE_DOCS) {
+ // If even one of the sub-scorers does not have any documents, this
+ // scorer should not attempt to do any more work.
+ lastDoc = NO_MORE_DOCS;
+ return;
+ }
}
- // Sort the array the first time to allow the least frequent DocsEnum to
- // lead the matching.
- ArrayUtil.timSort(docsAndFreqs, new Comparator() {
+
+ // Sort the array the first time...
+ // We don't need to sort the array in any future calls because we know
+ // it will already start off sorted (all scorers on same doc).
+
+ // Note that this comparator is not consistent with equals!
+ // Also we use mergeSort here to be stable (so order of Scorers that
+ // match on first document keeps preserved):
+ ArrayUtil.timSort(scorers, new Comparator() { // sort the array
@Override
- public int compare(DocsAndFreqs o1, DocsAndFreqs o2) {
- return Long.compare(o1.cost, o2.cost);
+ public int compare(Scorer o1, Scorer o2) {
+ return o1.docID() - o2.docID();
}
});
- lead = docsAndFreqs[0]; // least frequent DocsEnum leads the intersection
- }
+ // NOTE: doNext() must be called before the re-sorting of the array later on.
+ // The reason is this: assume there are 5 scorers, whose first docs are 1,
+ // 2, 3, 5, 5 respectively. Sorting (above) leaves the array as is. Calling
+ // doNext() here advances all the first scorers to 5 (or a larger doc ID
+ // they all agree on).
+ // However, if we re-sort before doNext() is called, the order will be 5, 3,
+ // 2, 1, 5 and then doNext() will stop immediately, since the first scorer's
+ // docs equals the last one. So the invariant that after calling doNext()
+ // all scorers are on the same doc ID is broken.
+ if (doNext() == NO_MORE_DOCS) {
+ // The scorers did not agree on any document.
+ lastDoc = NO_MORE_DOCS;
+ return;
+ }
- private int doNext(int doc) throws IOException {
- for(;;) {
- // doc may already be NO_MORE_DOCS here, but we don't check explicitly
- // since all scorers should advance to NO_MORE_DOCS, match, then
- // return that value.
- advanceHead: for(;;) {
- for (int i = 1; i < docsAndFreqs.length; i++) {
- // invariant: docsAndFreqs[i].doc <= doc at this point.
-
- // docsAndFreqs[i].doc may already be equal to doc if we "broke advanceHead"
- // on the previous iteration and the advance on the lead scorer exactly matched.
- if (docsAndFreqs[i].doc < doc) {
- docsAndFreqs[i].doc = docsAndFreqs[i].scorer.advance(doc);
-
- if (docsAndFreqs[i].doc > doc) {
- // DocsEnum beyond the current doc - break and advance lead to the new highest doc.
- doc = docsAndFreqs[i].doc;
- break advanceHead;
- }
- }
- }
- // success - all DocsEnums are on the same doc
- return doc;
- }
- // advance head for next iteration
- doc = lead.doc = lead.scorer.advance(doc);
+ // If first-time skip distance is any predictor of
+ // scorer sparseness, then we should always try to skip first on
+ // those scorers.
+ // Keep last scorer in it's last place (it will be the first
+ // to be skipped on), but reverse all of the others so that
+ // they will be skipped on in order of original high skip.
+ int end = scorers.length - 1;
+ int max = end >> 1;
+ for (int i = 0; i < max; i++) {
+ Scorer tmp = scorers[i];
+ int idx = end - i - 1;
+ scorers[i] = scorers[idx];
+ scorers[idx] = tmp;
}
}
+ private int doNext() throws IOException {
+ int first = 0;
+ int doc = scorers[scorers.length - 1].docID();
+ Scorer firstScorer;
+ while ((firstScorer = scorers[first]).docID() < doc) {
+ doc = firstScorer.advance(doc);
+ first = first == scorers.length - 1 ? 0 : first + 1;
+ }
+ posQueue.advanceTo(doc);
+ return doc;
+ }
+
@Override
public int advance(int target) throws IOException {
- lead.doc = lead.scorer.advance(target);
- return lastDoc = doNext(lead.doc);
+ if (lastDoc == NO_MORE_DOCS) {
+ return lastDoc;
+ } else if (scorers[(scorers.length - 1)].docID() < target) {
+ scorers[(scorers.length - 1)].advance(target);
+ }
+ return lastDoc = doNext();
}
@Override
public int docID() {
return lastDoc;
}
-
+
@Override
public int nextDoc() throws IOException {
- lead.doc = lead.scorer.nextDoc();
- return lastDoc = doNext(lead.doc);
+ if (lastDoc == NO_MORE_DOCS) {
+ return lastDoc;
+ } else if (lastDoc == -1) {
+ lastDoc = scorers[scorers.length - 1].docID();
+ posQueue.advanceTo(lastDoc);
+ return lastDoc;
+ }
+ scorers[(scorers.length - 1)].nextDoc();
+ return lastDoc = doNext();
}
-
+
@Override
public float score() throws IOException {
// TODO: sum into a double and cast to float if we ever send required clauses to BS1
float sum = 0.0f;
- for (DocsAndFreqs docs : docsAndFreqs) {
- sum += docs.scorer.score();
+ for (int i = 0; i < scorers.length; i++) {
+ sum += scorers[i].score();
}
return sum * coord;
}
-
+
@Override
- public int freq() {
- return docsAndFreqs.length;
+ public int freq() throws IOException {
+ return scorers.length;
}
@Override
- public long cost() {
- return lead.scorer.cost();
+ public int nextPosition() throws IOException {
+ return posQueue.nextPosition();
}
@Override
- public Collection getChildren() {
- ArrayList children = new ArrayList<>(docsAndFreqs.length);
- for (DocsAndFreqs docs : docsAndFreqs) {
- children.add(new ChildScorer(docs.scorer, "MUST"));
+ public int startPosition() throws IOException {
+ return posQueue.startPosition();
+ }
+
+ @Override
+ public int endPosition() throws IOException {
+ return posQueue.endPosition();
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return posQueue.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return posQueue.endOffset();
+ }
+
+ @Override
+ public long cost() {
+ long sum = 0;
+ for (int i = 0; i < scorers.length; i++) {
+ sum += scorers[i].cost();
}
- return children;
+ return sum; // nocommit is this right?
}
- static final class DocsAndFreqs {
- final long cost;
- final Scorer scorer;
- int doc = -1;
-
- DocsAndFreqs(Scorer scorer) {
- this.scorer = scorer;
- this.cost = scorer.cost();
+ @Override
+ public Collection getChildren() {
+ ArrayList children = new ArrayList(scorers.length);
+ for (Scorer scorer : scorersOrdered) {
+ children.add(new ChildScorer(scorer, "MUST"));
}
+ return children;
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
index 2b7f4ed..7aa4984 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.Bits;
@@ -134,14 +135,14 @@ public class ConstantScoreQuery extends Query {
}
@Override
- public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException {
+ public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, int flags, Bits acceptDocs) throws IOException {
final DocIdSetIterator disi;
if (filter != null) {
assert query == null;
- return super.bulkScorer(context, scoreDocsInOrder, acceptDocs);
+ return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs);
} else {
assert query != null && innerWeight != null;
- BulkScorer bulkScorer = innerWeight.bulkScorer(context, scoreDocsInOrder, acceptDocs);
+ BulkScorer bulkScorer = innerWeight.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs);
if (bulkScorer == null) {
return null;
}
@@ -150,7 +151,7 @@ public class ConstantScoreQuery extends Query {
}
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException {
final DocIdSetIterator disi;
if (filter != null) {
assert query == null;
@@ -161,7 +162,7 @@ public class ConstantScoreQuery extends Query {
disi = dis.iterator();
} else {
assert query != null && innerWeight != null;
- disi = innerWeight.scorer(context, acceptDocs);
+ disi = innerWeight.scorer(context, flags, acceptDocs);
}
if (disi == null) {
@@ -177,7 +178,7 @@ public class ConstantScoreQuery extends Query {
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
- final Scorer cs = scorer(context, context.reader().getLiveDocs());
+ final Scorer cs = scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs());
final boolean exists = (cs != null && cs.advance(doc) == doc);
final ComplexExplanation result = new ComplexExplanation();
@@ -259,10 +260,15 @@ public class ConstantScoreQuery extends Query {
}
@Override
+ public int nextPosition() throws IOException {
+ return -1;
+ }
+
+ @Override
public int advance(int target) throws IOException {
return docIdSetIterator.advance(target);
}
-
+
@Override
public long cost() {
return docIdSetIterator.cost();
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
index c195497..3888271 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
@@ -16,6 +16,11 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.Bits;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@@ -23,11 +28,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Set;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.util.Bits;
-
/**
* A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum
* score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries.
@@ -153,11 +153,11 @@ public class DisjunctionMaxQuery extends Query implements Iterable {
/** Create the scorer used to score our associated DisjunctionMaxQuery */
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException {
List scorers = new ArrayList<>();
for (Weight w : weights) {
// we will advance() subscorers
- Scorer subScorer = w.scorer(context, acceptDocs);
+ Scorer subScorer = w.scorer(context, flags, acceptDocs);
if (subScorer != null) {
scorers.add(subScorer);
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
index b5d0a0d..e80242e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
@@ -46,6 +46,7 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier, Scorer[] subScorers) {
super(weight, subScorers);
this.tieBreakerMultiplier = tieBreakerMultiplier;
+
}
@Override
@@ -66,4 +67,5 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
protected float getFinal() {
return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
}
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
index 5b7e2ff..f4b8127 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
@@ -20,23 +20,26 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Locale;
/**
* Base class for Scorers that score disjunctions.
*/
abstract class DisjunctionScorer extends Scorer {
- private final Scorer subScorers[];
- private int numScorers;
+ protected final Scorer subScorers[];
/** The document number of the current match. */
protected int doc = -1;
+ protected int numScorers;
+ protected PositionQueue posQueue;
/** Number of matching scorers for the current match. */
protected int freq = -1;
-
+
protected DisjunctionScorer(Weight weight, Scorer subScorers[]) {
super(weight);
this.subScorers = subScorers;
this.numScorers = subScorers.length;
+ this.posQueue = new PositionQueue(subScorers);
if (numScorers <= 1) {
throw new IllegalArgumentException("There must be at least 2 subScorers");
}
@@ -115,6 +118,45 @@ abstract class DisjunctionScorer extends Scorer {
}
@Override
+ public int nextPosition() throws IOException {
+ //System.out.println("Advancing " + this.toString());
+ int pos = posQueue.nextPosition();
+ //System.out.println(this);
+ return pos;
+ }
+
+ @Override
+ public int startPosition() throws IOException {
+ return posQueue.startPosition();
+ }
+
+ @Override
+ public int endPosition() throws IOException {
+ return posQueue.endPosition();
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return posQueue.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return posQueue.endOffset();
+ }
+
+ @Override
+ public String toString() {
+ try {
+ return String.format(Locale.ROOT, "DisjScorer[%s] %d(%d)->%d(%d)", weight.toString(),
+ posQueue.startPosition(),
+ posQueue.startOffset(), posQueue.endPosition(), posQueue.endOffset());
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
public final long cost() {
long sum = 0;
for (int i = 0; i < numScorers; i++) {
@@ -143,6 +185,7 @@ abstract class DisjunctionScorer extends Scorer {
int docID = subScorers[0].docID();
if (docID != doc) {
freq = -1;
+ posQueue.advanceTo(docID);
return doc = docID;
}
}
@@ -163,11 +206,12 @@ abstract class DisjunctionScorer extends Scorer {
int docID = subScorers[0].docID();
if (docID >= target) {
freq = -1;
+ posQueue.advanceTo(docID);
return doc = docID;
}
}
}
-
+
// if we haven't already computed freq + score, do so
private void visitScorers() throws IOException {
reset();
@@ -209,4 +253,5 @@ abstract class DisjunctionScorer extends Scorer {
/** Return final score */
protected abstract float getFinal();
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
index f291695..f775ad6 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
@@ -19,13 +19,14 @@ package org.apache.lucene.search;
import java.io.IOException;
+
/** A Scorer for OR like queries, counterpart of ConjunctionScorer.
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
*/
final class DisjunctionSumScorer extends DisjunctionScorer {
private double score;
private final float[] coord;
-
+
/** Construct a DisjunctionScorer.
* @param weight The weight to be used.
* @param subScorers Array of at least two subscorers.
@@ -50,4 +51,5 @@ final class DisjunctionSumScorer extends DisjunctionScorer {
protected float getFinal() {
return (float)score * coord[freq];
}
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
index e73b241..7f31062 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@@ -17,50 +17,54 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.search.PhraseQuery.TermDocsEnumFactory;
+import org.apache.lucene.search.similarities.Similarity;
+
import java.io.IOException;
import java.util.Arrays;
-import org.apache.lucene.index.*;
-import org.apache.lucene.search.similarities.Similarity;
-
final class ExactPhraseScorer extends Scorer {
private final int endMinus1;
-
+
private final static int CHUNK = 4096;
-
+
private int gen;
private final int[] counts = new int[CHUNK];
private final int[] gens = new int[CHUNK];
+
+ boolean noDocs;
private final long cost;
private final static class ChunkState {
- final DocsAndPositionsEnum posEnum;
+ final TermDocsEnumFactory factory;
+ final DocsEnum posEnum;
final int offset;
int posUpto;
int posLimit;
int pos;
int lastPos;
- public ChunkState(DocsAndPositionsEnum posEnum, int offset) {
+ public ChunkState(TermDocsEnumFactory factory, DocsEnum posEnum, int offset) {
+ this.factory = factory;
this.posEnum = posEnum;
this.offset = offset;
}
}
-
+
private final ChunkState[] chunkStates;
- private final DocsAndPositionsEnum lead;
+ private final DocsEnum lead;
private int docID = -1;
- private int freq;
private final Similarity.SimScorer docScorer;
-
+
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity.SimScorer docScorer) throws IOException {
super(weight);
this.docScorer = docScorer;
-
+
chunkStates = new ChunkState[postings.length];
endMinus1 = postings.length-1;
@@ -70,7 +74,7 @@ final class ExactPhraseScorer extends Scorer {
cost = lead.cost();
for(int i=0;i 0) {
+ } else if (firstPosition() != NO_MORE_POSITIONS) {
return doc; // success: matches phrase
} else {
doc = lead.nextDoc(); // doesn't match phrase
@@ -103,7 +107,7 @@ final class ExactPhraseScorer extends Scorer {
doc = lead.advance(doc);
}
}
-
+
@Override
public int nextDoc() throws IOException {
return docID = doNext(lead.nextDoc());
@@ -113,51 +117,116 @@ final class ExactPhraseScorer extends Scorer {
public int advance(int target) throws IOException {
return docID = doNext(lead.advance(target));
}
-
+
@Override
public String toString() {
return "ExactPhraseScorer(" + weight + ")";
}
-
+
@Override
- public int freq() {
+ public int freq() throws IOException {
+ int freq = 0;
+    while (nextPosition() != NO_MORE_POSITIONS) {
+ freq++;
+ }
return freq;
}
-
+
@Override
public int docID() {
return docID;
}
-
+
@Override
- public float score() {
- return docScorer.score(docID, freq);
+ public float score() throws IOException {
+ return docScorer.score(docID, freq());
}
- private int phraseFreq() throws IOException {
+ private int chunkStart = 0;
+ private int chunkEnd = CHUNK;
- freq = 0;
+ private int posRemaining;
+ private int positionsInChunk;
+ private boolean cached = false;
- // init chunks
- for(int i=0;i cs.lastPos) {
cs.lastPos = cs.pos;
final int posIndex = cs.pos - chunkStart;
@@ -179,7 +248,7 @@ final class ExactPhraseScorer extends Scorer {
}
if (cs.posUpto == cs.posLimit) {
- end = true;
+ exhausted = true;
break;
}
cs.posUpto++;
@@ -189,10 +258,10 @@ final class ExactPhraseScorer extends Scorer {
// middle terms
boolean any = true;
- for(int t=1;t cs.lastPos) {
cs.lastPos = cs.pos;
final int posIndex = cs.pos - chunkStart;
@@ -204,7 +273,7 @@ final class ExactPhraseScorer extends Scorer {
}
if (cs.posUpto == cs.posLimit) {
- end = true;
+ exhausted = true;
break;
}
cs.posUpto++;
@@ -220,6 +289,8 @@ final class ExactPhraseScorer extends Scorer {
// petered out for this chunk
chunkStart += CHUNK;
chunkEnd += CHUNK;
+ if (exhausted)
+ return false;
continue;
}
@@ -227,17 +298,17 @@ final class ExactPhraseScorer extends Scorer {
{
final ChunkState cs = chunkStates[endMinus1];
- while(cs.pos < chunkEnd) {
+ while (cs.pos < chunkEnd) {
if (cs.pos > cs.lastPos) {
cs.lastPos = cs.pos;
final int posIndex = cs.pos - chunkStart;
- if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1) {
- freq++;
+ if (posIndex >= 0 && gens[posIndex] == gen
+ && counts[posIndex] == endMinus1) {
+ addPosition(cs.pos);
}
}
if (cs.posUpto == cs.posLimit) {
- end = true;
break;
}
cs.posUpto++;
@@ -247,13 +318,15 @@ final class ExactPhraseScorer extends Scorer {
chunkStart += CHUNK;
chunkEnd += CHUNK;
- }
- return freq;
+ posRemaining = positionsInChunk;
+ return true;
+ }
}
@Override
public long cost() {
return cost;
}
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java
index e2a50c8..66e130d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import java.io.IOException;
import java.util.Collection;
/** Used by {@link BulkScorer}s that need to pass a {@link
@@ -46,6 +47,11 @@ final class FakeScorer extends Scorer {
}
@Override
+ public int nextPosition() throws IOException {
+ throw new UnsupportedOperationException("FakeScorer doesn't support nextPosition()");
+ }
+
+ @Override
public int nextDoc() {
throw new UnsupportedOperationException("FakeScorer doesn't support nextDoc()");
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java b/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java
index 247bb03..3f6e74e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java
@@ -1,9 +1,9 @@
package org.apache.lucene.search;
-import java.io.IOException;
-
import org.apache.lucene.index.AtomicReaderContext;
+import java.io.IOException;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -41,6 +41,11 @@ public class FilterCollector implements Collector {
}
@Override
+ public int postingFeatures() {
+ return in.postingFeatures();
+ }
+
+ @Override
public String toString() {
return getClass().getSimpleName() + "(" + in + ")";
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java
index 88881bd..c2bfab6 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java
@@ -17,11 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Collection;
-
import org.apache.lucene.util.AttributeSource;
+import java.io.IOException;
+
/**
* A {@code FilterScorer} contains another {@code Scorer}, which it
* uses as its basic source of data, possibly transforming the data along the
@@ -61,6 +60,11 @@ abstract class FilterScorer extends Scorer {
}
@Override
+ public int nextPosition() throws IOException {
+ return in.nextPosition();
+ }
+
+ @Override
public int advance(int target) throws IOException {
return in.advance(target);
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java b/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
index d700a30..74b8828 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
@@ -124,7 +124,7 @@ public class FilteredQuery extends Query {
// return a filtering scorer
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException {
assert filter != null;
DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs);
@@ -133,12 +133,13 @@ public class FilteredQuery extends Query {
return null;
}
- return strategy.filteredScorer(context, weight, filterDocIdSet);
+ return strategy.filteredScorer(context, weight, filterDocIdSet, flags);
}
// return a filtering top scorer
@Override
- public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException {
+ public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, int flags, Bits acceptDocs) throws IOException {
+
assert filter != null;
DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs);
@@ -147,7 +148,8 @@ public class FilteredQuery extends Query {
return null;
}
- return strategy.filteredBulkScorer(context, weight, scoreDocsInOrder, filterDocIdSet);
+ return strategy.filteredBulkScorer(context, weight, scoreDocsInOrder, filterDocIdSet, flags);
+
}
};
}
@@ -189,7 +191,6 @@ public class FilteredQuery extends Query {
return scorerDoc = doc;
}
}
-
@Override
public int docID() {
return scorerDoc;
@@ -202,7 +203,12 @@ public class FilteredQuery extends Query {
@Override
public int freq() throws IOException { return scorer.freq(); }
-
+
+ @Override
+ public int nextPosition() throws IOException {
+ return scorer.nextPosition();
+ }
+
@Override
public Collection getChildren() {
return Collections.singleton(new ChildScorer(scorer, "FILTERED"));
@@ -312,7 +318,12 @@ public class FilteredQuery extends Query {
public final int freq() throws IOException {
return scorer.freq();
}
-
+
+ @Override
+ public int nextPosition() throws IOException {
+ return scorer.nextPosition();
+ }
+
@Override
public final Collection getChildren() {
return Collections.singleton(new ChildScorer(scorer, "FILTERED"));
@@ -480,12 +491,13 @@ public class FilteredQuery extends Query {
* the {@link AtomicReaderContext} for which to return the {@link Scorer}.
* @param weight the {@link FilteredQuery} {@link Weight} to create the filtered scorer.
* @param docIdSet the filter {@link DocIdSet} to apply
+ * @param flags the low level Posting Features for this scorer.
* @return a filtered scorer
*
* @throws IOException if an {@link IOException} occurs
*/
public abstract Scorer filteredScorer(AtomicReaderContext context,
- Weight weight, DocIdSet docIdSet) throws IOException;
+ Weight weight, DocIdSet docIdSet, int flags) throws IOException;
/**
* Returns a filtered {@link BulkScorer} based on this
@@ -500,8 +512,8 @@ public class FilteredQuery extends Query {
* @return a filtered top scorer
*/
public BulkScorer filteredBulkScorer(AtomicReaderContext context,
- Weight weight, boolean scoreDocsInOrder, DocIdSet docIdSet) throws IOException {
- Scorer scorer = filteredScorer(context, weight, docIdSet);
+ Weight weight, boolean scoreDocsInOrder, DocIdSet docIdSet, int flags) throws IOException {
+ Scorer scorer = filteredScorer(context, weight, docIdSet, flags);
if (scorer == null) {
return null;
}
@@ -509,6 +521,7 @@ public class FilteredQuery extends Query {
// ignore scoreDocsInOrder:
return new Weight.DefaultBulkScorer(scorer);
}
+
}
/**
@@ -522,7 +535,7 @@ public class FilteredQuery extends Query {
public static class RandomAccessFilterStrategy extends FilterStrategy {
@Override
- public Scorer filteredScorer(AtomicReaderContext context, Weight weight, DocIdSet docIdSet) throws IOException {
+ public Scorer filteredScorer(AtomicReaderContext context, Weight weight, DocIdSet docIdSet, int flags) throws IOException {
final DocIdSetIterator filterIter = docIdSet.iterator();
if (filterIter == null) {
// this means the filter does not accept any documents.
@@ -539,12 +552,12 @@ public class FilteredQuery extends Query {
final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc);
if (useRandomAccess) {
// if we are using random access, we return the inner scorer, just with other acceptDocs
- return weight.scorer(context, filterAcceptDocs);
+ return weight.scorer(context, flags, filterAcceptDocs);
} else {
assert firstFilterDoc > -1;
// we are gonna advance() this scorer, so we set inorder=true/toplevel=false
// we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice
- final Scorer scorer = weight.scorer(context, null);
+ final Scorer scorer = weight.scorer(context, flags, null);
// TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer
return (scorer == null) ? null : new PrimaryAdvancedLeapFrogScorer(weight, firstFilterDoc, filterIter, scorer);
}
@@ -578,14 +591,14 @@ public class FilteredQuery extends Query {
@Override
public Scorer filteredScorer(AtomicReaderContext context,
- Weight weight, DocIdSet docIdSet) throws IOException {
+ Weight weight, DocIdSet docIdSet, int flags) throws IOException {
final DocIdSetIterator filterIter = docIdSet.iterator();
if (filterIter == null) {
// this means the filter does not accept any documents.
return null;
}
// we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice
- final Scorer scorer = weight.scorer(context, null);
+ final Scorer scorer = weight.scorer(context, flags, null);
if (scorer == null) {
return null;
}
@@ -615,30 +628,29 @@ public class FilteredQuery extends Query {
@Override
public Scorer filteredScorer(final AtomicReaderContext context,
Weight weight,
- DocIdSet docIdSet) throws IOException {
+ DocIdSet docIdSet, int flags) throws IOException {
Bits filterAcceptDocs = docIdSet.bits();
if (filterAcceptDocs == null) {
// Filter does not provide random-access Bits; we
// must fallback to leapfrog:
- return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet);
+ return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet, flags);
}
- final Scorer scorer = weight.scorer(context, null);
- return scorer == null ? null : new QueryFirstScorer(weight,
- filterAcceptDocs, scorer);
+ final Scorer scorer = weight.scorer(context, flags, null);
+ return scorer == null ? null : new QueryFirstScorer(weight, filterAcceptDocs, scorer);
}
@Override
public BulkScorer filteredBulkScorer(final AtomicReaderContext context,
Weight weight,
boolean scoreDocsInOrder, // ignored (we always top-score in order)
- DocIdSet docIdSet) throws IOException {
+ DocIdSet docIdSet, int flags) throws IOException {
Bits filterAcceptDocs = docIdSet.bits();
if (filterAcceptDocs == null) {
// Filter does not provide random-access Bits; we
// must fallback to leapfrog:
- return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredBulkScorer(context, weight, scoreDocsInOrder, docIdSet);
+ return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredBulkScorer(context, weight, scoreDocsInOrder, docIdSet, flags);
}
- final Scorer scorer = weight.scorer(context, null);
+ final Scorer scorer = weight.scorer(context, flags, null);
return scorer == null ? null : new QueryFirstBulkScorer(scorer, filterAcceptDocs);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
index 3199966..8a18443 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
@@ -22,7 +22,7 @@ import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
-import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.Term;
@@ -271,8 +271,8 @@ public class FuzzyTermsEnum extends TermsEnum {
}
@Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
- DocsAndPositionsEnum reuse, int flags) throws IOException {
+ public DocsEnum docsAndPositions(Bits liveDocs,
+ DocsEnum reuse, int flags) throws IOException {
return actualEnum.docsAndPositions(liveDocs, reuse, flags);
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
index 116304e..e74ae81 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -17,6 +17,23 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.ReaderUtil;
+import org.apache.lucene.index.StoredDocument;
+import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.store.NIOFSDirectory;
+import org.apache.lucene.util.ThreadInterruptedException;
+
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
@@ -32,23 +49,6 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DirectoryReader; // javadocs
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.index.ReaderUtil;
-import org.apache.lucene.index.StoredDocument;
-import org.apache.lucene.index.StoredFieldVisitor;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.search.similarities.DefaultSimilarity;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.store.NIOFSDirectory; // javadoc
-import org.apache.lucene.util.ThreadInterruptedException;
-import org.apache.lucene.index.IndexWriter; // javadocs
-
/** Implements search over a single IndexReader.
*
*
Applications usually need only call the inherited
@@ -608,7 +608,7 @@ public class IndexSearcher {
// continue with the following leaf
continue;
}
- BulkScorer scorer = weight.bulkScorer(ctx, !leafCollector.acceptsDocsOutOfOrder(), ctx.reader().getLiveDocs());
+ BulkScorer scorer = weight.bulkScorer(ctx, !leafCollector.acceptsDocsOutOfOrder(), collector.postingFeatures(), ctx.reader().getLiveDocs());
if (scorer != null) {
try {
scorer.score(leafCollector);
diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
index 8f2edd7..987666b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
@@ -20,11 +20,11 @@ package org.apache.lucene.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ToStringUtils;
-import java.util.Set;
import java.io.IOException;
+import java.util.Set;
/**
* A query that matches all documents.
@@ -73,6 +73,11 @@ public class MatchAllDocsQuery extends Query {
}
@Override
+ public int nextPosition() throws IOException {
+ return -1;
+ }
+
+ @Override
public int advance(int target) throws IOException {
doc = target-1;
return nextDoc();
@@ -114,7 +119,7 @@ public class MatchAllDocsQuery extends Query {
}
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException {
return new MatchAllScorer(context.reader(), acceptDocs, this, queryWeight);
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
index 3f31ace..eeb9711 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
@@ -17,14 +17,14 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.util.ArrayUtil;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
-import org.apache.lucene.util.ArrayUtil;
-
/**
* A Scorer for OR like queries, counterpart of ConjunctionScorer.
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
@@ -62,6 +62,8 @@ class MinShouldMatchSumScorer extends Scorer {
private final float coord[];
+ private final PositionQueue posQueue;
+
/**
* Construct a MinShouldMatchSumScorer.
*
@@ -110,6 +112,8 @@ class MinShouldMatchSumScorer extends Scorer {
this.coord = coord;
minheapHeapify();
assert minheapCheck();
+
+ posQueue = new PositionQueue(subScorers.toArray(new Scorer[subScorers.size()]));
}
@Override
@@ -145,6 +149,7 @@ class MinShouldMatchSumScorer extends Scorer {
break;
}
}
+ posQueue.advanceTo(doc);
return doc;
}
@@ -231,6 +236,11 @@ class MinShouldMatchSumScorer extends Scorer {
return nrMatchers;
}
+ @Override
+ public int nextPosition() throws IOException {
+ return posQueue.nextPosition();
+ }
+
/**
* Advances to the first match beyond the current whose document number is
* greater than or equal to a given target.
@@ -261,6 +271,7 @@ class MinShouldMatchSumScorer extends Scorer {
evaluateSmallestDocInHeap();
if (nrMatchers >= mm) {
+ posQueue.advanceTo(doc);
return doc;
} else {
return nextDoc();
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
index 859b893..3a02e00 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
@@ -17,13 +17,11 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.AtomicReaderContext;
+
import java.io.IOException;
import java.util.Arrays;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.Scorer;
-
/**
* A {@link Collector} which allows running a search with several
* {@link Collector}s. It offers a static {@link #wrap} method which accepts a
@@ -103,6 +101,15 @@ public class MultiCollector implements Collector {
return new MultiLeafCollector(leafCollectors);
}
+ @Override
+ public int postingFeatures() {
+ int pf = 0;
+ for (Collector collector : collectors) {
+ pf |= collector.postingFeatures();
+ }
+ return pf;
+ }
+
private static class MultiLeafCollector implements LeafCollector {
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
index 902e6aa..0b21469 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
@@ -1,6 +1,6 @@
package org.apache.lucene.search;
-/*
+/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,12 +17,8 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.*;
-
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
@@ -31,14 +27,28 @@ import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.search.PhraseQuery.TermDocsEnumFactory;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.ToStringUtils;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Map;
+import java.util.Set;
+
/**
* MultiPhraseQuery is a generalized version of PhraseQuery, with an added
* method {@link #add(Term[])}.
@@ -179,7 +189,7 @@ public class MultiPhraseQuery extends Query {
}
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException {
assert !termArrays.isEmpty();
final AtomicReader reader = context.reader();
final Bits liveDocs = acceptDocs;
@@ -197,9 +207,9 @@ public class MultiPhraseQuery extends Query {
for (int pos=0; pos 1) {
postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum);
@@ -221,6 +231,7 @@ public class MultiPhraseQuery extends Query {
// None of the terms are in this reader
return null;
}
+ factory = new MultiTermDocsEnumFactory(liveDocs, context, terms, termContexts, termsEnum, flags);
} else {
final Term term = terms[0];
TermState termState = termContexts.get(term).get(context.ord);
@@ -237,10 +248,10 @@ public class MultiPhraseQuery extends Query {
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
}
- docFreq = termsEnum.docFreq();
+ factory = new TermDocsEnumFactory(term.bytes(), termState, termsEnum, flags, acceptDocs);
}
-
- postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms);
+
+ postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, factory, termsEnum.docFreq() , positions.get(pos).intValue(), terms);
}
// sort by increasing docFreq order
@@ -257,7 +268,7 @@ public class MultiPhraseQuery extends Query {
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
- Scorer scorer = scorer(context, context.reader().getLiveDocs());
+ Scorer scorer = scorer(context, DocsEnum.FLAG_POSITIONS, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
@@ -401,6 +412,27 @@ public class MultiPhraseQuery extends Query {
}
return true;
}
+
+ private static class MultiTermDocsEnumFactory extends TermDocsEnumFactory {
+
+ AtomicReaderContext context;
+ Term[] terms;
+ Map termContexts;
+
+ MultiTermDocsEnumFactory(Bits liveDocs, AtomicReaderContext context, Term[] terms,
+ Map termContexts, TermsEnum termsEnum, int flags) throws IOException {
+ super(termsEnum, flags, liveDocs);
+ this.context = context;
+ this.terms = terms;
+ this.termContexts = termContexts;
+ }
+
+ @Override
+ public DocsEnum docsAndPositionsEnum() throws IOException {
+ return new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum, flags);
+ }
+
+ }
}
/**
@@ -408,15 +440,15 @@ public class MultiPhraseQuery extends Query {
*/
// TODO: if ever we allow subclassing of the *PhraseScorer
-class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
+class UnionDocsAndPositionsEnum extends DocsEnum {
- private static final class DocsQueue extends PriorityQueue {
- DocsQueue(List docsEnums) throws IOException {
+ private static final class DocsQueue extends PriorityQueue {
+ DocsQueue(List docsEnums) throws IOException {
super(docsEnums.size());
- Iterator i = docsEnums.iterator();
+ Iterator i = docsEnums.iterator();
while (i.hasNext()) {
- DocsAndPositionsEnum postings = i.next();
+ DocsEnum postings = i.next();
if (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
add(postings);
}
@@ -424,30 +456,46 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
}
@Override
- public final boolean lessThan(DocsAndPositionsEnum a, DocsAndPositionsEnum b) {
+ public final boolean lessThan(DocsEnum a, DocsEnum b) {
return a.docID() < b.docID();
}
}
- private static final class IntQueue {
- private int _arraySize = 16;
+ // TODO: Reimplement this as int[_arraySize * 3], storing position at i * 3,
+ // startOffset at i * 3 + 1 and endOffset at i * 3 + 2. Will need to also
+ // implement a new SorterTemplate to sort the array.
+
+ private static final class PositionQueue {
+ private int _arraySize = 48;
private int _index = 0;
private int _lastIndex = 0;
private int[] _array = new int[_arraySize];
- final void add(int i) {
- if (_lastIndex == _arraySize)
+ final void add(int pos, int start, int end) {
+ if (_lastIndex * 3 == _arraySize)
growArray();
- _array[_lastIndex++] = i;
+ _array[_lastIndex * 3] = pos;
+ _array[_lastIndex * 3 + 1] = start;
+ _array[_lastIndex * 3 + 2] = end;
+ _lastIndex += 1;
}
final int next() {
- return _array[_index++];
+ return _array[_index++ * 3];
+ }
+
+ final int startOffset() {
+ return _array[(_index - 1) * 3 + 1];
+ }
+
+ final int endOffset() {
+ return _array[(_index - 1) * 3 + 2];
}
final void sort() {
- Arrays.sort(_array, _index, _lastIndex);
+ //Arrays.sort(_array, _index, _lastIndex);
+ sorter.sort(_index, _lastIndex - 1);
}
final void clear() {
@@ -465,16 +513,54 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
_array = newArray;
_arraySize *= 2;
}
+
+ private IntroSorter sorter = new IntroSorter() {
+ private int pivot;
+
+ @Override
+ protected void swap(int i, int j) {
+ int ti = _array[i * 3];
+ int ts = _array[i * 3 + 1];
+ int te = _array[i * 3 + 2];
+ _array[i * 3] = _array[j * 3];
+ _array[i * 3 + 1] = _array[j * 3 + 1];
+ _array[i * 3 + 2] = _array[j * 3 + 2];
+ _array[j * 3] = ti;
+ _array[j * 3 + 1] = ts;
+ _array[j * 3 + 2] = te;
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ return _array[i * 3] - _array[j * 3];
+ }
+
+ @Override
+ protected void setPivot(int i) {
+ pivot = i;
+ }
+
+ @Override
+ protected int comparePivot(int j) {
+ return pivot - _array[j * 3];
+ }
+ };
}
private int _doc = -1;
private int _freq;
private DocsQueue _queue;
- private IntQueue _posList;
+ private PositionQueue _posList;
+ private int posPending;
private long cost;
- public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map termContexts, TermsEnum termsEnum) throws IOException {
- List docsEnums = new LinkedList<>();
+ public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms,
+ Map termContexts, TermsEnum termsEnum) throws IOException {
+ this(liveDocs, context, terms, termContexts, termsEnum, DocsEnum.FLAG_POSITIONS);
+ }
+
+ public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map termContexts, TermsEnum termsEnum, int flags) throws IOException {
+ List docsEnums = new LinkedList<>();
for (int i = 0; i < terms.length; i++) {
final Term term = terms[i];
TermState termState = termContexts.get(term).get(context.ord);
@@ -483,7 +569,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
continue;
}
termsEnum.seekExact(term.bytes(), termState);
- DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
+ DocsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
if (postings == null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
@@ -493,7 +579,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
}
_queue = new DocsQueue(docsEnums);
- _posList = new IntQueue();
+ _posList = new PositionQueue();
}
@Override
@@ -509,13 +595,13 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
_doc = _queue.top().docID();
// merge sort all positions together
- DocsAndPositionsEnum postings;
+ DocsEnum postings;
do {
postings = _queue.top();
final int freq = postings.freq();
for (int i = 0; i < freq; i++) {
- _posList.add(postings.nextPosition());
+ _posList.add(postings.nextPosition(), postings.startOffset(), postings.endOffset());
}
if (postings.nextDoc() != NO_MORE_DOCS) {
@@ -527,23 +613,27 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
_posList.sort();
_freq = _posList.size();
+ posPending = _freq;
return _doc;
}
@Override
public int nextPosition() {
+ if (posPending == 0)
+ return NO_MORE_POSITIONS;
+ posPending--;
return _posList.next();
}
@Override
public int startOffset() {
- return -1;
+ return _posList.startOffset();
}
@Override
public int endOffset() {
- return -1;
+ return _posList.endOffset();
}
@Override
@@ -554,7 +644,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
@Override
public final int advance(int target) throws IOException {
while (_queue.top() != null && target > _queue.top().docID()) {
- DocsAndPositionsEnum postings = _queue.pop();
+ DocsEnum postings = _queue.pop();
if (postings.advance(target) != NO_MORE_DOCS) {
_queue.add(postings);
}
@@ -563,7 +653,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
}
@Override
- public final int freq() {
+ public final int freq() throws IOException {
return _freq;
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
index c975b01..7faa453 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
@@ -17,8 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Term;
+
import java.io.IOException;
-import org.apache.lucene.index.*;
/**
* Position of a term in a document that takes into account the term offset within the phrase.
@@ -29,13 +31,13 @@ final class PhrasePositions {
int count; // remaining pos in this doc
int offset; // position in phrase
final int ord; // unique across all PhrasePositions instances
- final DocsAndPositionsEnum postings; // stream of docs & positions
+ final DocsEnum postings; // stream of docs & positions
PhrasePositions next; // used to make lists
int rptGroup = -1; // >=0 indicates that this is a repeating PP
int rptInd; // index in the rptGroup
final Term[] terms; // for repetitions initialization
- PhrasePositions(DocsAndPositionsEnum postings, int o, int ord, Term[] terms) {
+ PhrasePositions(DocsEnum postings, int o, int ord, Term[] terms) {
this.postings = postings;
offset = o;
this.ord = ord;
@@ -44,6 +46,7 @@ final class PhrasePositions {
final boolean next() throws IOException { // increments to next doc
doc = postings.nextDoc();
+
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return false;
}
@@ -80,10 +83,14 @@ final class PhrasePositions {
/** for debug purposes */
@Override
public String toString() {
- String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count;
+ String s = "d:"+doc+" offset:"+offset+" position:"+position+" c:"+count;
if (rptGroup >=0 ) {
s += " rpt:"+rptGroup+",i"+rptInd;
}
+ s += " t: [" + terms[0];
+ for (int i = 1; i < terms.length; i++)
+      s += "," + terms[i];
+ s += "]";
return s;
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
index cdca801..46a51a0 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -1,6 +1,6 @@
package org.apache.lucene.search;
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,28 +17,28 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Set;
-
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Set;
+
/** A Query that matches documents containing a particular sequence of terms.
* A PhraseQuery is built by QueryParser for input like "new york".
*
@@ -138,13 +138,15 @@ public class PhraseQuery extends Query {
}
   static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
- final DocsAndPositionsEnum postings;
+ final TermDocsEnumFactory factory;
+ final DocsEnum postings;
final int docFreq;
final int position;
final Term[] terms;
final int nTerms; // for faster comparisons
- public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term... terms) {
+ public PostingsAndFreq(DocsEnum postings, TermDocsEnumFactory factory, int docFreq, int position, Term... terms) throws IOException {
+ this.factory = factory;
this.postings = postings;
this.docFreq = docFreq;
this.position = position;
@@ -245,7 +247,7 @@ public class PhraseQuery extends Query {
}
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException {
assert !terms.isEmpty();
final AtomicReader reader = context.reader();
final Bits liveDocs = acceptDocs;
@@ -267,7 +269,7 @@ public class PhraseQuery extends Query {
return null;
}
te.seekExact(t.bytes(), state);
- DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
+ DocsEnum postingsEnum = te.docs(liveDocs, null, DocsEnum.FLAG_POSITIONS);
// PhraseQuery on a field that did not index
// positions.
@@ -276,7 +278,8 @@ public class PhraseQuery extends Query {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
}
- postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t);
+ TermDocsEnumFactory factory = new TermDocsEnumFactory(t.bytes(), state, te, flags, acceptDocs);
+ postingsFreqs[i] = new PostingsAndFreq(postingsEnum, factory, te.docFreq(), positions.get(i).intValue(), t);
}
// sort by increasing docFreq order
@@ -298,7 +301,7 @@ public class PhraseQuery extends Query {
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
- Scorer scorer = scorer(context, context.reader().getLiveDocs());
+ Scorer scorer = scorer(context, DocsEnum.FLAG_POSITIONS, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
@@ -396,4 +399,33 @@ public class PhraseQuery extends Query {
^ positions.hashCode();
}
+ static class TermDocsEnumFactory {
+ protected final TermsEnum termsEnum;
+ protected final Bits liveDocs;
+ protected final int flags;
+
+ private final BytesRef term;
+ private final TermState termState;
+
+ TermDocsEnumFactory(TermsEnum termsEnum, int flags, Bits liveDocs) {
+ this(null, null, termsEnum, flags, liveDocs);
+ }
+
+ TermDocsEnumFactory(BytesRef term, TermState termState, TermsEnum termsEnum, int flags, Bits liveDocs) {
+ this.termsEnum = termsEnum;
+ this.termState = termState;
+ this.liveDocs = liveDocs;
+ this.term = term;
+ this.flags = flags;
+ }
+
+
+ public DocsEnum docsAndPositionsEnum()
+ throws IOException {
+ assert term != null;
+ termsEnum.seekExact(term, termState);
+ return termsEnum.docsAndPositions(liveDocs, null, flags);
+ }
+
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java
index d2c8655..24a4815 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java
@@ -42,4 +42,8 @@ final class PhraseQueue extends PriorityQueue {
return pp1.doc < pp2.doc;
}
}
+
+ public Object[] getPPs() {
+ return getHeapArray();
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/PositionQueue.java b/lucene/core/src/java/org/apache/lucene/search/PositionQueue.java
new file mode 100644
index 0000000..3b57147
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/PositionQueue.java
@@ -0,0 +1,127 @@
+package org.apache.lucene.search;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.search.posfilter.Interval;
+import org.apache.lucene.util.PriorityQueue;
+
+import java.io.IOException;
+
+/**
+ * Copyright (c) 2013 Lemur Consulting Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class PositionQueue extends PriorityQueue<PositionQueue.DocsEnumRef> {
+
+ public class DocsEnumRef {
+
+ public final DocsEnum docsEnum;
+ public final int ord;
+ public Interval interval = new Interval();
+
+ public DocsEnumRef(DocsEnum docsEnum, int ord) {
+ this.docsEnum = docsEnum;
+ this.ord = ord;
+ }
+
+ public int nextPosition() throws IOException {
+ assert docsEnum.docID() != -1;
+ if (docsEnum.docID() == DocsEnum.NO_MORE_DOCS || docsEnum.docID() != docId
+ || docsEnum.nextPosition() == DocsEnum.NO_MORE_POSITIONS)
+ interval.setMaximum();
+ else
+ interval.update(this.docsEnum);
+ return interval.begin;
+ }
+
+ }
+
+ boolean positioned = false;
+ Interval current = new Interval();
+ int docId = -1;
+ protected int queuesize;
+
+ public PositionQueue(DocsEnum... subDocsEnums) {
+ super(subDocsEnums.length);
+ for (int i = 0; i < subDocsEnums.length; i++) {
+ add(new DocsEnumRef(subDocsEnums[i], i));
+ }
+ queuesize = subDocsEnums.length;
+ }
+
+ protected void init() throws IOException {
+ queuesize = 0;
+ for (Object scorerRef : getHeapArray()) {
+ if (scorerRef != null) {
+ ((DocsEnumRef) scorerRef).nextPosition();
+ queuesize++;
+ }
+ }
+ updateTop();
+ }
+
+ public int nextPosition() throws IOException {
+ if (!positioned) {
+ init();
+ positioned = true;
+ current.update(top().interval);
+ return current.begin;
+ }
+ if (current.begin == DocsEnum.NO_MORE_POSITIONS)
+ return DocsEnum.NO_MORE_POSITIONS;
+ if (top().nextPosition() == DocsEnum.NO_MORE_POSITIONS)
+ queuesize--;
+ updateInternalIntervals();
+ updateTop();
+ current.update(top().interval);
+ //System.out.println("PQ: " + current.toString());
+ return current.begin;
+ }
+
+ @Override
+ protected boolean lessThan(DocsEnumRef a, DocsEnumRef b) {
+ if (a.docsEnum.docID() < b.docsEnum.docID())
+ return true;
+ if (a.docsEnum.docID() > b.docsEnum.docID())
+ return false;
+ return a.interval.begin < b.interval.begin;
+ }
+
+ protected void updateInternalIntervals() {}
+
+ /**
+ * Must be called after the scorers have been advanced
+ */
+ public void advanceTo(int doc) {
+ positioned = false;
+ this.docId = doc;
+ this.queuesize = this.size();
+ }
+
+ public int startPosition() throws IOException {
+ return current.begin;
+ }
+
+ public int endPosition() throws IOException {
+ return current.end;
+ }
+
+ public int startOffset() throws IOException {
+ return current.offsetBegin;
+ }
+
+ public int endOffset() throws IOException {
+ return current.offsetEnd;
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/PositionsCollector.java b/lucene/core/src/java/org/apache/lucene/search/PositionsCollector.java
new file mode 100644
index 0000000..a573db5
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/PositionsCollector.java
@@ -0,0 +1,62 @@
+package org.apache.lucene.search;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.search.posfilter.Interval;
+
+import java.io.IOException;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public abstract class PositionsCollector extends SimpleCollector {
+
+ private Scorer scorer;
+ private final boolean collectOffsets;
+
+ protected PositionsCollector(boolean collectOffsets) {
+ this.collectOffsets = collectOffsets;
+ }
+
+ protected PositionsCollector() {
+ this(false);
+ }
+
+ @Override
+ public final void setScorer(Scorer scorer) throws IOException {
+ this.scorer = scorer;
+ }
+
+ @Override
+ public final void collect(int doc) throws IOException {
+ while (scorer.nextPosition() != DocsEnum.NO_MORE_POSITIONS) {
+ collectPosition(doc, new Interval(scorer));
+ }
+ }
+
+ protected abstract void collectPosition(int doc, Interval interval);
+
+ @Override
+ public int postingFeatures() {
+ return collectOffsets ? DocsEnum.FLAG_OFFSETS : DocsEnum.FLAG_POSITIONS;
+ }
+
+ @Override
+ public final boolean acceptsDocsOutOfOrder() {
+ return false;
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java
index 755c3cd..4aed657 100644
--- a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java
@@ -17,13 +17,14 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsEnum;
+
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
-import org.apache.lucene.index.AtomicReaderContext;
-
/** A {@link Rescorer} that uses a provided Query to assign
* scores to the first-pass hits.
*
@@ -82,7 +83,7 @@ public abstract class QueryRescorer extends Rescorer {
if (readerContext != null) {
// We advanced to another segment:
docBase = readerContext.docBase;
- scorer = weight.scorer(readerContext, null);
+ scorer = weight.scorer(readerContext, DocsEnum.FLAG_NONE, null);
}
if(scorer != null) {
diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java b/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java
index 50bc03e..4a4a0f8 100644
--- a/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java
+++ b/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java
@@ -17,11 +17,12 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.util.Bits;
+import java.io.IOException;
+
/**
* Constrains search results to only match those which also match a provided
* query.
@@ -56,7 +57,7 @@ public class QueryWrapperFilter extends Filter {
return new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
- return weight.scorer(privateContext, acceptDocs);
+ return weight.scorer(privateContext, DocsEnum.FLAG_FREQS, acceptDocs);
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
index 4e2a5f1..b487238 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
@@ -110,6 +110,31 @@ class ReqExclScorer extends Scorer {
}
@Override
+ public int nextPosition() throws IOException {
+ return reqScorer.nextPosition();
+ }
+
+ @Override
+ public int startPosition() throws IOException {
+ return reqScorer.startPosition();
+ }
+
+ @Override
+ public int endPosition() throws IOException {
+ return reqScorer.endPosition();
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return reqScorer.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return reqScorer.endOffset();
+ }
+
+ @Override
   public Collection<ChildScorer> getChildren() {
return Collections.singleton(new ChildScorer(reqScorer, "MUST"));
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
index d7b4d86..aa65ab3 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
@@ -29,6 +29,7 @@ class ReqOptSumScorer extends Scorer {
/** The scorers passed from the constructor.
* These are set to null as soon as their next() or skipTo() returns false.
*/
+ private PositionQueue posQueue;
protected Scorer reqScorer;
protected Scorer optScorer;
@@ -45,16 +46,21 @@ class ReqOptSumScorer extends Scorer {
assert optScorer != null;
this.reqScorer = reqScorer;
this.optScorer = optScorer;
+ posQueue = new PositionQueue(reqScorer, optScorer);
}
@Override
public int nextDoc() throws IOException {
- return reqScorer.nextDoc();
+ int doc = reqScorer.nextDoc();
+ posQueue.advanceTo(doc);
+ return doc;
}
@Override
public int advance(int target) throws IOException {
- return reqScorer.advance(target);
+ int doc = reqScorer.advance(target);
+ posQueue.advanceTo(doc);
+ return doc;
}
@Override
@@ -93,6 +99,34 @@ class ReqOptSumScorer extends Scorer {
}
@Override
+ public int nextPosition() throws IOException {
+ int optDoc = optScorer.docID();
+    if (optDoc != NO_MORE_DOCS && optDoc < reqScorer.docID())
+ optScorer.advance(reqScorer.docID());
+ return posQueue.nextPosition();
+ }
+
+ @Override
+ public int startPosition() throws IOException {
+ return posQueue.startPosition();
+ }
+
+ @Override
+ public int endPosition() throws IOException {
+ return posQueue.endPosition();
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return posQueue.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return posQueue.endOffset();
+ }
+
+ @Override
   public Collection<ChildScorer> getChildren() {
     ArrayList<ChildScorer> children = new ArrayList<>(2);
children.add(new ChildScorer(reqScorer, "MUST"));
diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
index 929d3b9..cca0808 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
@@ -17,12 +17,12 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.DocsEnum;
+
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
-import org.apache.lucene.index.DocsEnum;
-
/**
* Expert: Common scoring functionality for different types of queries.
*
@@ -67,6 +67,15 @@ public abstract class Scorer extends DocsEnum {
public Weight getWeight() {
return weight;
}
+
+ @Override
+ public String toString() {
+ try {
+ return String.format("%d:%d(%d)->%d(%d)", docID(), startPosition(), startOffset(), endPosition(), endOffset());
+ } catch (IOException e) {
+ return String.format("Cannot retrieve position due to IOException");
+ }
+ }
/** Returns child sub-scorers
* @lucene.experimental */
diff --git a/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java b/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java
index 5803b2e..6cfc5be 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java
@@ -17,9 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsEnum;
+
+import java.io.IOException;
/**
* Base {@link Collector} implementation that is used to collect all contexts.
@@ -50,4 +51,9 @@ public abstract class SimpleCollector implements Collector, LeafCollector {
@Override
public abstract void collect(int doc) throws IOException;
+ @Override
+ public int postingFeatures() {
+ return DocsEnum.FLAG_FREQS;
+ }
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
index 986ab06..cf6ae33 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
@@ -17,6 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.FixedBitSet;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -25,9 +29,6 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.util.FixedBitSet;
final class SloppyPhraseScorer extends Scorer {
private PhrasePositions min, max;
@@ -35,6 +36,7 @@ final class SloppyPhraseScorer extends Scorer {
private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq().
private final Similarity.SimScorer docScorer;
+ private final PhraseQuery.PostingsAndFreq[] postings;
private final int slop;
private final int numPostings;
@@ -55,6 +57,7 @@ final class SloppyPhraseScorer extends Scorer {
int slop, Similarity.SimScorer docScorer) {
super(weight);
this.docScorer = docScorer;
+ this.postings = postings;
this.slop = slop;
this.numPostings = postings==null ? 0 : postings.length;
pq = new PhraseQueue(postings.length);
@@ -79,58 +82,78 @@ final class SloppyPhraseScorer extends Scorer {
}
}
- /**
- * Score a candidate doc for all slop-valid position-combinations (matches)
- * encountered while traversing/hopping the PhrasePositions.
- * The score contribution of a match depends on the distance:
- * - highest score for distance=0 (exact match).
- * - score gets lower as distance gets higher.
- * Example: for query "a b"~2, a document "x a b a y" can be scored twice:
- * once for "a b" (distance=0), and once for "b a" (distance=2).
- * Possibly not all valid combinations are encountered, because for efficiency
- * we always propagate the least PhrasePosition. This allows to base on
- * PriorityQueue and move forward faster.
- * As result, for example, document "a b c b a"
- * would score differently for queries "a b c"~4 and "c b a"~4, although
- * they really are equivalent.
- * Similarly, for doc "a b c b a f g", query "c b"~2
- * would get same score as "g f"~2, although "c b"~2 could be matched twice.
- * We may want to fix this in the future (currently not, for performance reasons).
- */
- private float phraseFreq() throws IOException {
- if (!initPhrasePositions()) {
- return 0.0f;
+ private int matchLength;
+ private int startpos = -1;
+ private int endpos = -1;
+
+ @Override
+ public int nextPosition() throws IOException {
+ if (cached) {
+ cached = false;
+ return this.startPosition();
}
- float freq = 0.0f;
- numMatches = 0;
- PhrasePositions pp = pq.pop();
- int matchLength = end - pp.position;
- int next = pq.top().position;
- while (advancePP(pp)) {
- if (hasRpts && !advanceRpts(pp)) {
+
+ if (pq.size() < postings.length)
+ return NO_MORE_POSITIONS;
+
+ PhrasePositions top = pq.pop();
+ matchLength = end - top.position;
+ int next = pq.top().position;
+ int pos = top.position + top.offset;
+ while (advancePP(top)) {
+ if (hasRpts && !advanceRpts(top))
break; // pps exhausted
- }
- if (pp.position > next) { // done minimizing current match-length
+ if (top.position > next) { // done minimizing current match-length
if (matchLength <= slop) {
- freq += docScorer.computeSlopFactor(matchLength); // score match
- numMatches++;
- }
- pq.add(pp);
- pp = pq.pop();
+ setSpan(pos);
+ pq.add(top);
+ return startpos;
+ }
+ pq.add(top);
+ top = pq.pop();
next = pq.top().position;
- matchLength = end - pp.position;
- } else {
- int matchLength2 = end - pp.position;
- if (matchLength2 < matchLength) {
+ matchLength = end - top.position;
+ pos = top.position + top.offset;
+ }
+ else {
+ int matchLength2 = end - top.position;
+ pos = top.position + top.offset;
+ if (matchLength2 < matchLength)
matchLength = matchLength2;
- }
}
}
+
if (matchLength <= slop) {
- freq += docScorer.computeSlopFactor(matchLength); // score match
- numMatches++;
- }
- return freq;
+ setSpan(pos);
+ return startpos;
+ }
+
+ return NO_MORE_POSITIONS;
+
+ }
+
+ private void setSpan(int topPos) {
+ startpos = topPos;
+ endpos = topPos;
+ for (Object o : pq.getPPs()) {
+ if (o == null)
+ continue;
+ PhrasePositions pp = (PhrasePositions) o;
+ startpos = Math.min(startpos, pp.position + pp.offset);
+ endpos = Math.max(endpos, pp.position + pp.offset);
+ }
+ }
+
+ boolean cached = false;
+
+ private int firstPosition() throws IOException {
+ if (!initPhrasePositions())
+ return NO_MORE_POSITIONS;
+
+ cached = false;
+ int pos = nextPosition();
+ cached = true;
+ return pos;
}
/** advance a PhrasePosition and update 'end', return false if exhausted */
@@ -520,15 +543,61 @@ final class SloppyPhraseScorer extends Scorer {
}
@Override
- public int freq() {
- return numMatches;
+ public int freq() throws IOException {
+ int f = 0;
+ while (nextPosition() != NO_MORE_POSITIONS) {
+ f++;
+ }
+ return f;
}
-
- float sloppyFreq() {
- return sloppyFreq;
+
+ /**
+ * Score a candidate doc for all slop-valid position-combinations (matches)
+ * encountered while traversing/hopping the PhrasePositions.
+ * The score contribution of a match depends on the distance:
+ * - highest score for distance=0 (exact match).
+ * - score gets lower as distance gets higher.
+ * Example: for query "a b"~2, a document "x a b a y" can be scored twice:
+ * once for "a b" (distance=0), and once for "b a" (distance=2).
+ * Possibly not all valid combinations are encountered, because for efficiency
+ * we always propagate the least PhrasePosition. This allows to base on
+ * PriorityQueue and move forward faster.
+ * As result, for example, document "a b c b a"
+ * would score differently for queries "a b c"~4 and "c b a"~4, although
+ * they really are equivalent.
+ * Similarly, for doc "a b c b a f g", query "c b"~2
+ * would get same score as "g f"~2, although "c b"~2 could be matched twice.
+ * We may want to fix this in the future (currently not, for performance reasons).
+ */
+ float sloppyFreq() throws IOException {
+ float f = 0.0f;
+ while (nextPosition() != NO_MORE_POSITIONS) {
+ f += docScorer.computeSlopFactor(matchLength);
+ }
+ return f;
}
-
-// private void printQueue(PrintStream ps, PhrasePositions ext, String title) {
+
+ @Override
+ public int startOffset() throws IOException {
+ return -1; // nocommit
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return -1; // nocommit
+ }
+
+ @Override
+ public int startPosition() throws IOException {
+ return startpos;
+ }
+
+ @Override
+ public int endPosition() throws IOException {
+ return endpos;
+ }
+
+ // private void printQueue(PrintStream ps, PhrasePositions ext, String title) {
// //if (min.doc != ?) return;
// ps.println();
// ps.println("---- "+title);
@@ -570,8 +639,8 @@ final class SloppyPhraseScorer extends Scorer {
}
@Override
- public float score() {
- return docScorer.score(max.doc, sloppyFreq);
+ public float score() throws IOException {
+ return docScorer.score(max.doc, sloppyFreq());
}
@Override
@@ -587,9 +656,9 @@ final class SloppyPhraseScorer extends Scorer {
}
}
// found a doc with all of the terms
- sloppyFreq = phraseFreq(); // check for phrase
+ //sloppyFreq = phraseFreq(); // check for phrase
target = min.doc + 1; // next target in case sloppyFreq is still 0
- } while (sloppyFreq == 0f);
+ } while (firstPosition() == NO_MORE_POSITIONS);
// found a match
return max.doc;
@@ -602,4 +671,5 @@ final class SloppyPhraseScorer extends Scorer {
@Override
public String toString() { return "scorer(" + weight + ")"; }
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
index 5435ccd..5174ee3 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
@@ -17,71 +17,75 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Set;
-
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
-/** A Query that matches documents containing a term.
- This may be combined with other terms with a {@link BooleanQuery}.
- */
+import java.io.IOException;
+import java.util.Set;
+
+/**
+ * A Query that matches documents containing a term. This may be combined with
+ * other terms with a {@link BooleanQuery}.
+ */
public class TermQuery extends Query {
private final Term term;
private final int docFreq;
private final TermContext perReaderTermState;
-
+
final class TermWeight extends Weight {
private final Similarity similarity;
private final Similarity.SimWeight stats;
private final TermContext termStates;
-
+
public TermWeight(IndexSearcher searcher, TermContext termStates)
- throws IOException {
+ throws IOException {
assert termStates != null : "TermContext must not be null";
this.termStates = termStates;
this.similarity = searcher.getSimilarity();
- this.stats = similarity.computeWeight(
- getBoost(),
- searcher.collectionStatistics(term.field()),
+ this.stats = similarity.computeWeight(getBoost(),
+ searcher.collectionStatistics(term.field()),
searcher.termStatistics(term, termStates));
}
-
+
@Override
- public String toString() { return "weight(" + TermQuery.this + ")"; }
-
+ public String toString() {
+ return "weight(" + TermQuery.this + ")";
+ }
+
@Override
- public Query getQuery() { return TermQuery.this; }
-
+ public Query getQuery() {
+ return TermQuery.this;
+ }
+
@Override
public float getValueForNormalization() {
return stats.getValueForNormalization();
}
-
+
@Override
public void normalize(float queryNorm, float topLevelBoost) {
stats.normalize(queryNorm, topLevelBoost);
}
-
+
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException {
assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
final TermsEnum termsEnum = getTermsEnum(context);
if (termsEnum == null) {
return null;
}
- DocsEnum docs = termsEnum.docs(acceptDocs, null);
+ DocsEnum docs = termsEnum.docs(acceptDocs, null, flags);
assert docs != null;
return new TermScorer(this, docs, similarity.simScorer(stats, context));
}
@@ -96,90 +100,100 @@ public class TermQuery extends Query {
assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
return null;
}
- //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
- final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
+ // System.out.println("LD=" + reader.getLiveDocs() + " set?=" +
+ // (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
+ final TermsEnum termsEnum = context.reader().terms(term.field())
+ .iterator(null);
termsEnum.seekExact(term.bytes(), state);
return termsEnum;
}
private boolean termNotInReader(AtomicReader reader, Term term) throws IOException {
// only called from assert
- //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
+ // System.out.println("TQ.termNotInReader reader=" + reader + " term=" +
+ // field + ":" + bytes.utf8ToString());
return reader.docFreq(term) == 0;
}
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
- Scorer scorer = scorer(context, context.reader().getLiveDocs());
+ Scorer scorer = scorer(context, DocsEnum.FLAG_FREQS, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
- result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
- Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));
+ result.setDescription("weight(" + getQuery() + " in " + doc + ") ["
+ + similarity.getClass().getSimpleName() + "], result of:");
+ Explanation scoreExplanation = docScorer.explain(doc,
+ new Explanation(freq, "termFreq=" + freq));
result.addDetail(scoreExplanation);
result.setValue(scoreExplanation.getValue());
result.setMatch(true);
return result;
}
}
- return new ComplexExplanation(false, 0.0f, "no matching term");
+ return new ComplexExplanation(false, 0.0f, "no matching term");
}
}
-
+
/** Constructs a query for the term t. */
public TermQuery(Term t) {
this(t, -1);
}
-
- /** Expert: constructs a TermQuery that will use the
- * provided docFreq instead of looking up the docFreq
- * against the searcher. */
+
+ /**
+ * Expert: constructs a TermQuery that will use the provided docFreq instead
+ * of looking up the docFreq against the searcher.
+ */
public TermQuery(Term t, int docFreq) {
term = t;
this.docFreq = docFreq;
perReaderTermState = null;
}
- /** Expert: constructs a TermQuery that will use the
- * provided docFreq instead of looking up the docFreq
- * against the searcher. */
+ /**
+   * Expert: constructs a TermQuery that will use the docFreq from the provided
+   * {@link TermContext} instead of looking up the docFreq against the searcher.
+ */
public TermQuery(Term t, TermContext states) {
assert states != null;
term = t;
docFreq = states.docFreq();
perReaderTermState = states;
}
-
+
/** Returns the term of this query. */
- public Term getTerm() { return term; }
-
+ public Term getTerm() {
+ return term;
+ }
+
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
final IndexReaderContext context = searcher.getTopReaderContext();
final TermContext termState;
- if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
- // make TermQuery single-pass if we don't have a PRTS or if the context differs!
+ if (perReaderTermState == null
+ || perReaderTermState.topReaderContext != context) {
+ // make TermQuery single-pass if we don't have a PRTS or if the context
+ // differs!
termState = TermContext.build(context, term);
} else {
- // PRTS was pre-build for this IS
- termState = this.perReaderTermState;
+      // PRTS was pre-built for this IS
+ termState = this.perReaderTermState;
}
-
+
// we must not ignore the given docFreq - if set use the given value (lie)
- if (docFreq != -1)
- termState.setDocFreq(docFreq);
+ if (docFreq != -1) termState.setDocFreq(docFreq);
return new TermWeight(searcher, termState);
}
-
+
@Override
public void extractTerms(Set terms) {
terms.add(getTerm());
}
-
+
/** Prints a user-readable version of this query. */
@Override
public String toString(String field) {
@@ -192,21 +206,20 @@ public class TermQuery extends Query {
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
}
-
+
/** Returns true iff o is equal to this. */
@Override
public boolean equals(Object o) {
- if (!(o instanceof TermQuery))
- return false;
- TermQuery other = (TermQuery)o;
+ if (!(o instanceof TermQuery)) return false;
+ TermQuery other = (TermQuery) o;
return (this.getBoost() == other.getBoost())
- && this.term.equals(other.term);
+ && this.term.equals(other.term);
}
-
- /** Returns a hash code value for this object.*/
+
+ /** Returns a hash code value for this object. */
@Override
public int hashCode() {
return Float.floatToIntBits(getBoost()) ^ term.hashCode();
}
-
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
index 6697524..71f15ee 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
@@ -17,10 +17,11 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
/** Expert: A Scorer for documents matching a Term.
*/
@@ -64,6 +65,36 @@ final class TermScorer extends Scorer {
public int nextDoc() throws IOException {
return docsEnum.nextDoc();
}
+
+ @Override
+ public int nextPosition() throws IOException {
+ return docsEnum.nextPosition();
+ }
+
+ @Override
+ public int startPosition() throws IOException {
+ return docsEnum.startPosition();
+ }
+
+ @Override
+ public int endPosition() throws IOException {
+ return docsEnum.endPosition();
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return docsEnum.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return docsEnum.endOffset();
+ }
+
+ @Override
+ public BytesRef getPayload() throws IOException {
+ return docsEnum.getPayload();
+ }
@Override
public float score() throws IOException {
@@ -92,5 +123,16 @@ final class TermScorer extends Scorer {
/** Returns a string representation of this TermScorer. */
@Override
- public String toString() { return "scorer(" + weight + ")"; }
+ public String toString() {
+ return "scorer(" + weight + ")[" + super.toString() + "]";
+ }
+
+ // TODO: benchmark if the specialized conjunction really benefits
+ // from this, or if instead its from sorting by docFreq, or both
+
+ DocsEnum getDocsEnum() {
+ return docsEnum;
+ }
+
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java b/lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java
index 9a08a2b..9baf702 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java
@@ -156,7 +156,12 @@ public class TimeLimitingCollector implements Collector {
};
}
-
+
+ @Override
+ public int postingFeatures() {
+ return collector.postingFeatures();
+ }
+
/**
* This is so the same timer can be used with a multi-phase search process such as grouping.
* We don't want to create a new TimeLimitingCollector for each phase because that would
diff --git a/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java b/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java
index 4fc5be6..188d06c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.DocsEnum;
/**
* Just counts the total number of hits.
@@ -36,6 +37,12 @@ public class TotalHitCountCollector extends SimpleCollector {
}
@Override
+ public int postingFeatures() {
+ // we don't need frequencies here
+ return DocsEnum.FLAG_NONE;
+ }
+
+ @Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java
index 8398157..8122279 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Weight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java
@@ -17,14 +17,14 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
-import org.apache.lucene.index.AtomicReader; // javadocs
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.IndexReaderContext; // javadocs
+import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
+import java.io.IOException;
+
/**
* Expert: Calculate query weights and build query scorers.
*
@@ -35,7 +35,7 @@ import org.apache.lucene.util.Bits;
* {@link AtomicReader} dependent state should reside in the {@link Scorer}.
*
* Since {@link Weight} creates {@link Scorer} instances for a given
- * {@link AtomicReaderContext} ({@link #scorer(AtomicReaderContext, Bits)})
+ * {@link AtomicReaderContext} ({@link #scorer(AtomicReaderContext, int, Bits)})
* callers must maintain the relationship between the searcher's top-level
* {@link IndexReaderContext} and the context used to create a {@link Scorer}.
*
The query normalization factor is passed to {@link #normalize(float, float)}. At
* this point the weighting is complete.
*
A Scorer is constructed by
- * {@link #scorer(AtomicReaderContext, Bits)}.
+ * {@link #scorer(AtomicReaderContext, int, Bits)}.
*
*
* @since 2.9
@@ -97,7 +97,7 @@ public abstract class Weight {
* @return a {@link Scorer} which scores documents in/out-of order.
* @throws IOException if there is a low-level I/O error
*/
- public abstract Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException;
+ public abstract Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException;
/**
* Optional method, to return a {@link BulkScorer} to
@@ -126,9 +126,9 @@ public abstract class Weight {
* passes them to a collector.
* @throws IOException if there is a low-level I/O error
*/
- public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException {
+ public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, int flags, Bits acceptDocs) throws IOException {
- Scorer scorer = scorer(context, acceptDocs);
+ Scorer scorer = scorer(context, flags, acceptDocs);
if (scorer == null) {
// No docs match
return null;
@@ -199,14 +199,13 @@ public abstract class Weight {
* Returns true iff this implementation scores docs only out of order. This
* method is used in conjunction with {@link Collector}'s
* {@link LeafCollector#acceptsDocsOutOfOrder() acceptsDocsOutOfOrder} and
- * {@link #bulkScorer(AtomicReaderContext, boolean, Bits)} to
+ * {@link #bulkScorer(AtomicReaderContext, boolean, int, Bits)} to
* create a matching {@link Scorer} instance for a given {@link Collector}, or
* vice versa.
*
* NOTE: the default implementation returns false, i.e.
* the Scorer scores documents in-order.
*/
- public boolean scoresDocsOutOfOrder() {
- return false;
- }
+ public boolean scoresDocsOutOfOrder() { return false; }
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/package.html b/lucene/core/src/java/org/apache/lucene/search/package.html
index 1be51fb..1bba734 100644
--- a/lucene/core/src/java/org/apache/lucene/search/package.html
+++ b/lucene/core/src/java/org/apache/lucene/search/package.html
@@ -436,8 +436,8 @@ on the built-in available scoring models and extending or changing Similarity.
that scores via a {@link org.apache.lucene.search.similarities.Similarity Similarity} will just defer to the Similarity's implementation:
{@link org.apache.lucene.search.similarities.Similarity.SimWeight#normalize SimWeight#normalize(float,float)}.
- {@link org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.AtomicReaderContext, org.apache.lucene.util.Bits)
- scorer(AtomicReaderContext context, Bits acceptDocs)} —
+ {@link org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.AtomicReaderContext, PostingFeatures flags, org.apache.lucene.util.Bits)
+ scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs)} —
Construct a new {@link org.apache.lucene.search.Scorer Scorer} for this Weight. See The Scorer Class
below for help defining a Scorer. As the name implies, the Scorer is responsible for doing the actual scoring of documents
given the Query.
diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
index d2e924e..bf8eab4 100644
--- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
@@ -18,6 +18,7 @@ package org.apache.lucene.search.payloads;
*/
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Scorer;
@@ -148,14 +149,14 @@ public class PayloadNearQuery extends SpanNearQuery {
}
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException {
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
similarity, similarity.simScorer(stats, context));
}
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
- PayloadNearSpanScorer scorer = (PayloadNearSpanScorer) scorer(context, context.reader().getLiveDocs());
+ PayloadNearSpanScorer scorer = (PayloadNearSpanScorer) scorer(context, DocsEnum.FLAG_PAYLOADS, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
index 04ecd80..67aeee2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
@@ -19,7 +19,7 @@ package org.apache.lucene.search.payloads;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
@@ -79,7 +79,7 @@ public class PayloadTermQuery extends SpanTermQuery {
}
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, int flags, Bits acceptDocs) throws IOException {
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
this, similarity.simScorer(stats, context));
}
@@ -120,7 +120,7 @@ public class PayloadTermQuery extends SpanTermQuery {
protected void processPayload(Similarity similarity) throws IOException {
if (termSpans.isPayloadAvailable()) {
- final DocsAndPositionsEnum postings = termSpans.getPostings();
+ final DocsEnum postings = termSpans.getPostings();
payload = postings.getPayload();
if (payload != null) {
payloadScore = function.currentScore(doc, term.field(),
@@ -176,7 +176,7 @@ public class PayloadTermQuery extends SpanTermQuery {
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
- PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, context.reader().getLiveDocs());
+ PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, DocsEnum.FLAG_POSITIONS, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
diff --git a/lucene/core/src/java/org/apache/lucene/search/posfilter/BlockPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/posfilter/BlockPhraseScorer.java
new file mode 100644
index 0000000..a9d6e19
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/posfilter/BlockPhraseScorer.java
@@ -0,0 +1,70 @@
+package org.apache.lucene.search.posfilter;
+
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.similarities.Similarity;
+
+import java.io.IOException;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class BlockPhraseScorer extends PositionFilteredScorer {
+
+ private final Interval[] subIntervals;
+
+ public BlockPhraseScorer(Scorer filteredScorer, Similarity.SimScorer simScorer) {
+ super(filteredScorer, simScorer);
+ subIntervals = new Interval[subScorers.length];
+ for (int i = 0; i < subScorers.length; i++) {
+ subIntervals[i] = new Interval();
+ }
+ }
+
+ @Override
+ public void reset(int doc) throws IOException {
+ super.reset(doc);
+ for (int i = 0; i < subScorers.length; i++) {
+ subIntervals[i].reset();
+ }
+ }
+
+ @Override
+ protected int doNextPosition() throws IOException {
+ if (subScorers[0].nextPosition() == NO_MORE_POSITIONS)
+ return NO_MORE_POSITIONS;
+ subIntervals[0].update(subScorers[0]);
+ int i = 1;
+ while (i < subScorers.length) {
+ while (subIntervals[i].begin <= subIntervals[i - 1].end) {
+ if (subScorers[i].nextPosition() == NO_MORE_POSITIONS)
+ return NO_MORE_POSITIONS;
+ subIntervals[i].update(subScorers[i]);
+ }
+ if (subIntervals[i].begin == subIntervals[i - 1].end + 1) {
+ i++;
+ }
+ else {
+ if (subScorers[0].nextPosition() == NO_MORE_POSITIONS)
+ return NO_MORE_POSITIONS;
+ subIntervals[0].update(subScorers[0]);
+ i = 1;
+ }
+ }
+ current.update(subIntervals[0], subIntervals[subScorers.length - 1]);
+ return subScorers[0].startPosition();
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/posfilter/Interval.java b/lucene/core/src/java/org/apache/lucene/search/posfilter/Interval.java
new file mode 100644
index 0000000..b29f85c
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/posfilter/Interval.java
@@ -0,0 +1,200 @@
+package org.apache.lucene.search.posfilter;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.DocsEnum;
+
+import java.io.IOException;
+
+/**
+ * Represents a section of a document that matches a query
+ */
+public class Interval implements Cloneable {
+
+ /** The position of the start of this Interval */
+ public int begin;
+
+ /** The position of the end of this Interval */
+ public int end;
+
+ /** The offset of the start of this Interval */
+ public int offsetBegin;
+
+ /** The offset of the end of this Interval */
+ public int offsetEnd;
+
+ /** An interval that will always compare as less than any other interval */
+ public static final Interval INFINITE_INTERVAL = new Interval();
+
+ /**
+ * Constructs a new Interval
+ * @param begin the start position
+ * @param end the end position
+ * @param offsetBegin the start offset
+ * @param offsetEnd the end offset
+ */
+ public Interval(int begin, int end, int offsetBegin, int offsetEnd) {
+ this.begin = begin;
+ this.end = end;
+ this.offsetBegin = offsetBegin;
+ this.offsetEnd = offsetEnd;
+ }
+
+ /**
+ * Constructs a new Interval with no initial values. This
+ * will always compare as less than any other Interval.
+ */
+ public Interval() {
+ this(Integer.MIN_VALUE, Integer.MIN_VALUE, -1, -1);
+ }
+
+ public Interval(DocsEnum docsEnum) throws IOException {
+ this.begin = docsEnum.startPosition();
+ this.end = docsEnum.endPosition();
+ this.offsetBegin = docsEnum.startOffset();
+ this.offsetEnd = docsEnum.endOffset();
+ }
+
+ /**
+ * Update to span the range defined by two other Intervals.
+ * @param start the first Interval
+ * @param end the second Interval
+ */
+ public void update(Interval start, Interval end) {
+ this.begin = start.begin;
+ this.offsetBegin = start.offsetBegin;
+ this.end = end.end;
+ this.offsetEnd = end.offsetEnd;
+ }
+
+ /**
+ * Compare with another Interval.
+ * @param other the comparator
+ * @return true if both start and end positions are less than
+   * the comparator's.
+ */
+ public boolean lessThanExclusive(Interval other) {
+ return begin < other.begin && end < other.end;
+ }
+
+ /**
+ * Compare with another Interval.
+ * @param other the comparator
+ * @return true if both start and end positions are less than
+ * or equal to the comparator's.
+ */
+ public boolean lessThan(Interval other) {
+ return begin <= other.begin && end <= other.end;
+ }
+
+ /**
+ * Compare with another Interval
+ * @param other the comparator
+   * @return true if both start and end positions are greater than
+ * the comparator's.
+ */
+ public boolean greaterThanExclusive(Interval other) {
+ return begin > other.begin && end > other.end;
+ }
+
+ /**
+ * Compare with another Interval
+ * @param other the comparator
+   * @return true if both start and end positions are greater than
+   * or equal to the comparator's.
+ */
+ public boolean greaterThan(Interval other) {
+ return begin >= other.begin && end >= other.end;
+ }
+
+ /**
+ * Compare with another Interval
+ * @param other the comparator
+ * @return true if this Interval contains the comparator
+ */
+ public boolean contains(Interval other) {
+ return begin <= other.begin && other.end <= end;
+ }
+
+ /**
+ * Compare with another Interval to find overlaps
+   * @param other the comparator
+ * @return true if the two intervals overlap
+ */
+ public boolean overlaps(Interval other) {
+ return this.contains(other) || other.contains(this);
+ }
+
+ /**
+ * Set all values of this Interval to be equal to another's
+ * @param other the Interval to copy
+ */
+ public void copy(Interval other) {
+ begin = other.begin;
+ end = other.end;
+ offsetBegin = other.offsetBegin;
+ offsetEnd = other.offsetEnd;
+ }
+
+ /**
+ * Set to a state that will always compare as less than any
+ * other Interval.
+ */
+ public void reset() {
+ offsetBegin = offsetEnd = -1;
+ begin = end = Integer.MIN_VALUE;
+ }
+
+ /**
+ * Set to a state that will always compare as more than any
+ * other Interval.
+ */
+ public void setMaximum() {
+ offsetBegin = offsetEnd = -1;
+ begin = end = Integer.MAX_VALUE;
+ }
+
+ @Override
+ public Object clone() {
+ try {
+ return super.clone();
+ } catch (CloneNotSupportedException e) {
+ throw new RuntimeException(); // should not happen
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "Interval [begin=" + begin + "(" + offsetBegin + "), end="
+ + end + "(" + offsetEnd + ")]";
+ }
+
+ public void update(DocsEnum docsEnum) throws IOException {
+ offsetBegin = docsEnum.startOffset();
+ offsetEnd = docsEnum.endOffset();
+ begin = docsEnum.startPosition();
+ end = docsEnum.endPosition();
+ }
+
+ public void update(Interval interval) {
+ this.begin = interval.begin;
+ this.end = interval.end;
+ this.offsetBegin = interval.offsetBegin;
+ this.offsetEnd = interval.offsetEnd;
+ }
+
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/posfilter/NonOverlappingQuery.java b/lucene/core/src/java/org/apache/lucene/search/posfilter/NonOverlappingQuery.java
new file mode 100644
index 0000000..a6f340f
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/posfilter/NonOverlappingQuery.java
@@ -0,0 +1,204 @@
+package org.apache.lucene.search.posfilter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.Bits;
+
+import java.io.IOException;
+import java.util.Set;
+
+/**
+ * A Query that matches documents containing an interval (the minuend) that
+ * does not contain another interval (the subtrahend).
+ *
+ * As an example, given the following {@link org.apache.lucene.search.BooleanQuery}:
+ *