diff --git a/.gitignore b/.gitignore
index 443dd2f..811524e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,7 +19,6 @@
/prj.el
/bin
/bin.*
-**/pom.xml
/nbproject
/nb-build
@@ -43,4 +42,219 @@
solr/contrib/dataimporthandler/test-lib/
solr/contrib/morphlines-core/test-lib/
+# ./solr/example/example-DIH/solr/db
+/solr/example/example-DIH/solr/db/data
+
+
+# ./solr/example/example-DIH/solr/db/conf
+/solr/example/example-DIH/solr/db/conf/dataimport.properties
+
+
+# ./solr/example/example-DIH/solr/tika
+/solr/example/example-DIH/solr/tika/data
+
+
+# ./solr/site-src
+/solr/site-src/build
+
+
+# ./solr/webapp
+/solr/webapp/pom.xml
+
+
+# ./solr/test-framework
+/solr/test-framework/pom.xml
+
+
+# ./solr/contrib
+/solr/contrib/pom.xml
+
+
+# ./solr/contrib/clustering
+/solr/contrib/clustering/build
+/solr/contrib/clustering/*.iml
+/solr/contrib/clustering/pom.xml
+
+
+# ./solr/contrib/clustering/lib
+/solr/contrib/clustering/lib/pcj-*
+/solr/contrib/clustering/lib/simple-xml-*
+/solr/contrib/clustering/lib/colt-*
+/solr/contrib/clustering/lib/nni-*
+/solr/contrib/clustering/lib/downloads
+
+
+# ./solr/contrib/uima
+/solr/contrib/uima/build
+/solr/contrib/uima/*.iml
+/solr/contrib/uima/pom.xml
+
+
+# ./solr/contrib/velocity
+/solr/contrib/velocity/*.iml
+/solr/contrib/velocity/pom.xml
+
+
+# ./solr/contrib/extraction
+/solr/contrib/extraction/build
+/solr/contrib/extraction/*.iml
+/solr/contrib/extraction/pom.xml
+
+
+# ./solr/contrib/analysis-extras
+/solr/contrib/analysis-extras/build
+/solr/contrib/analysis-extras/lucene-libs
+/solr/contrib/analysis-extras/*.iml
+/solr/contrib/analysis-extras/pom.xml
+
+
+# ./solr/contrib/langid
+/solr/contrib/langid/*.iml
+/solr/contrib/langid/pom.xml
+
+
+# ./solr/contrib/dataimporthandler-extras
+/solr/contrib/dataimporthandler-extras/*.iml
+/solr/contrib/dataimporthandler-extras/pom.xml
+
+
+# ./solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/conf
+/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/conf/dataimport.properties
+
+
+# ./solr/contrib/dataimporthandler
+/solr/contrib/dataimporthandler/target
+/solr/contrib/dataimporthandler/*.iml
+/solr/contrib/dataimporthandler/pom.xml
+
+
+# ./solr/contrib/dataimporthandler/src
+/solr/contrib/dataimporthandler/src/pom.xml
+
+
+# ./solr/contrib/dataimporthandler/src/test-files
+/solr/contrib/dataimporthandler/src/test-files/dataimport.properties
+
+
+# ./solr/contrib/dataimporthandler/src/test-files/dih/solr/conf
+/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport.properties
+
+
+# ./modules
+/modules/build
+/modules/dist
+/modules/pom.xml
+
+
+# ./modules/facet
+/modules/facet/build
+/modules/facet/dist
+/modules/facet/*.iml
+/modules/facet/pom.xml
+
+
+# ./modules/benchmark
+/modules/benchmark/temp
+/modules/benchmark/work
+/modules/benchmark/build
+/modules/benchmark/*.iml
+/modules/benchmark/pom.xml
+
+
+# ./modules/grouping
+/modules/grouping/build
+/modules/grouping/dist
+/modules/grouping/*.iml
+/modules/grouping/pom.xml
+
+
+# ./modules/spatial
+/modules/spatial/build
+/modules/spatial/pom.xml
+/modules/spatial/*.iml
+
+
+# ./modules/analysis
+/modules/analysis/build
+/modules/analysis/pom.xml
+
+
+# ./modules/analysis/icu
+/modules/analysis/icu/*.iml
+/modules/analysis/icu/pom.xml
+
+
+# ./modules/analysis/uima
+/modules/analysis/uima/*.iml
+/modules/analysis/uima/pom.xml
+
+
+# ./modules/analysis/morfologik
+/modules/analysis/morfologik/*.iml
+/modules/analysis/morfologik/pom.xml
+
+
+# ./modules/analysis/common
+/modules/analysis/common/*.iml
+/modules/analysis/common/pom.xml
+
+
+# ./modules/analysis/common/src/java/org/apache/lucene/analysis/standard
+/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/Token.java
+/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
+/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java
+/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/TokenMgrError.java
+/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/CharStream.java
+/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerConstants.java
+
+
+# ./modules/analysis/stempel
+/modules/analysis/stempel/*.iml
+/modules/analysis/stempel/pom.xml
+
+
+# ./modules/analysis/phonetic
+/modules/analysis/phonetic/*.iml
+/modules/analysis/phonetic/pom.xml
+
+
+# ./modules/analysis/smartcn
+/modules/analysis/smartcn/*.iml
+/modules/analysis/smartcn/pom.xml
+
+
+# ./modules/analysis/kuromoji
+/modules/analysis/kuromoji/*.iml
+/modules/analysis/kuromoji/pom.xml
+
+
+# ./modules/queries
+/modules/queries/build
+/modules/queries/pom.xml
+/modules/queries/*.iml
+
+
+# ./modules/join
+/modules/join/build
+/modules/join/dist
+/modules/join/*.iml
+/modules/join/pom.xml
+
+
+# ./modules/queryparser
+/modules/queryparser/pom.xml
+/modules/queryparser/*.iml
+/modules/queryparser/build
+
+
+# ./modules/suggest
+/modules/suggest/build
+/modules/suggest/*.iml
+/modules/suggest/pom.xml
+/modules/suggest/dist
+
+/modules/spatial/build/
+
+# maven-build
+target/
solr/core/test-lib/
diff --git a/README.txt b/README.txt
index 3599b5b..addf65c 100644
--- a/README.txt
+++ b/README.txt
@@ -1,13 +1,13 @@
-Apache Lucene/Solr
+Apache Lucene/Solr - positions fork
lucene/ is a search engine library
solr/ is a search engine server that uses lucene
-To compile the sources run 'ant compile'
-To run all the tests run 'ant test'
-To setup your ide run 'ant idea', 'ant netbeans', or 'ant eclipse'
-For Maven info, see dev-tools/maven/README.maven
+This version of Lucene/Solr is a fork based on
+https://issues.apache.org/jira/browse/LUCENE-2878, which allows consumer code to
+iterate through the individual hit positions of a searcher match.
-For more information on how to contribute see:
-http://wiki.apache.org/lucene-java/HowToContribute
-http://wiki.apache.org/solr/HowToContribute
+To build:
+
+cd maven-build
+mvn -DskipTests install
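+
+Example usage (a rough sketch, not part of this patch: the IntervalIterator
+methods scorerAdvanced() and next(), the Interval begin/end fields, and the
+"body" field name are assumptions based on the LUCENE-2878 proposal and may
+differ in this fork):
+
+  IndexSearcher searcher = new IndexSearcher(reader);
+  Query query = new TermQuery(new Term("body", "lucene"));
+  Weight weight = searcher.createNormalizedWeight(query);
+  for (AtomicReaderContext ctx : searcher.getIndexReader().leaves()) {
+    // request position data for this scorer (plain scoring uses DOCS_AND_FREQS)
+    Scorer scorer = weight.scorer(ctx, Weight.PostingFeatures.POSITIONS,
+                                  ctx.reader().getLiveDocs());
+    if (scorer == null) continue;
+    IntervalIterator it = scorer.intervals(false);
+    int doc;
+    while ((doc = scorer.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+      it.scorerAdvanced(doc);
+      for (Interval i = it.next(); i != null; i = it.next()) {
+        System.out.println(doc + ": [" + i.begin + ", " + i.end + "]");
+      }
+    }
+  }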
diff --git a/dev-tools/idea/lucene/highlighter/highlighter.iml b/dev-tools/idea/lucene/highlighter/highlighter.iml
index 0a8e679..8b6d644 100644
--- a/dev-tools/idea/lucene/highlighter/highlighter.iml
+++ b/dev-tools/idea/lucene/highlighter/highlighter.iml
@@ -12,6 +12,7 @@
+
diff --git a/dev-tools/maven/pom.xml.template b/dev-tools/maven/pom.xml.template
index 5539f18..c257b1e 100644
--- a/dev-tools/maven/pom.xml.template
+++ b/dev-tools/maven/pom.xml.template
@@ -176,6 +176,11 @@
<groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-release-plugin</artifactId>
+ <version>2.5</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-clean-plugin</artifactId>
<version>2.5</version>
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
index 53b4ecd..d4e347b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
@@ -118,6 +118,12 @@ public final class StemmerOverrideFilter extends TokenFilter {
/**
* Returns the value mapped to the given key or null if the key is not in the FST dictionary.
+ * @param buffer a char[] buffer containing the key
+ * @param bufferLen the length of the char[] buffer
+ * @param scratchArc a scratch Arc
+ * @param fstReader an fstReader
+ * @return a {@link BytesRef} pointing to the value or null
+ * @throws IOException if reading from the FST fails
*/
public BytesRef get(char[] buffer, int bufferLen, Arc<BytesRef> scratchArc, BytesReader fstReader) throws IOException {
BytesRef pendingOutput = fst.outputs.getNoOutput();
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
index 4d7635d..d30df0d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -17,13 +17,6 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@@ -32,6 +25,13 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
/** A Query that matches documents matching boolean combinations of other
* queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
* BooleanQuerys.
@@ -143,8 +143,8 @@ public class BooleanQuery extends Query implements Iterable {
if (clauses.size() >= maxClauseCount) {
throw new TooManyClauses();
}
-
clauses.add(clause);
+ fieldset.addAll(clause.getQuery().getFields());
}
/** Returns the set of clauses in this query. */
@@ -242,7 +242,7 @@ public class BooleanQuery extends Query implements Iterable {
for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) {
Weight w = wIter.next();
BooleanClause c = cIter.next();
- if (w.scorer(context, context.reader().getLiveDocs()) == null) {
+ if (w.scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()) == null) {
if (c.isRequired()) {
fail = true;
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
@@ -306,12 +306,12 @@ public class BooleanQuery extends Query implements Iterable {
@Override
public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder,
- Bits acceptDocs) throws IOException {
+ PostingFeatures flags, Bits acceptDocs) throws IOException {
if (scoreDocsInOrder || minNrShouldMatch > 1) {
// TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch
// but the same is even true of pure conjunctions...
- return super.bulkScorer(context, scoreDocsInOrder, acceptDocs);
+ return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs);
}
List<BulkScorer> prohibited = new ArrayList<BulkScorer>();
@@ -319,7 +319,7 @@ public class BooleanQuery extends Query implements Iterable {
Iterator<BooleanClause> cIter = clauses.iterator();
for (Weight w : weights) {
BooleanClause c = cIter.next();
- BulkScorer subScorer = w.bulkScorer(context, false, acceptDocs);
+ BulkScorer subScorer = w.bulkScorer(context, false, flags, acceptDocs);
if (subScorer == null) {
if (c.isRequired()) {
return null;
@@ -328,7 +328,7 @@ public class BooleanQuery extends Query implements Iterable {
// TODO: there are some cases where BooleanScorer
// would handle conjunctions faster than
// BooleanScorer2...
- return super.bulkScorer(context, scoreDocsInOrder, acceptDocs);
+ return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs);
} else if (c.isProhibited()) {
prohibited.add(subScorer);
} else {
@@ -340,7 +340,7 @@ public class BooleanQuery extends Query implements Iterable {
}
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs)
+ public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs)
throws IOException {
// initially the user provided value,
// but if minNrShouldMatch == optional.size(),
@@ -353,7 +353,7 @@ public class BooleanQuery extends Query implements Iterable {
Iterator<BooleanClause> cIter = clauses.iterator();
for (Weight w : weights) {
BooleanClause c = cIter.next();
- Scorer subScorer = w.scorer(context, acceptDocs);
+ Scorer subScorer = w.scorer(context, flags, acceptDocs);
if (subScorer == null) {
if (c.isRequired()) {
return null;
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
index 173bb44..dbd8827 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
@@ -17,15 +17,11 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.search.BooleanQuery.BooleanWeight;
+
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
import java.util.List;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.search.BooleanQuery.BooleanWeight;
-
/* Description from Doug Cutting (excerpted from
* LUCENE-1483):
*
@@ -96,14 +92,14 @@ final class BooleanScorer extends BulkScorer {
public void setScorer(Scorer scorer) {
this.scorer = scorer;
}
-
+
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
}
-
+
static final class Bucket {
int doc = -1; // tells if bucket is valid
double score; // incremental score
@@ -262,7 +258,6 @@ final class BooleanScorer extends BulkScorer {
return false;
}
- @Override
public String toString() {
StringBuilder buffer = new StringBuilder();
buffer.append("boolean(");
diff --git a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
index c5957d8..3e6e757 100644
--- a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
@@ -18,6 +18,8 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.index.AtomicReaderContext;
+
+import org.apache.lucene.search.intervals.IntervalIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
@@ -77,9 +79,14 @@ public abstract class CachingCollector extends FilterCollector {
public final int nextDoc() { throw new UnsupportedOperationException(); }
@Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException { throw new UnsupportedOperationException(); }
+
+ @Override
public long cost() { return 1; }
+
}
+ // A CachingCollector which does not cache scores
private static class NoScoreCachingCollector extends CachingCollector {
List acceptDocsOutOfOrders;
diff --git a/lucene/core/src/java/org/apache/lucene/search/Collector.java b/lucene/core/src/java/org/apache/lucene/search/Collector.java
index bb47394..9749748 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Collector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Collector.java
@@ -17,10 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.index.AtomicReaderContext;
+import java.io.IOException;
+
/**
 * <p>Expert: Collectors are primarily meant to be used to
* gather raw results from a search, and implement sorting
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
index 3e81187..5b5b2fe 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
@@ -17,15 +17,19 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.search.intervals.CombinedIntervalIterator;
+import org.apache.lucene.search.intervals.IntervalIterator;
+import org.apache.lucene.util.ArrayUtil;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
-import org.apache.lucene.util.ArrayUtil;
-
/** Scorer for conjunctions, sets of queries, all of which are required. */
class ConjunctionScorer extends Scorer {
+
+ private final Scorer[] scorersOrdered;
protected int lastDoc = -1;
protected final DocsAndFreqs[] docsAndFreqs;
private final DocsAndFreqs lead;
@@ -42,7 +46,10 @@ class ConjunctionScorer extends Scorer {
for (int i = 0; i < scorers.length; i++) {
docsAndFreqs[i] = new DocsAndFreqs(scorers[i]);
}
- // Sort the array the first time to allow the least frequent DocsEnum to
+ scorersOrdered = new Scorer[scorers.length];
+ System.arraycopy(scorers, 0, scorersOrdered, 0, scorers.length);
+
+ // Sort the array the first time to allow the least frequent DocsEnum to
// lead the matching.
ArrayUtil.timSort(docsAndFreqs, new Comparator<DocsAndFreqs>() {
@Override
@@ -114,6 +121,16 @@ class ConjunctionScorer extends Scorer {
public int freq() {
return docsAndFreqs.length;
}
+
+ @Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ if (scorersOrdered == null) {
+ throw new IllegalStateException("no positions requested for this scorer");
+ }
+ // only created if needed for this scorer - no penalty for non-positional queries
+ return new CombinedIntervalIterator(this, collectIntervals, pullIterators(collectIntervals, scorersOrdered));
+ }
+
@Override
public long cost() {
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
index d26abe1..9cc9560 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
@@ -17,8 +17,6 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
@@ -28,8 +26,10 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
-import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
class ConstantScoreAutoRewrite extends TermCollectingRewrite {
@@ -109,7 +109,7 @@ class ConstantScoreAutoRewrite extends TermCollectingRewrite {
}
}
// Strip scores
- final Query result = new ConstantScoreQuery(bq);
+ final Query result = new ConstantScoreQuery(query.getField(), bq);
result.setBoost(query.getBoost());
return result;
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
index 2b7f4ed..ece5c6e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
@@ -20,6 +20,7 @@ package org.apache.lucene.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.intervals.IntervalIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
@@ -36,14 +37,20 @@ import java.util.Set;
public class ConstantScoreQuery extends Query {
protected final Filter filter;
protected final Query query;
+ protected final String field;
/** Strips off scores from the passed in Query. The hits will get a constant score
* dependent on the boost factor of this query. */
public ConstantScoreQuery(Query query) {
+ this(null, query);
+ }
+
+ public ConstantScoreQuery(String field, Query query) {
if (query == null)
throw new NullPointerException("Query may not be null");
this.filter = null;
this.query = query;
+ this.field = field;
}
/** Wraps a Filter as a Query. The hits will get a constant score
@@ -57,6 +64,7 @@ public class ConstantScoreQuery extends Query {
throw new NullPointerException("Filter may not be null");
this.filter = filter;
this.query = null;
+ this.field = null;
}
/** Returns the encapsulated filter, returns {@code null} if a query is wrapped. */
@@ -134,14 +142,14 @@ public class ConstantScoreQuery extends Query {
}
@Override
- public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException {
+ public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, PostingFeatures flags, Bits acceptDocs) throws IOException {
final DocIdSetIterator disi;
if (filter != null) {
assert query == null;
- return super.bulkScorer(context, scoreDocsInOrder, acceptDocs);
+ return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs);
} else {
assert query != null && innerWeight != null;
- BulkScorer bulkScorer = innerWeight.bulkScorer(context, scoreDocsInOrder, acceptDocs);
+ BulkScorer bulkScorer = innerWeight.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs);
if (bulkScorer == null) {
return null;
}
@@ -150,7 +158,7 @@ public class ConstantScoreQuery extends Query {
}
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException {
final DocIdSetIterator disi;
if (filter != null) {
assert query == null;
@@ -161,7 +169,7 @@ public class ConstantScoreQuery extends Query {
disi = dis.iterator();
} else {
assert query != null && innerWeight != null;
- disi = innerWeight.scorer(context, acceptDocs);
+ disi = innerWeight.scorer(context, flags, acceptDocs);
}
if (disi == null) {
@@ -177,7 +185,7 @@ public class ConstantScoreQuery extends Query {
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
- final Scorer cs = scorer(context, context.reader().getLiveDocs());
+ final Scorer cs = scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs());
final boolean exists = (cs != null && cs.advance(doc) == doc);
final ComplexExplanation result = new ComplexExplanation();
@@ -267,6 +275,15 @@ public class ConstantScoreQuery extends Query {
public long cost() {
return docIdSetIterator.cost();
}
+
+ @Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ if (docIdSetIterator instanceof Scorer) {
+ return ((Scorer) docIdSetIterator).intervals(collectIntervals);
+ } else {
+ throw new UnsupportedOperationException("positions are only supported on Scorer subclasses");
+ }
+ }
@Override
public Collection<ChildScorer> getChildren() {
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
index c195497..b365695 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
@@ -16,6 +16,11 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.Bits;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@@ -23,11 +28,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Set;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.util.Bits;
-
/**
* A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum
* score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries.
@@ -153,11 +153,11 @@ public class DisjunctionMaxQuery extends Query implements Iterable {
/** Create the scorer used to score our associated DisjunctionMaxQuery */
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException {
List<Scorer> scorers = new ArrayList<>();
for (Weight w : weights) {
// we will advance() subscorers
- Scorer subScorer = w.scorer(context, acceptDocs);
+ Scorer subScorer = w.scorer(context, flags, acceptDocs);
if (subScorer != null) {
scorers.add(subScorer);
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
index b5d0a0d..bf1cc47 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
@@ -16,6 +16,9 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.search.intervals.DisjunctionIntervalIterator;
+import org.apache.lucene.search.intervals.IntervalIterator;
+
import java.io.IOException;
/**
@@ -46,6 +49,7 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier, Scorer[] subScorers) {
super(weight, subScorers);
this.tieBreakerMultiplier = tieBreakerMultiplier;
+
}
@Override
@@ -66,4 +70,10 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
protected float getFinal() {
return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
}
+
+ @Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ return new DisjunctionIntervalIterator(this, collectIntervals, pullIterators(collectIntervals, subScorers));
+ }
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
index 5b7e2ff..62b633a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
@@ -25,7 +25,7 @@ import java.util.Collection;
* Base class for Scorers that score disjunctions.
*/
abstract class DisjunctionScorer extends Scorer {
- private final Scorer subScorers[];
+ protected final Scorer subScorers[];
private int numScorers;
/** The document number of the current match. */
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
index f291695..6b711b2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
@@ -17,6 +17,9 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.search.intervals.DisjunctionIntervalIterator;
+import org.apache.lucene.search.intervals.IntervalIterator;
+
import java.io.IOException;
/** A Scorer for OR like queries, counterpart of ConjunctionScorer.
@@ -50,4 +53,10 @@ final class DisjunctionSumScorer extends DisjunctionScorer {
protected float getFinal() {
return (float)score * coord[freq];
}
+
+ @Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ return new DisjunctionIntervalIterator(this, collectIntervals, pullIterators(collectIntervals, subScorers));
+ }
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
index 909cfe0..684215b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@@ -17,25 +17,30 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.search.PhraseQuery.TermDocsEnumFactory;
+import org.apache.lucene.search.intervals.BlockIntervalIterator;
+import org.apache.lucene.search.intervals.IntervalIterator;
+import org.apache.lucene.search.intervals.TermIntervalIterator;
+import org.apache.lucene.search.similarities.Similarity;
+
import java.io.IOException;
import java.util.Arrays;
-import org.apache.lucene.index.*;
-import org.apache.lucene.search.similarities.Similarity;
-
final class ExactPhraseScorer extends Scorer {
private final int endMinus1;
-
+
private final static int CHUNK = 4096;
-
+
private int gen;
private final int[] counts = new int[CHUNK];
private final int[] gens = new int[CHUNK];
-
+
boolean noDocs;
private final long cost;
private final static class ChunkState {
+ final TermDocsEnumFactory factory;
final DocsAndPositionsEnum posEnum;
final int offset;
final boolean useAdvance;
@@ -43,26 +48,30 @@ final class ExactPhraseScorer extends Scorer {
int posLimit;
int pos;
int lastPos;
-
- public ChunkState(DocsAndPositionsEnum posEnum, int offset, boolean useAdvance) {
+
+ public ChunkState(TermDocsEnumFactory factory, DocsAndPositionsEnum posEnum, int offset,
+ boolean useAdvance) throws IOException {
+ this.factory = factory;
this.posEnum = posEnum;
this.offset = offset;
this.useAdvance = useAdvance;
}
}
-
+
private final ChunkState[] chunkStates;
-
+
private int docID = -1;
private int freq;
private final Similarity.SimScorer docScorer;
-
+ private final String field;
+
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
- Similarity.SimScorer docScorer) throws IOException {
+ Similarity.SimScorer docScorer, String field) throws IOException {
super(weight);
this.docScorer = docScorer;
-
+ this.field = field;
+
chunkStates = new ChunkState[postings.length];
endMinus1 = postings.length-1;
@@ -71,36 +80,37 @@ final class ExactPhraseScorer extends Scorer {
cost = postings[0].postings.cost();
for(int i=0;i<postings.length;i++) {
// Coarse optimization: advance(target) is fairly
// costly, so, if the relative freq of the 2nd
// rarest term is not that much (> 1/5th) rarer than
// the first term, then we just use .nextDoc() when
- // ANDing. This buys ~15% gain for phrases where
+ // ANDing. This buys ~15% gain for phrases where
// freq of rarest 2 terms is close:
- final boolean useAdvance = postings[i].docFreq > 5*postings[0].docFreq;
- chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position, useAdvance);
- if (i > 0 && postings[i].postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
+ final boolean useAdvance = postings[i].docFreq > 5 * postings[0].docFreq;
+ chunkStates[i] = new ChunkState(postings[i].factory, postings[i].postings,
+ -postings[i].position, useAdvance);
+ if (i > 0
+ && postings[i].postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
noDocs = true;
return;
}
}
}
-
+
@Override
public int nextDoc() throws IOException {
- while(true) {
-
+ while (true) {
+
// first (rarest) term
final int doc = chunkStates[0].posEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
docID = doc;
return doc;
}
-
+
// not-first terms
int i = 1;
- while(i < chunkStates.length) {
+ while (i < chunkStates.length) {
final ChunkState cs = chunkStates[i];
int doc2 = cs.posEnum.docID();
if (cs.useAdvance) {
@@ -109,7 +119,7 @@ final class ExactPhraseScorer extends Scorer {
}
} else {
int iter = 0;
- while(doc2 < doc) {
+ while (doc2 < doc) {
// safety net -- fallback to .advance if we've
// done too many .nextDocs
if (++iter == 50) {
@@ -125,12 +135,12 @@ final class ExactPhraseScorer extends Scorer {
}
i++;
}
-
+
if (i == chunkStates.length) {
// this doc has all the terms -- now test whether
// phrase occurs
docID = doc;
-
+
freq = phraseFreq();
if (freq != 0) {
return docID;
@@ -138,22 +148,22 @@ final class ExactPhraseScorer extends Scorer {
}
}
}
-
+
@Override
public int advance(int target) throws IOException {
-
+
// first term
int doc = chunkStates[0].posEnum.advance(target);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
docID = DocIdSetIterator.NO_MORE_DOCS;
return doc;
}
-
- while(true) {
+
+ while (true) {
// not-first terms
int i = 1;
- while(i < chunkStates.length) {
+ while (i < chunkStates.length) {
int doc2 = chunkStates[i].posEnum.docID();
if (doc2 < doc) {
doc2 = chunkStates[i].posEnum.advance(doc);
@@ -163,7 +173,7 @@ final class ExactPhraseScorer extends Scorer {
}
i++;
}
-
+
if (i == chunkStates.length) {
// this doc has all the terms -- now test whether
// phrase occurs
@@ -173,7 +183,7 @@ final class ExactPhraseScorer extends Scorer {
return docID;
}
}
-
+
doc = chunkStates[0].posEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
docID = doc;
@@ -181,63 +191,63 @@ final class ExactPhraseScorer extends Scorer {
}
}
}
-
+
@Override
public String toString() {
return "ExactPhraseScorer(" + weight + ")";
}
-
+
@Override
public int freq() {
return freq;
}
-
+
@Override
public int docID() {
return docID;
}
-
+
@Override
public float score() {
return docScorer.score(docID, freq);
}
-
+
private int phraseFreq() throws IOException {
-
+
freq = 0;
-
+
// init chunks
- for(int i=0;i cs.lastPos) {
cs.lastPos = cs.pos;
final int posIndex = cs.pos - chunkStart;
@@ -245,7 +255,7 @@ final class ExactPhraseScorer extends Scorer {
assert gens[posIndex] != gen;
gens[posIndex] = gen;
}
-
+
if (cs.posUpto == cs.posLimit) {
end = true;
break;
@@ -254,13 +264,13 @@ final class ExactPhraseScorer extends Scorer {
cs.pos = cs.offset + cs.posEnum.nextPosition();
}
}
-
+
// middle terms
boolean any = true;
- for(int t=1;t cs.lastPos) {
cs.lastPos = cs.pos;
final int posIndex = cs.pos - chunkStart;
@@ -270,7 +280,7 @@ final class ExactPhraseScorer extends Scorer {
any = true;
}
}
-
+
if (cs.posUpto == cs.posLimit) {
end = true;
break;
@@ -278,32 +288,33 @@ final class ExactPhraseScorer extends Scorer {
cs.posUpto++;
cs.pos = cs.offset + cs.posEnum.nextPosition();
}
-
+
if (!any) {
break;
}
}
-
+
if (!any) {
// petered out for this chunk
chunkStart += CHUNK;
chunkEnd += CHUNK;
continue;
}
-
+
// last term
-
+
{
final ChunkState cs = chunkStates[endMinus1];
- while(cs.pos < chunkEnd) {
+ while (cs.pos < chunkEnd) {
if (cs.pos > cs.lastPos) {
cs.lastPos = cs.pos;
final int posIndex = cs.pos - chunkStart;
- if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1) {
+ if (posIndex >= 0 && gens[posIndex] == gen
+ && counts[posIndex] == endMinus1) {
freq++;
}
}
-
+
if (cs.posUpto == cs.posLimit) {
end = true;
break;
@@ -312,15 +323,26 @@ final class ExactPhraseScorer extends Scorer {
cs.pos = cs.offset + cs.posEnum.nextPosition();
}
}
-
+
chunkStart += CHUNK;
chunkEnd += CHUNK;
}
-
+
return freq;
}
@Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ TermIntervalIterator[] posIters = new TermIntervalIterator[chunkStates.length];
+ DocsAndPositionsEnum[] enums = new DocsAndPositionsEnum[chunkStates.length];
+ for (int i = 0; i < chunkStates.length; i++) {
+ posIters[i] = new TermIntervalIterator(this, enums[i] = chunkStates[i].factory.docsAndPositionsEnum(),
+ false, collectIntervals, field);
+ }
+ return new SloppyPhraseScorer.AdvancingIntervalIterator(this, collectIntervals, enums, new BlockIntervalIterator(this, collectIntervals, posIters));
+ }
+
+ @Override
public long cost() {
return cost;
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java
index e2a50c8..b84feba 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java
@@ -17,6 +17,9 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.search.intervals.IntervalIterator;
+
+import java.io.IOException;
import java.util.Collection;
/** Used by {@link BulkScorer}s that need to pass a {@link
@@ -49,7 +52,12 @@ final class FakeScorer extends Scorer {
public int nextDoc() {
throw new UnsupportedOperationException("FakeScorer doesn't support nextDoc()");
}
-
+
+ @Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ throw new UnsupportedOperationException("FakeScorer doesn't support intervals()");
+ }
+
@Override
public float score() {
return score;
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java b/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java
index 247bb03..d770115 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java
@@ -1,9 +1,9 @@
package org.apache.lucene.search;
-import java.io.IOException;
-
import org.apache.lucene.index.AtomicReaderContext;
+import java.io.IOException;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterLeafCollector.java b/lucene/core/src/java/org/apache/lucene/search/FilterLeafCollector.java
index e3ae9a8..1e8d818 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FilterLeafCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FilterLeafCollector.java
@@ -49,6 +49,11 @@ public class FilterLeafCollector implements LeafCollector {
}
@Override
+ public Weight.PostingFeatures postingFeatures() {
+ return in.postingFeatures();
+ }
+
+ @Override
public String toString() {
return getClass().getSimpleName() + "(" + in + ")";
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java
index 6b64d8a..607115e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java
@@ -17,11 +17,12 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.search.intervals.IntervalIterator;
+import org.apache.lucene.util.AttributeSource;
+
import java.io.IOException;
import java.util.Collection;
-import org.apache.lucene.util.AttributeSource;
-
/**
* A {@code FilterScorer} contains another {@code Scorer}, which it
* uses as its basic source of data, possibly transforming the data along the
@@ -79,4 +80,9 @@ abstract class FilterScorer extends Scorer {
public AttributeSource attributes() {
return in.attributes();
}
+
+ @Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ return in.intervals(collectIntervals);
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java b/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
index d700a30..ab854d9 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
@@ -20,6 +20,8 @@ package org.apache.lucene.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Weight.PostingFeatures;
+import org.apache.lucene.search.intervals.IntervalIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
@@ -124,7 +126,7 @@ public class FilteredQuery extends Query {
// return a filtering scorer
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException {
assert filter != null;
DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs);
@@ -133,12 +135,13 @@ public class FilteredQuery extends Query {
return null;
}
- return strategy.filteredScorer(context, weight, filterDocIdSet);
+ return strategy.filteredScorer(context, weight, filterDocIdSet, flags);
}
// return a filtering top scorer
@Override
- public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException {
+ public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, PostingFeatures flags, Bits acceptDocs) throws IOException {
+
assert filter != null;
DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs);
@@ -147,7 +150,9 @@ public class FilteredQuery extends Query {
return null;
}
- return strategy.filteredBulkScorer(context, weight, scoreDocsInOrder, filterDocIdSet);
+
+ return strategy.filteredBulkScorer(context, weight, scoreDocsInOrder, filterDocIdSet, flags);
+
}
};
}
@@ -189,7 +194,6 @@ public class FilteredQuery extends Query {
return scorerDoc = doc;
}
}
-
@Override
public int docID() {
return scorerDoc;
@@ -209,6 +213,11 @@ public class FilteredQuery extends Query {
}
@Override
+ public IntervalIterator intervals(boolean collectIntervals)
+ throws IOException {
+ return scorer.intervals(collectIntervals);
+ }
+
public long cost() {
return scorer.cost();
}
@@ -319,6 +328,11 @@ public class FilteredQuery extends Query {
}
@Override
+ public IntervalIterator intervals(boolean collectIntervals)
+ throws IOException {
+ return scorer.intervals(collectIntervals);
+ }
+
public long cost() {
return Math.min(primary.cost(), secondary.cost());
}
@@ -480,12 +494,13 @@ public class FilteredQuery extends Query {
* the {@link AtomicReaderContext} for which to return the {@link Scorer}.
* @param weight the {@link FilteredQuery} {@link Weight} to create the filtered scorer.
* @param docIdSet the filter {@link DocIdSet} to apply
+ * @param flags the low level {@link PostingFeatures} for this scorer.
* @return a filtered scorer
*
* @throws IOException if an {@link IOException} occurs
*/
public abstract Scorer filteredScorer(AtomicReaderContext context,
- Weight weight, DocIdSet docIdSet) throws IOException;
+ Weight weight, DocIdSet docIdSet, PostingFeatures flags) throws IOException;
/**
* Returns a filtered {@link BulkScorer} based on this
@@ -500,8 +515,8 @@ public class FilteredQuery extends Query {
* @return a filtered top scorer
*/
public BulkScorer filteredBulkScorer(AtomicReaderContext context,
- Weight weight, boolean scoreDocsInOrder, DocIdSet docIdSet) throws IOException {
- Scorer scorer = filteredScorer(context, weight, docIdSet);
+ Weight weight, boolean scoreDocsInOrder, DocIdSet docIdSet, PostingFeatures flags) throws IOException {
+ Scorer scorer = filteredScorer(context, weight, docIdSet, flags);
if (scorer == null) {
return null;
}
@@ -509,6 +524,7 @@ public class FilteredQuery extends Query {
// ignore scoreDocsInOrder:
return new Weight.DefaultBulkScorer(scorer);
}
+
}
/**
@@ -522,7 +538,7 @@ public class FilteredQuery extends Query {
public static class RandomAccessFilterStrategy extends FilterStrategy {
@Override
- public Scorer filteredScorer(AtomicReaderContext context, Weight weight, DocIdSet docIdSet) throws IOException {
+ public Scorer filteredScorer(AtomicReaderContext context, Weight weight, DocIdSet docIdSet, PostingFeatures flags) throws IOException {
final DocIdSetIterator filterIter = docIdSet.iterator();
if (filterIter == null) {
// this means the filter does not accept any documents.
@@ -539,12 +555,12 @@ public class FilteredQuery extends Query {
final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc);
if (useRandomAccess) {
// if we are using random access, we return the inner scorer, just with other acceptDocs
- return weight.scorer(context, filterAcceptDocs);
+ return weight.scorer(context, flags, filterAcceptDocs);
} else {
assert firstFilterDoc > -1;
// we are gonna advance() this scorer, so we set inorder=true/toplevel=false
// we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice
- final Scorer scorer = weight.scorer(context, null);
+ final Scorer scorer = weight.scorer(context, flags, null);
// TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer
return (scorer == null) ? null : new PrimaryAdvancedLeapFrogScorer(weight, firstFilterDoc, filterIter, scorer);
}
@@ -578,14 +594,14 @@ public class FilteredQuery extends Query {
@Override
public Scorer filteredScorer(AtomicReaderContext context,
- Weight weight, DocIdSet docIdSet) throws IOException {
+ Weight weight, DocIdSet docIdSet, PostingFeatures flags) throws IOException {
final DocIdSetIterator filterIter = docIdSet.iterator();
if (filterIter == null) {
// this means the filter does not accept any documents.
return null;
}
// we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice
- final Scorer scorer = weight.scorer(context, null);
+ final Scorer scorer = weight.scorer(context, flags, null);
if (scorer == null) {
return null;
}
@@ -614,15 +630,14 @@ public class FilteredQuery extends Query {
private static final class QueryFirstFilterStrategy extends FilterStrategy {
@Override
public Scorer filteredScorer(final AtomicReaderContext context,
- Weight weight,
- DocIdSet docIdSet) throws IOException {
+ Weight weight, DocIdSet docIdSet, PostingFeatures flags) throws IOException {
Bits filterAcceptDocs = docIdSet.bits();
if (filterAcceptDocs == null) {
// Filter does not provide random-access Bits; we
// must fallback to leapfrog:
- return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet);
+ return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet, flags);
}
- final Scorer scorer = weight.scorer(context, null);
+ final Scorer scorer = weight.scorer(context, flags, null);
return scorer == null ? null : new QueryFirstScorer(weight,
filterAcceptDocs, scorer);
}
@@ -631,14 +646,14 @@ public class FilteredQuery extends Query {
public BulkScorer filteredBulkScorer(final AtomicReaderContext context,
Weight weight,
boolean scoreDocsInOrder, // ignored (we always top-score in order)
- DocIdSet docIdSet) throws IOException {
+ DocIdSet docIdSet, PostingFeatures flags) throws IOException {
Bits filterAcceptDocs = docIdSet.bits();
if (filterAcceptDocs == null) {
// Filter does not provide random-access Bits; we
// must fallback to leapfrog:
- return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredBulkScorer(context, weight, scoreDocsInOrder, docIdSet);
+ return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredBulkScorer(context, weight, scoreDocsInOrder, docIdSet, flags);
}
- final Scorer scorer = weight.scorer(context, null);
+ final Scorer scorer = weight.scorer(context, flags, null);
return scorer == null ? null : new QueryFirstBulkScorer(scorer, filterAcceptDocs);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
index 8f1a5f6..655003b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -17,6 +17,23 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.ReaderUtil;
+import org.apache.lucene.index.StoredDocument;
+import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.store.NIOFSDirectory;
+import org.apache.lucene.util.ThreadInterruptedException;
+
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
@@ -32,23 +49,6 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DirectoryReader; // javadocs
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.index.ReaderUtil;
-import org.apache.lucene.index.StoredDocument;
-import org.apache.lucene.index.StoredFieldVisitor;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.search.similarities.DefaultSimilarity;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.store.NIOFSDirectory; // javadoc
-import org.apache.lucene.util.ThreadInterruptedException;
-import org.apache.lucene.index.IndexWriter; // javadocs
-
/** Implements search over a single IndexReader.
*
 * <p>Applications usually need only call the inherited
@@ -608,7 +608,7 @@ public class IndexSearcher {
// continue with the following leaf
continue;
}
- BulkScorer scorer = weight.bulkScorer(ctx, !leafCollector.acceptsDocsOutOfOrder(), ctx.reader().getLiveDocs());
+ BulkScorer scorer = weight.bulkScorer(ctx, !leafCollector.acceptsDocsOutOfOrder(), leafCollector.postingFeatures(), ctx.reader().getLiveDocs());
if (scorer != null) {
try {
scorer.score(leafCollector);
diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
index 562e76d..9ea42e2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
@@ -17,10 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.index.AtomicReaderContext;
+import java.io.IOException;
+
/**
 * <p>Collector decouples the score from the collected doc:
* the score computation is skipped entirely if it's not
@@ -118,4 +118,9 @@ public interface LeafCollector {
*/
boolean acceptsDocsOutOfOrder();
+ /**
+ * Returns the posting features required by this collector.
+ */
+ public Weight.PostingFeatures postingFeatures();
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
index 8f2edd7..5fca74c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
@@ -20,6 +20,7 @@ package org.apache.lucene.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.intervals.IntervalIterator;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.Bits;
@@ -79,6 +80,10 @@ public class MatchAllDocsQuery extends Query {
}
@Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ throw new UnsupportedOperationException("MatchAllDocsQuery doesn't support IntervalIterators");
+ }
+
public long cost() {
return maxDoc;
}
@@ -114,7 +119,7 @@ public class MatchAllDocsQuery extends Query {
}
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException {
return new MatchAllScorer(context.reader(), acceptDocs, this, queryWeight);
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
index a2cb61b..3742b21 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
@@ -17,14 +17,16 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.search.intervals.CombinedIntervalIterator;
+import org.apache.lucene.search.intervals.IntervalIterator;
+import org.apache.lucene.util.ArrayUtil;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
-import org.apache.lucene.util.ArrayUtil;
-
/**
* A Scorer for OR like queries, counterpart of ConjunctionScorer.
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
@@ -212,6 +214,11 @@ class MinShouldMatchSumScorer extends Scorer {
}
}
+ @Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ return new CombinedIntervalIterator(this, collectIntervals, pullIterators(collectIntervals, sortedSubScorers));
+ }
+
/**
* Returns the score of the current document matching the query. Initially
* invalid, until {@link #nextDoc()} is called the first time.
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
index 859b893..0979d00 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
@@ -17,13 +17,11 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.AtomicReaderContext;
+
import java.io.IOException;
import java.util.Arrays;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.Scorer;
-
/**
* A {@link Collector} which allows running a search with several
* {@link Collector}s. It offers a static {@link #wrap} method which accepts a
@@ -136,6 +134,16 @@ public class MultiCollector implements Collector {
return true;
}
+ @Override
+ public Weight.PostingFeatures postingFeatures() {
+ Weight.PostingFeatures features = Weight.PostingFeatures.DOCS_ONLY;
+ for (LeafCollector c : collectors) {
+ if (c.postingFeatures().compareTo(features) > 0)
+ features = c.postingFeatures();
+ }
+ return features;
+ }
+
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
index fe326b7..d435494 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
@@ -1,6 +1,6 @@
package org.apache.lucene.search;
-/*
+/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,12 +17,9 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.*;
-
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
@@ -31,14 +28,29 @@ import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.search.PhraseQuery.TermDocsEnumFactory;
+import org.apache.lucene.search.Weight.PostingFeatures;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.ToStringUtils;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Map;
+import java.util.Set;
+
/**
* MultiPhraseQuery is a generalized version of PhraseQuery, with an added
* method {@link #add(Term[])}.
@@ -94,8 +106,10 @@ public class MultiPhraseQuery extends Query {
* @see PhraseQuery#add(Term, int)
*/
public void add(Term[] terms, int position) {
- if (termArrays.size() == 0)
+ if (termArrays.size() == 0) {
field = terms[0].field();
+ fieldset.add(field);
+ }
for (int i = 0; i < terms.length; i++) {
if (!terms[i].field().equals(field)) {
@@ -179,7 +193,7 @@ public class MultiPhraseQuery extends Query {
}
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException {
assert !termArrays.isEmpty();
final AtomicReader reader = context.reader();
final Bits liveDocs = acceptDocs;
@@ -199,7 +213,7 @@ public class MultiPhraseQuery extends Query {
final DocsAndPositionsEnum postingsEnum;
int docFreq;
-
+ TermDocsEnumFactory factory;
if (terms.length > 1) {
postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum);
@@ -221,6 +235,7 @@ public class MultiPhraseQuery extends Query {
// None of the terms are in this reader
return null;
}
+ factory = new MultiTermDocsEnumFactory(liveDocs, context, terms, termContexts, termsEnum, flags);
} else {
final Term term = terms[0];
TermState termState = termContexts.get(term).get(context.ord);
@@ -237,10 +252,10 @@ public class MultiPhraseQuery extends Query {
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
}
- docFreq = termsEnum.docFreq();
+ factory = new TermDocsEnumFactory(term.bytes(), termState, termsEnum, flags, acceptDocs);
}
-
- postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms);
+
+        postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, factory, termsEnum.docFreq(), positions.get(pos), terms);
}
// sort by increasing docFreq order
@@ -249,20 +264,20 @@ public class MultiPhraseQuery extends Query {
}
if (slop == 0) {
- ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
+ ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), field);
if (s.noDocs) {
return null;
} else {
return s;
}
} else {
- return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
+ return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), field);
}
}
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
- Scorer scorer = scorer(context, context.reader().getLiveDocs());
+ Scorer scorer = scorer(context, PostingFeatures.POSITIONS, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
@@ -406,6 +421,27 @@ public class MultiPhraseQuery extends Query {
}
return true;
}
+
+ private static class MultiTermDocsEnumFactory extends TermDocsEnumFactory {
+
+ AtomicReaderContext context;
+ Term[] terms;
+    Map<Term,TermContext> termContexts;
+
+ MultiTermDocsEnumFactory(Bits liveDocs, AtomicReaderContext context, Term[] terms,
+                             Map<Term,TermContext> termContexts, TermsEnum termsEnum, PostingFeatures flags) throws IOException {
+ super(termsEnum, flags, liveDocs);
+ this.context = context;
+ this.terms = terms;
+ this.termContexts = termContexts;
+ }
+
+ @Override
+ public DocsAndPositionsEnum docsAndPositionsEnum() throws IOException {
+ return new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum, flags);
+ }
+
+ }
}
/**
@@ -434,25 +470,41 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
}
}
- private static final class IntQueue {
- private int _arraySize = 16;
+ // TODO: Reimplement this as int[_arraySize * 3], storing position at i * 3,
+ // startOffset at i * 3 + 1 and endOffset at i * 3 + 2. Will need to also
+ // implement a new SorterTemplate to sort the array.
+
+ private static final class PositionQueue {
+ private int _arraySize = 48;
private int _index = 0;
private int _lastIndex = 0;
private int[] _array = new int[_arraySize];
- final void add(int i) {
- if (_lastIndex == _arraySize)
+ final void add(int pos, int start, int end) {
+ if (_lastIndex * 3 == _arraySize)
growArray();
- _array[_lastIndex++] = i;
+ _array[_lastIndex * 3] = pos;
+ _array[_lastIndex * 3 + 1] = start;
+ _array[_lastIndex * 3 + 2] = end;
+ _lastIndex += 1;
}
final int next() {
- return _array[_index++];
+ return _array[_index++ * 3];
+ }
+
+ final int startOffset() {
+ return _array[(_index - 1) * 3 + 1];
+ }
+
+ final int endOffset() {
+ return _array[(_index - 1) * 3 + 2];
}
final void sort() {
- Arrays.sort(_array, _index, _lastIndex);
+ //Arrays.sort(_array, _index, _lastIndex);
+ sorter.sort(_index, _lastIndex);
}
final void clear() {
@@ -470,16 +522,54 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
_array = newArray;
_arraySize *= 2;
}
+
+ private IntroSorter sorter = new IntroSorter() {
+ private int pivot;
+
+ @Override
+ protected void swap(int i, int j) {
+ int ti = _array[i * 3];
+ int ts = _array[i * 3 + 1];
+ int te = _array[i * 3 + 2];
+ _array[i * 3] = _array[j * 3];
+ _array[i * 3 + 1] = _array[j * 3 + 1];
+ _array[i * 3 + 2] = _array[j * 3 + 2];
+ _array[j * 3] = ti;
+ _array[j * 3 + 1] = ts;
+ _array[j * 3 + 2] = te;
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ return _array[i * 3] - _array[j * 3];
+ }
+
+ @Override
+ protected void setPivot(int i) {
+ pivot = i;
+        pivot = _array[i * 3]; // the pivot must hold the position value, not the slot index
+
+ @Override
+ protected int comparePivot(int j) {
+ return pivot - _array[j * 3];
+ }
+ };
}
private int _doc;
private int _freq;
private DocsQueue _queue;
- private IntQueue _posList;
+ private PositionQueue _posList;
private long cost;
-  public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
+ public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms,
+                                   Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
+ this(liveDocs, context, terms, termContexts, termsEnum, PostingFeatures.POSITIONS);
+ }
+
+  public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum, PostingFeatures flags) throws IOException {
    List<DocsAndPositionsEnum> docsEnums = new LinkedList<>();
+
for (int i = 0; i < terms.length; i++) {
final Term term = terms[i];
TermState termState = termContexts.get(term).get(context.ord);
@@ -498,7 +588,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
}
_queue = new DocsQueue(docsEnums);
- _posList = new IntQueue();
+ _posList = new PositionQueue();
}
@Override
@@ -520,7 +610,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
final int freq = postings.freq();
for (int i = 0; i < freq; i++) {
- _posList.add(postings.nextPosition());
+ _posList.add(postings.nextPosition(), postings.startOffset(), postings.endOffset());
}
if (postings.nextDoc() != NO_MORE_DOCS) {
@@ -543,12 +633,12 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
@Override
public int startOffset() {
- return -1;
+ return _posList.startOffset();
}
@Override
public int endOffset() {
- return -1;
+ return _posList.endOffset();
}
@Override
@@ -568,7 +658,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
}
@Override
- public final int freq() {
+ public final int freq() throws IOException {
return _freq;
}
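
For illustration, a minimal sketch of the query-building API touched above (field and term values are invented): MultiPhraseQuery.add(Term[]) registers several alternative terms at one position, while add(Term) behaves like PhraseQuery.add. The getFields() comment at the end relies on the fieldset tracking introduced by this patch.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.MultiPhraseQuery;

class MultiPhraseQueryExample {
  static MultiPhraseQuery buildQuery() {
    // "microsoft" followed by either "app" or "office" at the next position
    MultiPhraseQuery mpq = new MultiPhraseQuery();
    mpq.add(new Term("body", "microsoft"));
    mpq.add(new Term[] { new Term("body", "app"), new Term("body", "office") });
    return mpq; // with the change above, mpq.getFields() reports {"body"}
  }
}
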
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java
index 7fb8da6..dc98d049 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java
@@ -17,17 +17,17 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
-import org.apache.lucene.index.FilteredTermsEnum; // javadocs
+import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.SingleTermsEnum; // javadocs
+import org.apache.lucene.index.SingleTermsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
+import java.io.IOException;
+
/**
* An abstract {@link Query} that matches documents
* containing a subset of terms provided by a {@link
@@ -62,7 +62,7 @@ import org.apache.lucene.util.AttributeSource;
* #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} by default.
*/
public abstract class MultiTermQuery extends Query {
- protected final String field;
+
protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
/** Abstract class that defines how the query is rewritten. */
@@ -243,6 +243,8 @@ public abstract class MultiTermQuery extends Query {
}
};
+ protected final String field;
+
/**
* Constructs a query matching terms that cannot be represented with a single
* Term.
@@ -252,6 +254,7 @@ public abstract class MultiTermQuery extends Query {
throw new IllegalArgumentException("field must not be null");
}
this.field = field;
+ this.fieldset.add(field);
}
/** Returns the field name for this query */
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
index c975b01..3f38845 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
@@ -17,8 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Term;
+
import java.io.IOException;
-import org.apache.lucene.index.*;
/**
* Position of a term in a document that takes into account the term offset within the phrase.
@@ -44,6 +46,7 @@ final class PhrasePositions {
final boolean next() throws IOException { // increments to next doc
doc = postings.nextDoc();
+
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return false;
}
@@ -80,10 +83,14 @@ final class PhrasePositions {
/** for debug purposes */
@Override
public String toString() {
- String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count;
+ String s = "d:"+doc+" offset:"+offset+" position:"+position+" c:"+count;
if (rptGroup >=0 ) {
s += " rpt:"+rptGroup+",i"+rptInd;
}
+ s += " t: [" + terms[0];
+ for (int i = 1; i < terms.length; i++)
+      s += "," + terms[i];
+ s += "]";
return s;
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
index f19ae22..4e15e7e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -1,6 +1,6 @@
package org.apache.lucene.search;
-/*
+/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,28 +17,30 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Set;
-
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.search.Weight.PostingFeatures;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Set;
+
/** A Query that matches documents containing a particular sequence of terms.
* A PhraseQuery is built by QueryParser for input like "new york".
*
@@ -99,6 +101,7 @@ public class PhraseQuery extends Query {
public void add(Term term, int position) {
if (terms.size() == 0) {
field = term.field();
+ fieldset.add(field);
} else if (!term.field().equals(field)) {
throw new IllegalArgumentException("All phrase terms must be in the same field: " + term);
}
@@ -138,13 +141,15 @@ public class PhraseQuery extends Query {
}
  static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
+ final TermDocsEnumFactory factory;
final DocsAndPositionsEnum postings;
final int docFreq;
final int position;
final Term[] terms;
final int nTerms; // for faster comparisons
- public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term... terms) {
+ public PostingsAndFreq(DocsAndPositionsEnum postings, TermDocsEnumFactory factory, int docFreq, int position, Term... terms) throws IOException {
+ this.factory = factory;
this.postings = postings;
this.docFreq = docFreq;
this.position = position;
@@ -245,7 +250,7 @@ public class PhraseQuery extends Query {
}
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException {
assert !terms.isEmpty();
final AtomicReader reader = context.reader();
final Bits liveDocs = acceptDocs;
@@ -276,7 +281,8 @@ public class PhraseQuery extends Query {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
}
- postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t);
+ TermDocsEnumFactory factory = new TermDocsEnumFactory(t.bytes(), state, te, flags, acceptDocs);
+ postingsFreqs[i] = new PostingsAndFreq(postingsEnum, factory, te.docFreq(), positions.get(i).intValue(), t);
}
// sort by increasing docFreq order
@@ -285,7 +291,7 @@ public class PhraseQuery extends Query {
}
if (slop == 0) { // optimize exact case
- ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
+ ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), field);
if (s.noDocs) {
return null;
} else {
@@ -293,7 +299,7 @@ public class PhraseQuery extends Query {
}
} else {
return
- new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
+ new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), field);
}
}
@@ -304,7 +310,7 @@ public class PhraseQuery extends Query {
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
- Scorer scorer = scorer(context, context.reader().getLiveDocs());
+ Scorer scorer = scorer(context, PostingFeatures.POSITIONS, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
@@ -402,4 +408,33 @@ public class PhraseQuery extends Query {
^ positions.hashCode();
}
+ static class TermDocsEnumFactory {
+ protected final TermsEnum termsEnum;
+ protected final Bits liveDocs;
+ protected final PostingFeatures flags;
+
+ private final BytesRef term;
+ private final TermState termState;
+
+ TermDocsEnumFactory(TermsEnum termsEnum, PostingFeatures flags, Bits liveDocs) {
+ this(null, null, termsEnum, flags, liveDocs);
+ }
+
+ TermDocsEnumFactory(BytesRef term, TermState termState, TermsEnum termsEnum, PostingFeatures flags, Bits liveDocs) {
+ this.termsEnum = termsEnum;
+ this.termState = termState;
+ this.liveDocs = liveDocs;
+ this.term = term;
+ this.flags = flags;
+ }
+
+
+ public DocsAndPositionsEnum docsAndPositionsEnum()
+ throws IOException {
+ assert term != null;
+ termsEnum.seekExact(term, termState);
+ return termsEnum.docsAndPositions(liveDocs, null, flags.docsAndPositionsFlags());
+ }
+
+ }
}
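
A small usage sketch for the scorer selection above (field and terms are invented): a PhraseQuery with slop 0 takes the ExactPhraseScorer path, while any positive slop routes through SloppyPhraseScorer; in both cases the scorer now also receives the field name and a TermDocsEnumFactory per term.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.PhraseQuery;

class PhraseQueryExample {
  static PhraseQuery buildQuery() {
    PhraseQuery pq = new PhraseQuery();
    pq.add(new Term("body", "new"));
    pq.add(new Term("body", "york"));
    pq.setSlop(0); // exact phrase; a slop > 0 would use the sloppy scorer instead
    return pq;
  }
}
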
diff --git a/lucene/core/src/java/org/apache/lucene/search/Query.java b/lucene/core/src/java/org/apache/lucene/search/Query.java
index 92dc692..a03fc9c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Query.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Query.java
@@ -17,13 +17,13 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
-import java.util.Set;
-
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
/** The abstract base class for queries.
Instantiable subclasses are:
@@ -70,6 +70,12 @@ public abstract class Query implements Cloneable {
return toString("");
}
+  protected final Set<String> fieldset = new HashSet<>();
+
+  public Set<String> getFields() {
+ return fieldset;
+ }
+
/**
* Expert: Constructs an appropriate Weight implementation for this query.
*
diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java
index 755c3cd..d2baaca 100644
--- a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java
@@ -17,13 +17,13 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.AtomicReaderContext;
+
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
-import org.apache.lucene.index.AtomicReaderContext;
-
/** A {@link Rescorer} that uses a provided Query to assign
* scores to the first-pass hits.
*
@@ -82,7 +82,7 @@ public abstract class QueryRescorer extends Rescorer {
if (readerContext != null) {
// We advanced to another segment:
docBase = readerContext.docBase;
- scorer = weight.scorer(readerContext, null);
+ scorer = weight.scorer(readerContext, Weight.PostingFeatures.DOCS_AND_FREQS, null);
}
if(scorer != null) {
diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java b/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java
index 1d6c8ff..ce65632 100644
--- a/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java
+++ b/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java
@@ -17,11 +17,12 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.search.Weight.PostingFeatures;
import org.apache.lucene.util.Bits;
+import java.io.IOException;
+
/**
* Constrains search results to only match those which also match a provided
* query.
@@ -56,7 +57,7 @@ public class QueryWrapperFilter extends Filter {
return new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
- return weight.scorer(privateContext, acceptDocs);
+ return weight.scorer(privateContext, PostingFeatures.DOCS_AND_FREQS, acceptDocs);
}
@Override
public boolean isCacheable() { return false; }
diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
index 4e2a5f1..2a5b33e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
@@ -17,6 +17,9 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.search.intervals.CombinedIntervalIterator;
+import org.apache.lucene.search.intervals.IntervalIterator;
+
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
@@ -130,6 +133,12 @@ class ReqExclScorer extends Scorer {
}
@Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ if (reqScorer == null)
+ return IntervalIterator.NO_MORE_INTERVALS;
+ return new CombinedIntervalIterator(this, collectIntervals, reqScorer.intervals(collectIntervals));
+ }
+
public long cost() {
return reqScorer.cost();
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
index d7b4d86..a77e82a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
@@ -16,6 +16,9 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.search.intervals.DisjunctionIntervalIterator;
+import org.apache.lucene.search.intervals.IntervalIterator;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@@ -86,6 +89,11 @@ class ReqOptSumScorer extends Scorer {
}
@Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ return new DisjunctionIntervalIterator(this, collectIntervals, pullIterators(collectIntervals, reqScorer, optScorer));
+ }
+
+ @Override
public int freq() throws IOException {
// we might have deferred advance()
score();
diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
index 929d3b9..87dabcd 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
@@ -17,12 +17,13 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.search.intervals.IntervalIterator;
+
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
-import org.apache.lucene.index.DocsEnum;
-
/**
* Expert: Common scoring functionality for different types of queries.
*
@@ -53,6 +54,50 @@ public abstract class Scorer extends DocsEnum {
protected Scorer(Weight weight) {
this.weight = weight;
}
+
+ /**
+ * Expert: Retrieves an {@link IntervalIterator} for this scorer allowing
+ * access to position and offset intervals for each
+ * matching document. Call this up-front and use it as
+ * long as you are still using this scorer. The
+ * returned iterator is bound to scorer that created it;
+   * returned iterator is bound to the scorer that created it;
+ * call {@link IntervalIterator#scorerAdvanced} before
+ * iterating over that document's intervals.
+ *
+ * @param collectIntervals
+ * if true the {@link IntervalIterator} can be used to
+ * collect all individual sub-intervals this {@link IntervalIterator}
+ * is composed of via
+ * {@link IntervalIterator#collect(org.apache.lucene.search.intervals.IntervalCollector)}
+ * @return an {@link IntervalIterator} over matching intervals
+ * @throws IOException
+ * if a low-level I/O error is encountered
+ *
+ * @lucene.experimental
+ */
+ public abstract IntervalIterator intervals(boolean collectIntervals) throws IOException;
+
+ /**
+ * Get the IntervalIterators from a list of scorers
+ * @param collectIntervals true if positions will be collected
+ * @param scorers the list of scorers to retrieve IntervalIterators from
+ * @return a list of IntervalIterators pulled from the passed in Scorers
+   * @throws java.io.IOException if a low-level I/O error is encountered
+ */
+ public static IntervalIterator[] pullIterators(boolean collectIntervals, Scorer... scorers)
+ throws IOException {
+ IntervalIterator[] iterators = new IntervalIterator[scorers.length];
+ for (int i = 0; i < scorers.length; i++) {
+ if (scorers[i] == null) {
+ iterators[i] = IntervalIterator.NO_MORE_INTERVALS;
+ }
+ else {
+ iterators[i] = scorers[i].intervals(collectIntervals);
+ }
+ }
+ return iterators;
+ }
/** Returns the score of the current document matching the query.
* Initially invalid, until {@link #nextDoc()} or {@link #advance(int)}
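
To make the contract above concrete, a hedged sketch of consuming the new intervals API from a Weight (searcher and query are assumed to exist; only members introduced elsewhere in this patch, such as PostingFeatures, intervals() and scorerAdvanced(), are used):

import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.intervals.Interval;
import org.apache.lucene.search.intervals.IntervalIterator;

class IntervalDumpExample {
  static void dumpIntervals(IndexSearcher searcher, Query query) throws IOException {
    Weight weight = searcher.createNormalizedWeight(query);
    for (AtomicReaderContext ctx : searcher.getIndexReader().leaves()) {
      // ask for positions up front so the scorer can expose intervals
      Scorer scorer = weight.scorer(ctx, Weight.PostingFeatures.POSITIONS, ctx.reader().getLiveDocs());
      if (scorer == null) continue;
      IntervalIterator intervals = scorer.intervals(false); // obtain once, reuse for this segment
      int doc;
      while ((doc = scorer.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        intervals.scorerAdvanced(doc); // required after nextDoc()/advance()
        for (Interval i = intervals.next(); i != null; i = intervals.next()) {
          System.out.println(doc + ": [" + i.begin + "," + i.end + "]");
        }
      }
    }
  }
}
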
diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
index 954b2bb..657dd46 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
@@ -17,20 +17,20 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
-
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
-import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
/**
* Base rewrite method that translates each term into a query, and keeps
@@ -88,7 +88,7 @@ public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewrite<Q> {
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
final BooleanQuery bq = SCORING_BOOLEAN_QUERY_REWRITE.rewrite(reader, query);
// strip the scores off
- final Query result = new ConstantScoreQuery(bq);
+ final Query result = new ConstantScoreQuery(query.getField(), bq);
result.setBoost(query.getBoost());
return result;
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java b/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java
index 5803b2e..863dc3f 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java
@@ -17,10 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.index.AtomicReaderContext;
+import java.io.IOException;
+
/**
* Base {@link Collector} implementation that is used to collect all contexts.
*
@@ -42,7 +42,12 @@ public abstract class SimpleCollector implements Collector, LeafCollector {
// no-op by default
}
- // redeclare methods so that javadocs are inherited on sub-classes
+ @Override
+ public Weight.PostingFeatures postingFeatures() {
+ return Weight.PostingFeatures.DOCS_AND_FREQS;
+ }
+
+  // redeclare methods so that javadocs are inherited on sub-classes
@Override
public abstract boolean acceptsDocsOutOfOrder();
diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
index 986ab06..d30049c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
@@ -17,25 +17,36 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.intervals.Interval;
+import org.apache.lucene.search.intervals.IntervalCollector;
+import org.apache.lucene.search.intervals.IntervalIterator;
+import org.apache.lucene.search.intervals.SloppyIntervalIterator;
+import org.apache.lucene.search.intervals.TermIntervalIterator;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.FixedBitSet;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.util.FixedBitSet;
+import java.util.List;
+import java.util.Map;
final class SloppyPhraseScorer extends Scorer {
private PhrasePositions min, max;
private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq().
+ private final PhraseQuery.PostingsAndFreq[] postings;
private final Similarity.SimScorer docScorer;
-
+ private final String field;
+
private final int slop;
private final int numPostings;
private final PhraseQueue pq; // for advancing min position
@@ -52,9 +63,11 @@ final class SloppyPhraseScorer extends Scorer {
private final long cost;
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
- int slop, Similarity.SimScorer docScorer) {
+ int slop, Similarity.SimScorer docScorer, String field) {
super(weight);
this.docScorer = docScorer;
+ this.postings = postings;
+ this.field = field;
this.slop = slop;
this.numPostings = postings==null ? 0 : postings.length;
pq = new PhraseQueue(postings.length);
@@ -602,4 +615,96 @@ final class SloppyPhraseScorer extends Scorer {
@Override
public String toString() { return "scorer(" + weight + ")"; }
+
+ @Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+    Map<Term, IterAndOffsets> map = new HashMap<Term, IterAndOffsets>();
+    List<DocsAndPositionsEnum> enums = new ArrayList<DocsAndPositionsEnum>();
+
+ for (int i = 0; i < postings.length; i++) {
+ if (postings[i].terms.length > 1) {
+        throw new UnsupportedOperationException("IntervalIterators for MultiPhraseQuery are not supported");
+ }
+ Term term = postings[i].terms[0];
+ IterAndOffsets iterAndOffset;
+ if (!map.containsKey(term)) {
+ DocsAndPositionsEnum docsAndPosEnum = postings[i].factory
+ .docsAndPositionsEnum();
+ enums.add(docsAndPosEnum);
+ iterAndOffset = new IterAndOffsets(new TermIntervalIterator(this, docsAndPosEnum, false,
+ collectIntervals, field));
+ map.put(term, iterAndOffset);
+ } else {
+ iterAndOffset = map.get(term);
+ }
+ iterAndOffset.offsets.add(postings[i].position);
+ }
+    Collection<IterAndOffsets> values = map.values();
+ IntervalIterator[] iters = new IntervalIterator[values.size()];
+ int i = 0;
+ for (IterAndOffsets iterAndOffsets : values) {
+ iters[i++] = SloppyIntervalIterator.create(this, collectIntervals, iterAndOffsets.iter, iterAndOffsets.toIntArray());
+ }
+ return new AdvancingIntervalIterator(this, collectIntervals, enums.toArray(new DocsAndPositionsEnum[enums.size()]), new SloppyIntervalIterator(this, slop, collectIntervals, iters));
+ }
+
+ private final static class IterAndOffsets {
+    final List<Integer> offsets = new ArrayList<Integer>();
+ final IntervalIterator iter;
+
+ IterAndOffsets(IntervalIterator iter) {
+ this.iter = iter;
+ }
+
+ int[] toIntArray() {
+ int[] array = new int[offsets.size()];
+ for (int i = 0; i < array.length; i++) {
+ array[i] = offsets.get(i).intValue();
+ }
+ return array;
+ }
+ }
+
+ final static class AdvancingIntervalIterator extends IntervalIterator {
+
+ public AdvancingIntervalIterator(Scorer scorer, boolean collectIntervals, final DocsAndPositionsEnum[] enums, final IntervalIterator delegate) {
+ super(scorer, collectIntervals);
+ this.enums = enums;
+ this.delegate = delegate;
+ }
+
+ private final DocsAndPositionsEnum[] enums;
+ private final IntervalIterator delegate;
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ assert docId == docID();
+ for (DocsAndPositionsEnum oneEnum : enums) {
+ int advance = oneEnum.advance(docId);
+ assert advance == docId;
+ }
+ delegate.scorerAdvanced(docId);
+ return docId;
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ return delegate.next();
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ delegate.collect(collector);
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return delegate.subs(inOrder);
+ }
+
+ @Override
+ public int matchDistance() {
+ return delegate.matchDistance();
+ }
+
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
index 5435ccd..7e8d545 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
@@ -17,71 +17,80 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Set;
-
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
-/** A Query that matches documents containing a term.
- This may be combined with other terms with a {@link BooleanQuery}.
- */
+import java.io.IOException;
+import java.util.Set;
+
+/**
+ * A Query that matches documents containing a term. This may be combined with
+ * other terms with a {@link BooleanQuery}.
+ */
public class TermQuery extends Query {
private final Term term;
private final int docFreq;
private final TermContext perReaderTermState;
-
+
final class TermWeight extends Weight {
private final Similarity similarity;
private final Similarity.SimWeight stats;
private final TermContext termStates;
-
+
public TermWeight(IndexSearcher searcher, TermContext termStates)
- throws IOException {
+ throws IOException {
assert termStates != null : "TermContext must not be null";
this.termStates = termStates;
this.similarity = searcher.getSimilarity();
- this.stats = similarity.computeWeight(
- getBoost(),
- searcher.collectionStatistics(term.field()),
+ this.stats = similarity.computeWeight(getBoost(),
+ searcher.collectionStatistics(term.field()),
searcher.termStatistics(term, termStates));
}
-
+
@Override
- public String toString() { return "weight(" + TermQuery.this + ")"; }
-
+ public String toString() {
+ return "weight(" + TermQuery.this + ")";
+ }
+
@Override
- public Query getQuery() { return TermQuery.this; }
-
+ public Query getQuery() {
+ return TermQuery.this;
+ }
+
@Override
public float getValueForNormalization() {
return stats.getValueForNormalization();
}
-
+
@Override
public void normalize(float queryNorm, float topLevelBoost) {
stats.normalize(queryNorm, topLevelBoost);
}
-
+
@Override
- public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException {
assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
final TermsEnum termsEnum = getTermsEnum(context);
if (termsEnum == null) {
return null;
}
- DocsEnum docs = termsEnum.docs(acceptDocs, null);
+ DocsEnum docs;
+ if (flags.compareTo(PostingFeatures.POSITIONS) < 0) {
+ docs = termsEnum.docs(acceptDocs, null, flags.docFlags());
+ } else {
+ docs = termsEnum.docsAndPositions(acceptDocs, null, flags.docsAndPositionsFlags());
+ }
assert docs != null;
return new TermScorer(this, docs, similarity.simScorer(stats, context));
}
@@ -96,90 +105,101 @@ public class TermQuery extends Query {
assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
return null;
}
- //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
- final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
+ // System.out.println("LD=" + reader.getLiveDocs() + " set?=" +
+ // (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
+ final TermsEnum termsEnum = context.reader().terms(term.field())
+ .iterator(null);
termsEnum.seekExact(term.bytes(), state);
return termsEnum;
}
private boolean termNotInReader(AtomicReader reader, Term term) throws IOException {
// only called from assert
- //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
+ // System.out.println("TQ.termNotInReader reader=" + reader + " term=" +
+ // field + ":" + bytes.utf8ToString());
return reader.docFreq(term) == 0;
}
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
- Scorer scorer = scorer(context, context.reader().getLiveDocs());
+ Scorer scorer = scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
- result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
- Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));
+ result.setDescription("weight(" + getQuery() + " in " + doc + ") ["
+ + similarity.getClass().getSimpleName() + "], result of:");
+ Explanation scoreExplanation = docScorer.explain(doc,
+ new Explanation(freq, "termFreq=" + freq));
result.addDetail(scoreExplanation);
result.setValue(scoreExplanation.getValue());
result.setMatch(true);
return result;
}
}
- return new ComplexExplanation(false, 0.0f, "no matching term");
+ return new ComplexExplanation(false, 0.0f, "no matching term");
}
}
-
+
/** Constructs a query for the term t. */
public TermQuery(Term t) {
this(t, -1);
}
-
- /** Expert: constructs a TermQuery that will use the
- * provided docFreq instead of looking up the docFreq
- * against the searcher. */
+
+ /**
+ * Expert: constructs a TermQuery that will use the provided docFreq instead
+ * of looking up the docFreq against the searcher.
+ */
public TermQuery(Term t, int docFreq) {
term = t;
this.docFreq = docFreq;
perReaderTermState = null;
+ this.fieldset.add(t.field());
}
- /** Expert: constructs a TermQuery that will use the
- * provided docFreq instead of looking up the docFreq
- * against the searcher. */
+ /**
+ * Expert: constructs a TermQuery that will use the provided docFreq instead
+ * of looking up the docFreq against the searcher.
+ */
public TermQuery(Term t, TermContext states) {
assert states != null;
term = t;
docFreq = states.docFreq();
perReaderTermState = states;
+ this.fieldset.add(term.field());
}
-
+
/** Returns the term of this query. */
- public Term getTerm() { return term; }
+ public Term getTerm() {
+ return term;
+ }
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
final IndexReaderContext context = searcher.getTopReaderContext();
final TermContext termState;
+
if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
// make TermQuery single-pass if we don't have a PRTS or if the context differs!
termState = TermContext.build(context, term);
} else {
- // PRTS was pre-build for this IS
- termState = this.perReaderTermState;
+      // PRTS was pre-built for this IS
+ termState = this.perReaderTermState;
}
-
+
// we must not ignore the given docFreq - if set use the given value (lie)
- if (docFreq != -1)
- termState.setDocFreq(docFreq);
+ if (docFreq != -1) termState.setDocFreq(docFreq);
return new TermWeight(searcher, termState);
}
-
+
@Override
  public void extractTerms(Set<Term> terms) {
terms.add(getTerm());
}
-
+
/** Prints a user-readable version of this query. */
@Override
public String toString(String field) {
@@ -192,21 +212,20 @@ public class TermQuery extends Query {
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
}
-
+
/** Returns true iff o is equal to this. */
@Override
public boolean equals(Object o) {
- if (!(o instanceof TermQuery))
- return false;
- TermQuery other = (TermQuery)o;
+ if (!(o instanceof TermQuery)) return false;
+ TermQuery other = (TermQuery) o;
return (this.getBoost() == other.getBoost())
- && this.term.equals(other.term);
+ && this.term.equals(other.term);
}
-
- /** Returns a hash code value for this object.*/
+
+ /** Returns a hash code value for this object. */
@Override
public int hashCode() {
return Float.floatToIntBits(getBoost()) ^ term.hashCode();
}
-
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
index 6697524..652b805 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
@@ -17,11 +17,14 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
+import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.search.intervals.IntervalIterator;
+import org.apache.lucene.search.intervals.TermIntervalIterator;
import org.apache.lucene.search.similarities.Similarity;
+import java.io.IOException;
+
/** Expert: A Scorer for documents matching a Term.
*/
final class TermScorer extends Scorer {
@@ -93,4 +96,12 @@ final class TermScorer extends Scorer {
/** Returns a string representation of this TermScorer. */
@Override
public String toString() { return "scorer(" + weight + ")"; }
+
+ @Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ assert docsEnum instanceof DocsAndPositionsEnum;
+ String field = ((TermQuery) weight.getQuery()).getTerm().field();
+ return new TermIntervalIterator(this, (DocsAndPositionsEnum) docsEnum, false, collectIntervals, field);
+ }
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
index bfebeda..4c2d0ce 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
@@ -17,10 +17,10 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
import org.apache.lucene.index.AtomicReaderContext;
+import java.io.IOException;
+
/**
* A {@link Collector} implementation that collects the top-scoring hits,
* returning them as a {@link TopDocs}. This is used by {@link IndexSearcher} to
@@ -308,4 +308,5 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java b/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java
index 4fc5be6..5aa8d55 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.search.Weight.PostingFeatures;
/**
* Just counts the total number of hits.
@@ -36,6 +37,12 @@ public class TotalHitCountCollector extends SimpleCollector {
}
@Override
+ public PostingFeatures postingFeatures() {
+ // we don't need frequencies here
+ return PostingFeatures.DOCS_ONLY;
+ }
+
+ @Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
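
The override above shows the cheapest case; a collector that needs richer postings declares that in the same way. A hedged sketch of a collector that asks for offsets (class and field names are invented; only the postingFeatures() hook added by this patch is assumed):

import java.io.IOException;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.Weight;

class OffsetsCollector extends SimpleCollector {
  private Scorer scorer;

  @Override
  public Weight.PostingFeatures postingFeatures() {
    return Weight.PostingFeatures.OFFSETS; // scorers for this search must expose offsets
  }

  @Override
  public void setScorer(Scorer scorer) throws IOException {
    this.scorer = scorer;
  }

  @Override
  public void collect(int doc) throws IOException {
    // scorer.intervals(...) could be consulted here for offset ranges
  }

  @Override
  public boolean acceptsDocsOutOfOrder() {
    return false;
  }
}
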
diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java
index 8398157..b9a1c15 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Weight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java
@@ -17,14 +17,18 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-
-import org.apache.lucene.index.AtomicReader; // javadocs
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.IndexReaderContext; // javadocs
+
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReaderContext; // javadocs
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
+import java.io.IOException;
+
/**
* Expert: Calculate query weights and build query scorers.
*
@@ -35,7 +39,8 @@ import org.apache.lucene.util.Bits;
* {@link AtomicReader} dependent state should reside in the {@link Scorer}.
*
* Since {@link Weight} creates {@link Scorer} instances for a given
- * {@link AtomicReaderContext} ({@link #scorer(AtomicReaderContext, Bits)})
+ * {@link AtomicReaderContext} ({@link #scorer(AtomicReaderContext,
+ * PostingFeatures, Bits)})
* callers must maintain the relationship between the searcher's top-level
* {@link IndexReaderContext} and the context used to create a {@link Scorer}.
*
 * <li>The query normalization factor is passed to {@link #normalize(float, float)}. At
* this point the weighting is complete.
*
 * <li>A <code>Scorer</code> is constructed by
- * {@link #scorer(AtomicReaderContext, Bits)}.
+ * {@link #scorer(AtomicReaderContext, PostingFeatures, Bits)}.
*
*
* @since 2.9
@@ -90,6 +95,7 @@ public abstract class Weight {
*
* @param context
* the {@link AtomicReaderContext} for which to return the {@link Scorer}.
+   * @param flags the low-level {@link PostingFeatures} for this scorer.
* @param acceptDocs
* Bits that represent the allowable docs to match (typically deleted docs
* but possibly filtering other documents)
@@ -97,7 +103,7 @@ public abstract class Weight {
* @return a {@link Scorer} which scores documents in/out-of order.
* @throws IOException if there is a low-level I/O error
*/
- public abstract Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException;
+ public abstract Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException;
/**
* Optional method, to return a {@link BulkScorer} to
@@ -116,19 +122,15 @@ public abstract class Weight {
* in-order scorer is also an out-of-order one. However, an
* out-of-order scorer may not support {@link Scorer#nextDoc()}
* and/or {@link Scorer#advance(int)}, therefore it is recommended to
- * request an in-order scorer if use of these
- * methods is required.
* @param acceptDocs
* Bits that represent the allowable docs to match (typically deleted docs
* but possibly filtering other documents)
- *
- * @return a {@link BulkScorer} which scores documents and
- * passes them to a collector.
+   * @return a {@link BulkScorer} which scores documents and passes them to a collector.
* @throws IOException if there is a low-level I/O error
*/
- public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException {
+ public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, PostingFeatures flags, Bits acceptDocs) throws IOException {
- Scorer scorer = scorer(context, acceptDocs);
+ Scorer scorer = scorer(context, flags, acceptDocs);
if (scorer == null) {
// No docs match
return null;
@@ -206,6 +208,58 @@ public abstract class Weight {
* NOTE: the default implementation returns false, i.e.
* the Scorer scores documents in-order.
*/
+
+ /**
+ * Feature flags used to control low-level posting list features. These flags
+   * allow Collectors and Scorers to specify their requirements for document
+ * collection and scoring ahead of time for best performance.
+ */
+ public static enum PostingFeatures {
+ /**Only document IDs are required for document collection and scoring*/
+ DOCS_ONLY(0, 0),
+ /**Document IDs and Term Frequencies are required for document collection and scoring*/
+ DOCS_AND_FREQS(DocsEnum.FLAG_FREQS, 0),
+ /**Document IDs, Term Frequencies and Positions are required for document collection and scoring*/
+ POSITIONS(DocsEnum.FLAG_FREQS, 0),
+ /**Document IDs, Term Frequencies, Positions and Payloads are required for document collection and scoring*/
+ POSITIONS_AND_PAYLOADS(DocsEnum.FLAG_FREQS, DocsAndPositionsEnum.FLAG_PAYLOADS),
+ /**Document IDs, Term Frequencies, Positions and Offsets are required for document collection and scoring*/
+ OFFSETS(DocsEnum.FLAG_FREQS, DocsAndPositionsEnum.FLAG_OFFSETS),
+ /**Document IDs, Term Frequencies, Positions, Offsets and Payloads are required for document collection and scoring*/
+ OFFSETS_AND_PAYLOADS(DocsEnum.FLAG_FREQS, DocsAndPositionsEnum.FLAG_OFFSETS
+ | DocsAndPositionsEnum.FLAG_PAYLOADS);
+
+ private final int docsAndPositionsFlags;
+ private final int docFlags;
+
+ private PostingFeatures(int docFlags, int docsAndPositionsFlags) {
+ this.docsAndPositionsFlags = docsAndPositionsFlags;
+ this.docFlags = docFlags;
+ }
+
+ /**
+ * Returns the flags for {@link DocsAndPositionsEnum}. This value should be
+ * passed to
+ * {@link TermsEnum#docsAndPositions(Bits, DocsAndPositionsEnum, int)}
+ *
+ * @return {@link DocsAndPositionsEnum} flags
+ */
+ public int docsAndPositionsFlags() {
+ return docsAndPositionsFlags;
+ }
+
+ /**
+ * Returns the flags for {@link DocsEnum}. This value should be
+ * passed to
+ * {@link TermsEnum#docs(Bits, DocsEnum, int)}
+ *
+ * @return {@link DocsEnum} flags
+ */
+ public int docFlags() {
+ return docFlags;
+ }
+ }
+
public boolean scoresDocsOutOfOrder() {
return false;
}
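
As a rough illustration of how the enum above is meant to be consumed (it mirrors the TermQuery change earlier in this patch and is only a sketch), a scorer picks the cheapest postings enum that still satisfies the requested features and forwards the corresponding flag set:

import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Weight.PostingFeatures;
import org.apache.lucene.util.Bits;

class PostingFeaturesExample {
  // request the cheapest postings that still satisfy "flags"
  static DocsEnum postings(TermsEnum termsEnum, PostingFeatures flags, Bits liveDocs) throws IOException {
    if (flags.compareTo(PostingFeatures.POSITIONS) < 0) {
      // DOCS_ONLY / DOCS_AND_FREQS: a plain DocsEnum is enough
      return termsEnum.docs(liveDocs, null, flags.docFlags());
    }
    // POSITIONS and above need a DocsAndPositionsEnum
    return termsEnum.docsAndPositions(liveDocs, null, flags.docsAndPositionsFlags());
  }
}
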
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/BlockIntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/BlockIntervalFilter.java
new file mode 100644
index 0000000..7383c20
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/BlockIntervalFilter.java
@@ -0,0 +1,41 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class BlockIntervalFilter implements IntervalFilter {
+
+ private final boolean collectLeaves;
+
+ public BlockIntervalFilter() {
+ this(true);
+ }
+
+ public BlockIntervalFilter(boolean collectLeaves) {
+ this.collectLeaves = collectLeaves;
+ }
+
+ @Override
+ public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) {
+ return new BlockIntervalIterator(collectIntervals, collectLeaves, iter);
+ }
+
+ @Override
+ public String toString() {
+ return "BLOCK";
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/BlockIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/BlockIntervalIterator.java
new file mode 100644
index 0000000..d19fbe7
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/BlockIntervalIterator.java
@@ -0,0 +1,180 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.search.Scorer;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * An IntervalIterator implementing minimum interval semantics for the
+ * BLOCK operator
+ *
+ * See "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics"
+ *
+ * @lucene.experimental
+ */
+public final class BlockIntervalIterator extends IntervalIterator {
+ private final IntervalIterator[] iterators;
+
+ private static final Interval INFINITE_INTERVAL = new Interval();
+ private final Interval[] intervals;
+ private final Interval interval = new Interval();
+ private final int[] gaps;
+
+ private final int lastIter;
+ private boolean collectLeaves = true;
+
+ public BlockIntervalIterator(boolean collectIntervals, boolean collectLeaves, IntervalIterator other) {
+ this(collectIntervals, other);
+ this.collectLeaves = collectLeaves;
+ }
+
+ /**
+ * Construct a BlockIntervalIterator over a compound IntervalIterator. The
+ * sub-iterators must be in order and sequential for a match.
+ * @param collectIntervals true if intervals will be collected
+ * @param other the compound {@link IntervalIterator} used to extract the individual block iterators
+ */
+ public BlockIntervalIterator(boolean collectIntervals, IntervalIterator other) {
+ this(collectIntervals, defaultIncrements(other.subs(true).length), other);
+ }
+
+ /**
+ * Construct a BlockIntervalIterator over a compound IntervalIterator using
+ * a supplied increments array.
+ * @param collectIntervals true if intervals will be collected
+ * @param increments an array of position increments between the iterators
+ * @param other the compound {@link IntervalIterator} used to extract the individual block iterators
+ */
+ public BlockIntervalIterator(boolean collectIntervals, int[] increments, IntervalIterator other) {
+ super(other.getScorer(), collectIntervals);
+ assert other.subs(true) != null;
+ iterators = other.subs(true);
+ assert iterators.length > 1;
+ intervals = new Interval[iterators.length];
+ lastIter = iterators.length - 1;
+ this.gaps = increments;
+ }
+
+ /**
+ * Construct a BlockIntervalIterator over a set of subiterators using a supplied
+ * increments array
+ * @param scorer the parent Scorer
+ * @param increments an array of position increments between the iterators
+ * @param collectIntervals true if intervals will be collected
+ * @param iterators the subiterators
+ */
+ public BlockIntervalIterator(Scorer scorer, int[] increments, boolean collectIntervals,
+ IntervalIterator... iterators) {
+ super(scorer, collectIntervals);
+ assert iterators.length > 1;
+ this.iterators = iterators;
+ intervals = new Interval[iterators.length];
+ lastIter = iterators.length - 1;
+ this.gaps = increments;
+ }
+
+ /**
+ * Construct a BlockIntervalIterator over a set of subiterators
+ * @param scorer the parent Scorer
+ * @param collectIntervals true if intervals will be collected
+ * @param iterators the subiterators
+ */
+ public BlockIntervalIterator(Scorer scorer, boolean collectIntervals, IntervalIterator... iterators) {
+ this(scorer, defaultIncrements(iterators.length), collectIntervals, iterators);
+ }
+
+ private static int[] defaultIncrements(int num) {
+ int[] gaps = new int[num];
+ Arrays.fill(gaps, 1);
+ return gaps;
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ if ((intervals[0] = iterators[0].next()) == null) {
+ return null;
+ }
+ int offset = 0;
+ for (int i = 1; i < iterators.length;) {
+ final int gap = gaps[i];
+ while (intervals[i].begin + gap <= intervals[i - 1].end) {
+ if ((intervals[i] = iterators[i].next()) == null) {
+ return null;
+ }
+ }
+ offset += gap;
+ if (intervals[i].begin == intervals[i - 1].end + gaps[i]) {
+ i++;
+ if (i < iterators.length && intervals[i] == INFINITE_INTERVAL) {
+ // advance only if really necessary
+ iterators[i].scorerAdvanced(docID());
+ assert iterators[i].docID() == docID();
+ }
+ } else {
+ do {
+ if ((intervals[0] = iterators[0].next()) == null) {
+ return null;
+ }
+ } while (intervals[0].begin < intervals[i].end - offset);
+
+ i = 1;
+ }
+ }
+ interval.begin = intervals[0].begin;
+ interval.end = intervals[lastIter].end;
+ interval.offsetBegin = intervals[0].offsetBegin;
+ interval.offsetEnd = intervals[lastIter].offsetEnd;
+ interval.field = intervals[0].field;
+ return interval;
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return iterators;
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ assert collectIntervals;
+ collector.collectComposite(scorer, interval, docID());
+ if (collectLeaves) {
+ for (IntervalIterator iter : iterators) {
+ iter.collect(collector);
+ }
+ }
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ iterators[0].scorerAdvanced(docId);
+ assert iterators[0].docID() == docId;
+ iterators[1].scorerAdvanced(docId);
+ assert iterators[1].docID() == docId;
+ Arrays.fill(intervals, INFINITE_INTERVAL);
+ return docId;
+ }
+
+ @Override
+ public int matchDistance() {
+ return intervals[lastIter].begin - intervals[0].end;
+ }
+}
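
For intuition, the matching rule enforced in next() above can be restated in isolation: with the default increments (all 1), each sub-interval must begin exactly one position after the previous one ends, so the terms form a contiguous block. A toy, self-contained restatement of that test (not part of the patch):

class BlockAdjacencyExample {
  // true when begins/ends describe a block under the given gaps,
  // i.e. begins[i] == ends[i - 1] + gaps[i] for every i > 0
  static boolean isBlock(int[] begins, int[] ends, int[] gaps) {
    for (int i = 1; i < begins.length; i++) {
      if (begins[i] != ends[i - 1] + gaps[i]) {
        return false;
      }
    }
    return true;
  }
}
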
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/BrouwerianIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/BrouwerianIntervalIterator.java
new file mode 100644
index 0000000..5ee6abb
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/BrouwerianIntervalIterator.java
@@ -0,0 +1,123 @@
+package org.apache.lucene.search.intervals;
+
+import org.apache.lucene.search.Scorer;
+
+import java.io.IOException;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * IntervalIterator based on minimal interval semantics for the Brouwerian
+ * operator. This {@link IntervalIterator} computes the difference M - S
+ * between the anti-chains M (the minuend) and S (the subtrahend).
+ *
+ * See "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics"
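+ *
+ * A minimal usage sketch (the scorer, sub-iterators and field name are
+ * hypothetical placeholders):
+ * <pre>
+ *   IntervalIterator diff =
+ *       new BrouwerianIntervalIterator(scorer, false, minuendIt, subtrahendIt, "body");
+ *   diff.scorerAdvanced(doc);
+ *   for (Interval i = diff.next(); i != null; i = diff.next()) {
+ *     // i is an interval of minuendIt that overlaps no interval of subtrahendIt
+ *   }
+ * </pre>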
+ */
+public class BrouwerianIntervalIterator extends IntervalIterator {
+
+ private final IntervalIterator minuend;
+ private final IntervalIterator subtracted;
+ private Interval subtractedInterval;
+ private Interval currentInterval;
+ private final String field;
+
+  /**
+   * Construct a new BrouwerianIntervalIterator over a minuend and a subtrahend
+   * IntervalIterator
+   * @param scorer the parent Scorer
+   * @param collectIntervals true if intervals will be collected
+   * @param minuend the minuend IntervalIterator
+   * @param subtracted the subtrahend IntervalIterator
+   * @param field the field both iterators are positioned on
+   */
+ public BrouwerianIntervalIterator(Scorer scorer, boolean collectIntervals,
+ IntervalIterator minuend, IntervalIterator subtracted, String field) {
+ super(scorer, collectIntervals);
+ this.minuend = minuend;
+ this.subtracted = subtracted;
+ this.field = field;
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ minuend.scorerAdvanced(docId);
+ if (subtracted.docID() <= docId)
+ subtracted.scorerAdvanced(docId);
+ subtractedInterval = new Interval(field);
+ return docId;
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ if (subtracted.docID() != minuend.docID() || subtractedInterval == null) {
+ currentInterval = minuend.next();
+ return currentInterval;
+ }
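+    // Walk the minuend's intervals; for each candidate, advance the subtrahend
+    // until it is no longer strictly before the candidate, then return the
+    // candidate only if it does not overlap the current subtrahend interval.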
+ while ((currentInterval = minuend.next()) != null) {
+      while (subtractedInterval.lessThanExclusive(currentInterval)
+          && (subtractedInterval = subtracted.next()) != null) {
+      }
+ if (subtractedInterval == null || !currentInterval.overlaps(subtractedInterval)) {
+ return currentInterval;
+ }
+ }
+ return currentInterval;
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ assert collectIntervals;
+ collector.collectComposite(scorer, currentInterval, docID());
+ minuend.collect(collector);
+
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return new IntervalIterator[] {minuend, subtracted};
+ }
+
+
+ @Override
+ public int matchDistance() {
+ return minuend.matchDistance();
+ }
+
+ @Override
+ public int docID() {
+ return minuend.docID();
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/CombinedIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/CombinedIntervalIterator.java
new file mode 100644
index 0000000..c98dbc3
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/CombinedIntervalIterator.java
@@ -0,0 +1,132 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.PriorityQueue;
+
+import java.io.IOException;
+
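+/**
+ * An IntervalIterator over several sub-iterators that merges their intervals
+ * into a single stream, ordered by a priority queue on document and interval
+ * position.
+ *
+ * @lucene.experimental
+ */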
+public class CombinedIntervalIterator extends IntervalIterator {
+
+ private final IntervalPriorityQueue intervalQueue;
+ private final IntervalIterator[] children;
+
+ private final Interval current = new Interval();
+
+ private SnapshotPositionCollector snapshot;
+
+ public CombinedIntervalIterator(Scorer scorer, boolean collectIntervals, IntervalIterator... children) {
+ super(scorer, collectIntervals);
+ this.children = children;
+ intervalQueue = new IntervalPriorityQueue(children.length);
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ intervalQueue.clear();
+ for (IntervalIterator child : children) {
+ IntervalIteratorRef ref = new IntervalIteratorRef(child, docId);
+ if (ref.interval != null)
+ intervalQueue.add(ref);
+ }
+ intervalQueue.updateTop();
+ return docId;
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ if (intervalQueue.size() == 0)
+ return null;
+
+ IntervalIteratorRef top = intervalQueue.top();
+ current.copy(top.interval);
+ if (collectIntervals)
+ snapShotSubPositions();
+ Interval interval;
+ if ((interval = top.iterator.next()) != null) {
+ top.interval = interval;
+ intervalQueue.updateTop();
+ }
+ else
+ intervalQueue.pop();
+
+ return current;
+ }
+
+ private void snapShotSubPositions() {
+ if (snapshot == null) {
+ snapshot = new SnapshotPositionCollector(intervalQueue.size());
+ }
+ snapshot.reset();
+ collectInternal(snapshot);
+ }
+
+ private void collectInternal(IntervalCollector collector) {
+ assert collectIntervals;
+ intervalQueue.top().iterator.collect(collector);
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ assert collectIntervals;
+ if (snapshot == null) {
+ // we might not be initialized if the first interval matches
+ collectInternal(collector);
+ } else {
+ snapshot.replay(collector);
+ }
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return children;
+ }
+
+ @Override
+ public int matchDistance() {
+ return 0;
+ }
+
+ public static class IntervalIteratorRef {
+
+ final IntervalIterator iterator;
+ Interval interval = null;
+ int doc = -1;
+
+ public IntervalIteratorRef(IntervalIterator iterator, int advanceTo) throws IOException {
+ this.iterator = iterator;
+ this.doc = this.iterator.scorerAdvanced(advanceTo);
+ if (this.doc == advanceTo) {
+ this.interval = this.iterator.next();
+ }
+ }
+ }
+
+  public static class IntervalPriorityQueue extends PriorityQueue<IntervalIteratorRef> {
+
+ public IntervalPriorityQueue(int maxSize) {
+ super(maxSize);
+ }
+
+ @Override
+ protected boolean lessThan(IntervalIteratorRef a, IntervalIteratorRef b) {
+      return a.doc < b.doc || (a.doc == b.doc && a.interval.strictlyLessThan(b.interval));
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java
new file mode 100644
index 0000000..572bf75
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java
@@ -0,0 +1,183 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.intervals.IntervalQueue.IntervalRef;
+
+import java.io.IOException;
+
+/**
+ * ConjunctionIntervalIterator based on minimal interval semantics for the AND
+ * operator.
+ *
+ * See "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics"
+ *
+ * @lucene.experimental
+ */
+public final class ConjunctionIntervalIterator extends IntervalIterator {
+
+ private final IntervalQueueAnd queue;
+ private final int nrMustMatch;
+ private SnapshotPositionCollector snapshot;
+ private final IntervalIterator[] iterators;
+ private int rightExtremeBegin;
+ private final boolean collectLeaves;
+
+  /**
+   * Create a new ConjunctionIntervalIterator over a set of subiterators
+   * @param scorer the parent scorer
+   * @param collectIntervals true if intervals will be collected
+   * @param collectLeaves true if the intervals of the leaf iterators should also be collected
+   * @param iterators a list of iterators to combine
+   */
+ public ConjunctionIntervalIterator(Scorer scorer, boolean collectIntervals, boolean collectLeaves,
+ IntervalIterator... iterators) {
+ this(scorer, collectIntervals, collectLeaves, iterators.length, iterators);
+ }
+
+ public ConjunctionIntervalIterator(Scorer scorer, boolean collectIntervals, IntervalIterator... iterators) {
+ this(scorer, collectIntervals, false, iterators);
+ }
+
+ public ConjunctionIntervalIterator(Scorer scorer, boolean collectIntervals, int mm, IntervalIterator... iterators) {
+ this(scorer, collectIntervals, false, mm, iterators);
+ }
+
+  /**
+   * Create a new ConjunctionIntervalIterator over a set of subiterators,
+   * with a minimum number of matching subiterators per document
+   * @param scorer the parent Scorer
+   * @param collectIntervals true if intervals will be collected
+   * @param collectLeaves true if the intervals of the leaf iterators should also be collected
+   * @param minimumNumShouldMatch the minimum number of subiterators that must
+   *                              match a document for a match to be returned
+   * @param iterators a list of iterators to combine
+   */
+  public ConjunctionIntervalIterator(Scorer scorer, boolean collectIntervals, boolean collectLeaves,
+      int minimumNumShouldMatch, IntervalIterator... iterators) {
+    super(scorer, collectIntervals);
+    this.iterators = iterators;
+    this.queue = new IntervalQueueAnd(iterators.length);
+    this.nrMustMatch = minimumNumShouldMatch;
+    this.collectLeaves = collectLeaves;
+  }
+
+ private void advance() throws IOException {
+ final IntervalRef top = queue.top();
+ Interval interval = null;
+ if ((interval = iterators[top.index].next()) != null) {
+ top.interval = interval;
+ queue.updateRightExtreme(top);
+ queue.updateTop();
+ } else {
+ queue.pop();
+ }
+ }
+
+ @Override
+ public Interval next() throws IOException {
+
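+    // Advance any sub-iterator whose interval still begins where the previous
+    // candidate began, then repeatedly span a candidate from the smallest begin
+    // on the queue to the right extreme seen so far; return it once the interval
+    // on top of the queue covers the candidate exactly.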
+ while (queue.size() >= nrMustMatch //&& queue.top().interval.field.equals(queue.currentCandidate.field)
+ && queue.top().interval.begin == queue.currentCandidate.begin) {
+ advance();
+ }
+ if (queue.size() < nrMustMatch) {
+ return null;
+ }
+ do {
+ queue.updateCurrentCandidate();
+ Interval top = queue.top().interval;
+      if (collectIntervals) {
+        snapShotSubPositions(); // record current sub-iterator positions for later replay - see SnapshotPositionCollector
+      }
+ if (queue.currentCandidate.begin == top.begin //&& queue.currentCandidate.field.equals(top.field)
+ && queue.currentCandidate.end == top.end) {
+ return queue.currentCandidate;
+ }
+ rightExtremeBegin = queue.rightExtremeBegin;
+ advance();
+ } while (queue.size() >= nrMustMatch && queue.currentCandidate.end == queue.rightExtreme);
+ return queue.currentCandidate; // TODO support payloads
+ }
+
+
+ @Override
+ public int scorerAdvanced(final int docId) throws IOException {
+ if (docId == NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ }
+ queue.reset();
+ for (int i = 0; i < iterators.length; i++) {
+ int scorerAdvanced = iterators[i].scorerAdvanced(docId);
+ if (scorerAdvanced != docId)
+ return scorerAdvanced;
+ assert scorerAdvanced == docId;
+ final Interval interval = iterators[i].next();
+ if (interval != null) {
+ IntervalRef intervalRef = new IntervalRef(interval, i); // TODO maybe
+ // reuse?
+ queue.updateRightExtreme(intervalRef);
+ queue.add(intervalRef);
+ }
+ }
+ return docId;
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return iterators;
+ }
+
+
+ private void snapShotSubPositions() {
+ if (snapshot == null) {
+ snapshot = new SnapshotPositionCollector(queue.size());
+ }
+ snapshot.reset();
+ collectInternal(snapshot);
+ }
+
+ private void collectInternal(IntervalCollector collector) {
+ assert collectIntervals;
+ collector.collectComposite(scorer, queue.currentCandidate, docID());
+ if (collectLeaves) {
+ for (IntervalIterator iter : iterators) {
+ iter.collect(collector);
+ }
+ }
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ assert collectIntervals;
+ if (snapshot == null) {
+ // we might not be initialized if the first interval matches
+ collectInternal(collector);
+ } else {
+ snapshot.replay(collector);
+ }
+ }
+
+ @Override
+ public int matchDistance() {
+    return rightExtremeBegin - queue.currentTopEnd - 1; // 0 when the sub-matches are adjacent
+ }
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalIterator.java
new file mode 100644
index 0000000..be0c7d4
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalIterator.java
@@ -0,0 +1,113 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.intervals.IntervalQueue.IntervalRef;
+
+import java.io.IOException;
+
+/**
+ * DisjunctionIntervalIterator based on minimal interval semantics for the OR
+ * operator.
+ *
+ * See "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics"
+ *
+ * @lucene.experimental
+ */
+public final class DisjunctionIntervalIterator extends IntervalIterator {
+
+ private final IntervalQueue queue;
+ private final IntervalIterator[] iterators;
+
+ /**
+ * Creates a new DisjunctionIntervalIterator over a set of IntervalIterators
+ * @param scorer the parent Scorer
+ * @param collectIntervals true if intervals will be collected
+ * @param intervals the IntervalIterators to iterate over
+ * @throws IOException if a low-level I/O error is encountered
+ */
+ public DisjunctionIntervalIterator(Scorer scorer, boolean collectIntervals, IntervalIterator... intervals)
+ throws IOException {
+ super(scorer, collectIntervals);
+ this.iterators = intervals;
+ queue = new IntervalQueueOr(intervals.length);
+ }
+
+ private void advance() throws IOException {
+ final IntervalRef top = queue.top();
+ Interval interval = null;
+ if ((interval = iterators[top.index].next()) != null) {
+ top.interval = interval;
+ queue.updateTop();
+ } else {
+ queue.pop();
+ }
+ }
+
+ @Override
+ public Interval next() throws IOException {
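+    // Discard intervals that begin before the previous candidate, or begin with
+    // it and end no later; the remaining top of the queue is then the next
+    // minimal interval in position order.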
+ while (queue.size() > 0 &&
+ (queue.top().interval.field.equals(queue.currentCandidate.field)) &&
+ (queue.top().interval.begin < queue.currentCandidate.begin ||
+ (queue.top().interval.begin == queue.currentCandidate.begin && queue.top().interval.end <= queue.currentCandidate.end))) {
+ advance();
+ }
+ if (queue.size() == 0) {
+ return null;
+ }
+ queue.updateCurrentCandidate();
+ return queue.currentCandidate; // TODO support payloads
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return iterators;
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ assert collectIntervals;
+ collector.collectComposite(scorer, queue.currentCandidate, docID());
+ iterators[queue.top().index].collect(collector);
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ queue.reset();
+ for (int i = 0; i < iterators.length; i++) {
+ if (iterators[i].docID() <= docId) {
+        iterators[i].scorerAdvanced(docId);
+ }
+ if (iterators[i].docID() == docId) {
+ Interval interval = iterators[i].next();
+ if (interval != null)
+ queue.add(new IntervalRef(interval, i));
+ }
+ }
+ return this.docID();
+ }
+
+ @Override
+ public int matchDistance() {
+ return iterators[queue.top().index].matchDistance();
+ }
+
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/Interval.java b/lucene/core/src/java/org/apache/lucene/search/intervals/Interval.java
new file mode 100644
index 0000000..08a02bc
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/Interval.java
@@ -0,0 +1,197 @@
+package org.apache.lucene.search.intervals;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Represents a section of a document that matches a query
+ */
+public class Interval implements Cloneable {
+
+ /** The position of the start of this Interval */
+ public int begin;
+
+ /** The position of the end of this Interval */
+ public int end;
+
+ /** The offset of the start of this Interval */
+ public int offsetBegin;
+
+ /** The offset of the end of this Interval */
+ public int offsetEnd;
+
+ /** The field this interval is on */
+ public String field;
+
+ /** An interval that will always compare as less than any other interval */
+ public static final Interval INFINITE_INTERVAL = new Interval();
+
+  /**
+   * Constructs a new Interval
+   * @param begin the start position
+   * @param end the end position
+   * @param offsetBegin the start offset
+   * @param offsetEnd the end offset
+   * @param field the field the interval is on
+   */
+ public Interval(int begin, int end, int offsetBegin, int offsetEnd, String field) {
+ this.begin = begin;
+ this.end = end;
+ this.offsetBegin = offsetBegin;
+ this.offsetEnd = offsetEnd;
+ this.field = field;
+ }
+
+ /**
+ * Constructs a new Interval with no initial values. This
+ * will always compare as less than any other Interval.
+ */
+ public Interval() {
+ this("");
+ }
+
+ public Interval(String field) {
+ this(Integer.MIN_VALUE, Integer.MIN_VALUE, -1, -1, field);
+ }
+
+  /**
+   * Update to span the range defined by two other Intervals.
+   * @param start the Interval to take the start position and offset from
+   * @param end the Interval to take the end position and offset from
+   */
+ public void update(Interval start, Interval end) {
+    assert start.field.equals(end.field);
+ this.begin = start.begin;
+ this.offsetBegin = start.offsetBegin;
+ this.end = end.end;
+ this.offsetEnd = end.offsetEnd;
+ }
+
+  /**
+   * Compare with another Interval.
+   * @param other the Interval to compare against
+   * @return true if both start and end positions are strictly less than
+   * the other's.
+   */
+ public boolean lessThanExclusive(Interval other) {
+ //assert field == other.field;
+ return begin < other.begin && end < other.end;
+ }
+
+  /**
+   * Compare with another Interval.
+   * @param other the Interval to compare against
+   * @return true if both start and end positions are less than
+   * or equal to the other's.
+   */
+ public boolean lessThan(Interval other) {
+ //assert field == other.field;
+ return begin <= other.begin && end <= other.end;
+ }
+
+  /**
+   * Compare with another Interval
+   * @param other the Interval to compare against
+   * @return true if both start and end positions are strictly greater than
+   * the other's.
+   */
+ public boolean greaterThanExclusive(Interval other) {
+    assert field.equals(other.field);
+ return begin > other.begin && end > other.end;
+ }
+
+  /**
+   * Compare with another Interval
+   * @param other the Interval to compare against
+   * @return true if both start and end positions are greater than
+   * or equal to the other's.
+   */
+ public boolean greaterThan(Interval other) {
+    assert field.equals(other.field);
+ return begin >= other.begin && end >= other.end;
+ }
+
+  /**
+   * Compare with another Interval
+   * @param other the Interval to compare against
+   * @return true if this Interval contains the other Interval
+   */
+ public boolean contains(Interval other) {
+    assert field.equals(other.field);
+ return begin <= other.begin && other.end <= end;
+ }
+
+  /**
+   * Compare with another Interval to find overlaps
+   * @param other the Interval to compare against
+   * @return true if the two intervals overlap
+   */
+ public boolean overlaps(Interval other) {
+ //assert field == other.field;
+ return this.contains(other) || other.contains(this);
+ }
+
+  public boolean strictlyLessThan(Interval other) {
+    // order by field first, then by begin, then by end
+    int cmp = this.field.compareTo(other.field);
+    return cmp < 0
+        || (cmp == 0 && (this.begin < other.begin
+            || (this.begin == other.begin && this.end <= other.end)));
+  }
+
+ /**
+ * Set all values of this Interval to be equal to another's
+ * @param other the Interval to copy
+ */
+ public void copy(Interval other) {
+ begin = other.begin;
+ end = other.end;
+ offsetBegin = other.offsetBegin;
+ offsetEnd = other.offsetEnd;
+ field = other.field;
+ }
+
+ /**
+ * Set to a state that will always compare as less than any
+ * other Interval.
+ */
+ public void reset() {
+ offsetBegin = offsetEnd = -1;
+ begin = end = Integer.MIN_VALUE;
+ }
+
+ /**
+ * Set to a state that will always compare as more than any
+ * other Interval.
+ */
+ public void setMaximum() {
+ offsetBegin = offsetEnd = -1;
+ begin = end = Integer.MAX_VALUE;
+ }
+
+ @Override
+ public Object clone() {
+ try {
+ return super.clone();
+ } catch (CloneNotSupportedException e) {
+      throw new RuntimeException(e); // cannot happen - Interval implements Cloneable
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "Interval [field=" + field + " begin=" + begin + "(" + offsetBegin + "), end="
+ + end + "(" + offsetEnd + ")]";
+ }
+
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalCollector.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalCollector.java
new file mode 100644
index 0000000..9ddc3f8
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalCollector.java
@@ -0,0 +1,43 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Scorer;
+
+/**
+ * Used for collecting matching {@link Interval}s from a search
+ */
+public interface IntervalCollector {
+
+ /**
+ * Collects an individual term match
+ * @param scorer the parent scorer
+ * @param interval the interval to collect
+ * @param docID the docID of the document matched
+ */
+ public void collectLeafPosition(Scorer scorer, Interval interval, int docID);
+
+ /**
+ * Collects a composite interval that may have sub-intervals
+ * @param scorer the parent scorer
+ * @param interval the interval to collect
+ * @param docID the docID of the document matched
+ */
+ public void collectComposite(Scorer scorer, Interval interval, int docID);
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalFilter.java
new file mode 100644
index 0000000..9e8531a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalFilter.java
@@ -0,0 +1,36 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Filters an {@link IntervalIterator}
+ *
+ * @see IntervalFilterQuery
+ */
+public interface IntervalFilter {
+
+ /**
+ * Filter the passed in IntervalIterator
+ * @param collectIntervals true if the returned {@link IntervalIterator} will
+ * be passed to an {@link IntervalCollector}
+ * @param iter the {@link IntervalIterator} to filter
+ * @return a filtered {@link IntervalIterator}
+ */
+ public abstract IntervalIterator filter(boolean collectIntervals, IntervalIterator iter);
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalFilterQuery.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalFilterQuery.java
new file mode 100644
index 0000000..7236b06
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalFilterQuery.java
@@ -0,0 +1,420 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ComplexExplanation;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.Weight.PostingFeatures;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.Bits;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.TreeSet;
+
+/**
+ * A Query that filters the results of an inner {@link Query} using an
+ * {@link IntervalFilter}.
+ *
+ * @see OrderedNearQuery
+ * @see UnorderedNearQuery
+ * @see NonOverlappingQuery
+ *
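+ * A minimal usage sketch (the field name, terms and filter are hypothetical
+ * placeholders):
+ * <pre>
+ *   BooleanQuery bq = IntervalFilterQuery.createFieldConjunction(
+ *       new TermQuery(new Term("body", "quick")),
+ *       new TermQuery(new Term("body", "fox")));
+ *   Query filtered = new IntervalFilterQuery(bq, someIntervalFilter);
+ * </pre>
+ *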
+ * @lucene.experimental
+ */
+public class IntervalFilterQuery extends Query implements Cloneable {
+
+ public static BooleanQuery createFieldConjunction(Query... subqueries) {
+ if (subqueries.length == 0)
+ throw new IllegalArgumentException("Cannot create empty conjunction");
+ BooleanQuery bq = new BooleanQuery();
+ for (Query query : subqueries) {
+ bq.add(query, BooleanClause.Occur.MUST);
+ }
+ ensureSingleFielded(bq);
+ return bq;
+ }
+
+ public static String ensureSingleFielded(Query query) {
+ if (query.getFields().size() != 1)
+ throw new IllegalArgumentException("Query must have a single field: found " + query.getFields());
+ return query.getFields().iterator().next();
+ }
+
+ public static BooleanQuery createConjunction(Query... subqueries) {
+ BooleanQuery bq = new BooleanQuery();
+ for (Query query : subqueries) {
+ bq.add(query, BooleanClause.Occur.MUST);
+ }
+ return bq;
+ }
+
+ private Query inner;
+ private final IntervalFilter filter;
+
+ /**
+ * Constructs a query using an inner query and an IntervalFilter
+ * @param inner the query to wrap
+ * @param filter the filter to restrict results by
+ */
+ public IntervalFilterQuery(Query inner, IntervalFilter filter) {
+ this.inner = inner;
+ this.filter = filter;
+ }
+
+ @Override
+  public void extractTerms(Set<Term> terms) {
+ inner.extractTerms(terms);
+ }
+
+ @Override
+  public Set<String> getFields() {
+ return inner.getFields();
+ }
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ IntervalFilterQuery clone = null;
+
+ Query rewritten = inner.rewrite(reader);
+ if (rewritten != inner) {
+ clone = (IntervalFilterQuery) this.clone();
+ clone.inner = rewritten;
+ }
+
+ if (clone != null) {
+ return clone; // some clauses rewrote
+ } else {
+ return this; // no clauses rewrote
+ }
+ }
+
+ @Override
+ public Weight createWeight(IndexSearcher searcher) throws IOException {
+ return new IntervalFilterWeight(inner.createWeight(searcher), searcher);
+ }
+
+ class IntervalFilterWeight extends Weight {
+
+ private final Weight other;
+ private final Similarity similarity;
+ private final Similarity.SimWeight stats;
+
+ public IntervalFilterWeight(Weight other, IndexSearcher searcher) throws IOException {
+ this.other = other;
+ this.similarity = searcher.getSimilarity();
+ this.stats = getSimWeight(other.getQuery(), searcher);
+ }
+
+ private Similarity.SimWeight getSimWeight(Query query, IndexSearcher searcher) throws IOException {
+      TreeSet<Term> terms = new TreeSet<Term>();
+ query.extractTerms(terms);
+ if (terms.size() == 0)
+ return null;
+ int i = 0;
+ TermStatistics[] termStats = new TermStatistics[terms.size()];
+ for (Term term : terms) {
+ TermContext state = TermContext.build(searcher.getTopReaderContext(), term);
+ termStats[i] = searcher.termStatistics(term, state);
+ i++;
+ }
+ final String field = terms.first().field(); // nocommit - should we be checking all filtered terms
+ // are on the same field?
+ return similarity.computeWeight(query.getBoost(), searcher.collectionStatistics(field), termStats);
+
+ }
+
+ @Override
+ public Explanation explain(AtomicReaderContext context, int doc)
+ throws IOException {
+ Scorer scorer = scorer(context, PostingFeatures.POSITIONS,
+ context.reader().getLiveDocs());
+ if (scorer != null) {
+ int newDoc = scorer.advance(doc);
+ if (newDoc == doc) {
+ float freq = scorer.freq();
+ Similarity.SimScorer docScorer = similarity.simScorer(stats, context);
+ ComplexExplanation result = new ComplexExplanation();
+ result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
+ Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
+ result.addDetail(scoreExplanation);
+ result.setValue(scoreExplanation.getValue());
+ result.setMatch(true);
+ return result;
+ }
+ }
+ return new ComplexExplanation(false, 0.0f,
+ "No matching term within position filter");
+ }
+
+ @Override
+ public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException {
+ if (stats == null)
+ return null;
+ flags = flags == PostingFeatures.DOCS_AND_FREQS ? PostingFeatures.POSITIONS : flags;
+ ScorerFactory factory = new ScorerFactory(other, context, flags, acceptDocs);
+ final Scorer scorer = factory.scorer();
+ Similarity.SimScorer docScorer = similarity.simScorer(stats, context);
+ return scorer == null ? null : new IntervalFilterScorer(this, scorer, factory, docScorer);
+ }
+
+ @Override
+ public Query getQuery() {
+ return IntervalFilterQuery.this;
+ }
+
+ @Override
+ public float getValueForNormalization() throws IOException {
+ return stats == null ? 1.0f : stats.getValueForNormalization();
+ }
+
+ @Override
+ public void normalize(float norm, float topLevelBoost) {
+ if (stats != null)
+ stats.normalize(norm, topLevelBoost);
+ }
+ }
+
+ static class ScorerFactory {
+ final Weight weight;
+ final AtomicReaderContext context;
+ final PostingFeatures flags;
+ final Bits acceptDocs;
+ ScorerFactory(Weight weight,
+ AtomicReaderContext context, PostingFeatures flags,
+ Bits acceptDocs) {
+ this.weight = weight;
+ this.context = context;
+ this.flags = flags;
+ this.acceptDocs = acceptDocs;
+ }
+
+ public Scorer scorer() throws IOException {
+ return weight.scorer(context, flags, acceptDocs);
+ }
+
+ }
+
+ final class IntervalFilterScorer extends Scorer {
+
+ private final Scorer other;
+ private IntervalIterator filter;
+ private Interval current;
+ private final ScorerFactory factory;
+ private final Similarity.SimScorer docScorer;
+
+ public IntervalFilterScorer(Weight weight, Scorer other, ScorerFactory factory,
+ Similarity.SimScorer docScorer) throws IOException {
+ super(weight);
+ this.other = other;
+ this.factory = factory;
+ this.filter = IntervalFilterQuery.this.filter.filter(false, other.intervals(false));
+ this.docScorer = docScorer;
+ }
+
+ @Override
+ public float score() throws IOException {
+ return docScorer.score(docID(), freq());
+ }
+
+ @Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ if (collectIntervals) {
+ final Scorer collectingScorer = factory.scorer();
+ final IntervalIterator filter = IntervalFilterQuery.this.filter.filter(true, collectingScorer.intervals(true));
+ return new IntervalIterator(this, true) {
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ if (collectingScorer.docID() >= docId) {
+ return collectingScorer.docID();
+ }
+ int target = collectingScorer.advance(docId);
+ if (target == NO_MORE_DOCS)
+ return NO_MORE_DOCS;
+ return filter.scorerAdvanced(target);
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ return filter.next();
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ filter.collect(collector);
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return filter.subs(inOrder);
+ }
+
+ @Override
+ public int matchDistance() {
+ return filter.matchDistance();
+ }
+
+ @Override
+ public int docID() {
+ return filter.docID();
+ }
+
+ @Override
+ public String toString() {
+ return IntervalFilterQuery.this.toString(null) + "[" + filter + "]";
+ }
+
+ };
+ }
+
+ return new IntervalIterator(this, collectIntervals) {
+ private boolean buffered = true;
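+        // nextDoc()/advance() already consumed the first interval of this
+        // document when checking for a match, so the first call to next() must
+        // return that buffered interval instead of pulling a new one.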
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ buffered = true;
+ assert docId == filter.docID();
+ return docId;
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ if (buffered) {
+ buffered = false;
+ return current;
+ }
+ else if (current != null) {
+ return current = filter.next();
+ }
+ return null;
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ filter.collect(collector);
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return filter.subs(inOrder);
+ }
+
+ @Override
+ public int matchDistance() {
+ return filter.matchDistance();
+ }
+
+ };
+ }
+
+ @Override
+ public int docID() {
+ return other.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ int docId = -1;
+ while ((docId = other.nextDoc()) != Scorer.NO_MORE_DOCS) {
+ filter.scorerAdvanced(docId);
+ if ((current = filter.next()) != null) { // just check if there is at least one interval that matches!
+ return other.docID();
+ }
+ }
+ return Scorer.NO_MORE_DOCS;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ int docId = other.advance(target);
+ if (docId == Scorer.NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ }
+ do {
+ filter.scorerAdvanced(docId);
+ if ((current = filter.next()) != null) {
+ return other.docID();
+ }
+ } while ((docId = other.nextDoc()) != Scorer.NO_MORE_DOCS);
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ public long cost() {
+ return other.cost();
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return 1; // nocommit how to calculate frequency?
+ }
+
+ public float sloppyFreq() throws IOException {
+ float freq = 0.0f;
+ do {
+ int d = filter.matchDistance();
+ freq += docScorer.computeSlopFactor(d);
+ }
+ while (filter.next() != null);
+ return freq;
+ }
+
+ }
+
+ @Override
+ public String toString(String field) {
+ return "Filtered/" + filter.toString() + "(" + inner.toString() + ")";
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = super.hashCode();
+ result = prime * result + ((filter == null) ? 0 : filter.hashCode());
+ result = prime * result + ((inner == null) ? 0 : inner.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) return true;
+ if (!super.equals(obj)) return false;
+ if (getClass() != obj.getClass()) return false;
+ IntervalFilterQuery other = (IntervalFilterQuery) obj;
+ if (filter == null) {
+ if (other.filter != null) return false;
+ } else if (!filter.equals(other.filter)) return false;
+ if (inner == null) {
+ if (other.inner != null) return false;
+ } else if (!inner.equals(other.inner)) return false;
+ return true;
+ }
+
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalIterator.java
new file mode 100644
index 0000000..5e53e9e
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalIterator.java
@@ -0,0 +1,148 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.search.Scorer;
+
+import java.io.IOException;
+
+/**
+ * Iterator over the matching {@link Interval}s of a {@link Scorer}
+ *
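+ * A typical consumption pattern (a sketch; {@code scorer} and {@code doc} are
+ * assumed to come from the caller's own iteration):
+ * <pre>
+ *   IntervalIterator it = scorer.intervals(false);
+ *   if (it.scorerAdvanced(doc) == doc) {
+ *     for (Interval i = it.next(); i != null; i = it.next()) {
+ *       // consume i
+ *     }
+ *   }
+ * </pre>
+ *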
+ * @lucene.experimental
+ */
+public abstract class IntervalIterator {
+
+ /** An empty array of IntervalIterators */
+ public static final IntervalIterator[] EMPTY = new IntervalIterator[0];
+
+ /** An IntervalIterator containing no further Intervals */
+ public static final IntervalIterator NO_MORE_INTERVALS = new EmptyIntervalIterator();
+
+ /** Integer representing no more documents */
+ public static final int NO_MORE_DOCS = Integer.MAX_VALUE;
+
+ protected final Scorer scorer;
+ protected final boolean collectIntervals;
+
+ /**
+ * Constructs an IntervalIterator over a {@link Scorer}
+ * @param scorer the {@link Scorer} to pull positions from
+ * @param collectIntervals true if positions will be collected
+ */
+ public IntervalIterator(Scorer scorer, boolean collectIntervals) {
+ this.scorer = scorer;
+ this.collectIntervals = collectIntervals;
+ }
+
+ /**
+ * Called after the parent scorer has been advanced. If the scorer is
+ * currently positioned on docId, then subsequent calls to next() will
+ * return Intervals for that document; otherwise, no Intervals are
+ * available
+ * @param docId the document the parent scorer was advanced to
+ * @return the docId that the scorer is currently positioned at
+ * @throws IOException if a low-level I/O error is encountered
+ */
+ public abstract int scorerAdvanced(int docId) throws IOException;
+
+ /**
+ * Get the next Interval on the current document.
+ * @return the next Interval, or null if there are no remaining Intervals
+ * @throws IOException if a low-level I/O error is encountered
+ */
+ public abstract Interval next() throws IOException;
+
+ /**
+ * If intervals are to be collected, this will be called once
+ * for each Interval returned by the iterator. The constructor
+ * must have been called with collectIntervals=true.
+ * @param collector an {@link IntervalCollector} to collect the
+ * Interval positions
+ * @see Scorer#intervals(boolean)
+ */
+ public abstract void collect(IntervalCollector collector);
+
+ /**
+ * Get any sub-iterators
+ *
+ * @param inOrder
+ * true if the sub-iterators should be returned in the same order the
+ * queries were provided
+ */
+ public abstract IntervalIterator[] subs(boolean inOrder);
+
+ /**
+ * Get the distance between matching subintervals
+ */
+ public abstract int matchDistance();
+
+ /**
+ * Get the current docID
+ */
+ public int docID() {
+ return scorer.docID();
+ }
+
+ /**
+ * Get this iterator's {@link Scorer}
+ */
+ public Scorer getScorer() {
+ return scorer;
+ }
+
+ /**
+ * An iterator that is always exhausted
+ */
+ private static final class EmptyIntervalIterator extends
+ IntervalIterator {
+
+ public EmptyIntervalIterator() {
+ super(null, false);
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ return IntervalIterator.NO_MORE_DOCS;
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ return null;
+ }
+
+ @Override
+    public void collect(IntervalCollector collector) {}
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return EMPTY;
+ }
+
+ @Override
+ public int matchDistance() {
+ return Integer.MAX_VALUE;
+ }
+
+ @Override
+ public int docID() {
+ return IntervalIterator.NO_MORE_DOCS;
+ }
+
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueue.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueue.java
new file mode 100644
index 0000000..76bc8cb
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueue.java
@@ -0,0 +1,71 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.search.intervals.IntervalQueue.IntervalRef;
+import org.apache.lucene.util.PriorityQueue;
+
+/**
+ * Abstract base class for calculating minimal spanning intervals with Queues.
+ * @see IntervalQueueAnd
+ *
+ * @lucene.experimental
+ * @lucene.internal
+ */
+abstract class IntervalQueue extends PriorityQueue<IntervalRef> {
+ /**
+ * The current interval spanning the queue
+ */
+ final Interval currentCandidate;
+
+ /**
+ * Creates a new {@link IntervalQueue} with a fixed size
+ * @param size the size of the queue
+ */
+ public IntervalQueue(int size) {
+ super(size);
+ currentCandidate = new Interval();
+ }
+
+  /**
+   * Clears and resets the queue to its initial state.
+   */
+ void reset() {
+ clear();
+ currentCandidate.reset();
+ }
+
+  /**
+   * Called by the consumer each time the head of the queue changes, to update
+   * the current candidate interval from the new top.
+   */
+ abstract void updateCurrentCandidate();
+
+ /**
+ * Holds a reference to an interval and its index.
+ */
+ final static class IntervalRef {
+ Interval interval;
+ final int index;
+
+ IntervalRef(Interval interval, int index) {
+ super();
+ this.interval = interval;
+ this.index = index;
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueueAnd.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueueAnd.java
new file mode 100644
index 0000000..3d35d70
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueueAnd.java
@@ -0,0 +1,90 @@
+package org.apache.lucene.search.intervals;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Queue class for calculating minimal spanning conjunction intervals
+ * @lucene.experimental
+ */
+final class IntervalQueueAnd extends IntervalQueue {
+
+  /** the current right extreme position of the queue */
+  int rightExtreme = Integer.MIN_VALUE;
+  /** the current right extreme offset of the queue */
+  int rightExtremeOffset = Integer.MIN_VALUE;
+  /** the current right extreme begin position */
+  int rightExtremeBegin;
+  /** the end of the interval on top of the queue */
+  int currentTopEnd;
+
+ /**
+ * Creates a new {@link IntervalQueueAnd} with a fixed size
+ * @param size the size of the queue
+ */
+ IntervalQueueAnd(int size) {
+ super(size);
+ }
+
+ @Override
+ void reset () {
+ super.reset();
+ rightExtreme = Integer.MIN_VALUE;
+ rightExtremeOffset = Integer.MIN_VALUE;
+ }
+
+  /**
+   * Updates the right extreme of this queue if the end of the given interval is
+   * greater than or equal to the current right extreme of the queue.
+   *
+   * @param intervalRef the interval to compare
+   */
+ void updateRightExtreme(IntervalRef intervalRef) {
+ final Interval interval = intervalRef.interval;
+ if (rightExtreme <= interval.end) {
+ rightExtreme = interval.end;
+ rightExtremeOffset = interval.offsetEnd;
+ rightExtremeBegin = interval.begin;
+ }
+ }
+
+ @Override
+ void updateCurrentCandidate() {
+ final IntervalRef top = top();
+ Interval interval = top.interval;
+ currentCandidate.begin = interval.begin;
+ currentCandidate.offsetBegin = interval.offsetBegin;
+ currentCandidate.end = rightExtreme;
+ currentCandidate.offsetEnd = rightExtremeOffset;
+ currentCandidate.field = interval.field;
+ currentTopEnd = interval.end;
+
+ }
+
+ @Override
+ protected boolean lessThan(IntervalRef left, IntervalRef right) {
+ final Interval a = left.interval;
+ final Interval b = right.interval;
+ if (a.field == null)
+ return true;
+ if (b.field == null)
+ return false;
+ if (a.field.equals(b.field))
+ return a.begin < b.begin || (a.begin == b.begin && a.end > b.end) || a.offsetBegin < b.offsetBegin;
+ return (a.field.compareTo(b.field)) < 0;
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueueOr.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueueOr.java
new file mode 100644
index 0000000..a99d91a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueueOr.java
@@ -0,0 +1,51 @@
+package org.apache.lucene.search.intervals;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Queue class for calculating minimal spanning disjunction intervals
+ * @lucene.experimental
+ */
+final class IntervalQueueOr extends IntervalQueue {
+
+ /**
+ * Creates a new {@link IntervalQueueOr} with a fixed size
+ * @param size the size of the queue
+ */
+ IntervalQueueOr(int size) {
+ super(size);
+ }
+
+ @Override
+ void updateCurrentCandidate() {
+ currentCandidate.copy(top().interval);
+ }
+
+ @Override
+ protected boolean lessThan(IntervalRef left, IntervalRef right) {
+ final Interval a = left.interval;
+ final Interval b = right.interval;
+ if (a.field == null)
+ return true;
+ if (b.field == null)
+ return false;
+ if (a.field.equals(b.field))
+ return a.begin < b.begin || (a.begin == b.begin && a.end < b.end);
+ return (a.field.compareTo(b.field)) < 0;
+ //return a.end < b.end || (a.end == b.end && a.begin >= b.begin);
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/MinFrequencyFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/MinFrequencyFilter.java
new file mode 100644
index 0000000..98f6fc4
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/MinFrequencyFilter.java
@@ -0,0 +1,106 @@
+package org.apache.lucene.search.intervals;
+
+import java.io.IOException;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
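+/**
+ * An IntervalFilter that only lets intervals through once the wrapped iterator
+ * has produced at least {@code minFreq} intervals on the current document.
+ */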
+public class MinFrequencyFilter implements IntervalFilter {
+
+ private final int minFreq;
+
+ public MinFrequencyFilter(int minFreq) {
+ this.minFreq = minFreq;
+ }
+
+ @Override
+ public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) {
+ return new MinFrequencyIntervalIterator(minFreq, iter, collectIntervals);
+ }
+
+ public static class MinFrequencyIntervalIterator extends IntervalIterator {
+
+ private final IntervalIterator subIter;
+ private final Interval[] intervalCache;
+ private final int[] distanceCache;
+
+ private int cachePos = -1;
+ private int freq = -1;
+
+ public MinFrequencyIntervalIterator(int minFreq, IntervalIterator iter, boolean collectIntervals) {
+ super(iter == null ? null : iter.scorer, collectIntervals);
+ this.subIter = iter;
+ this.intervalCache = new Interval[minFreq];
+ for (int i = 0; i < minFreq; i++) {
+ this.intervalCache[i] = new Interval();
+ }
+ this.distanceCache = new int[minFreq];
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ cachePos = -1;
+ return subIter.scorerAdvanced(docId);
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ if (cachePos == -1)
+ freq = loadIntervalCache();
+ if (freq < intervalCache.length)
+ return null;
+ cachePos++;
+ if (cachePos < intervalCache.length)
+ return intervalCache[cachePos];
+ return subIter.next();
+ }
+
+ private int loadIntervalCache() throws IOException {
+ int f = 0;
+ Interval interval;
+ while (f < intervalCache.length && (interval = subIter.next()) != null) {
+ intervalCache[f].copy(interval);
+ f++;
+ }
+ return f;
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ if (cachePos < distanceCache.length)
+ collector.collectComposite(null, intervalCache[cachePos], subIter.docID());
+ subIter.collect(collector);
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return new IntervalIterator[]{ subIter };
+ }
+
+ @Override
+ public int matchDistance() {
+ if (cachePos < distanceCache.length)
+ return distanceCache[cachePos];
+ return subIter.matchDistance();
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "MINFREQ(" + minFreq + ")";
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/NonOverlappingQuery.java b/lucene/core/src/java/org/apache/lucene/search/intervals/NonOverlappingQuery.java
new file mode 100644
index 0000000..efadb54
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/NonOverlappingQuery.java
@@ -0,0 +1,365 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.Weight.PostingFeatures;
+import org.apache.lucene.util.Bits;
+
+import java.io.IOException;
+import java.util.Set;
+
+/**
+ * A Query that matches documents containing an interval (the minuend) that
+ * does not contain another interval (the subtrahend).
+ *
+ * As an example, given the following {@link org.apache.lucene.search.BooleanQuery}:
+ * <pre>
+ *   BooleanQuery bq = new BooleanQuery();
+ *   bq.add(new TermQuery(new Term(field, "quick")), BooleanClause.Occur.MUST);
+ *   bq.add(new TermQuery(new Term(field, "fox")), BooleanClause.Occur.MUST);
+ * </pre>
+ *
+ * The document "the quick brown fox" will be matched by this query. But
+ * create a NonOverlappingQuery using this query as a minuend:
+ * <pre>
+ *   NonOverlappingQuery brq = new NonOverlappingQuery(bq, new TermQuery(new Term(field, "brown")));
+ * </pre>
+ *
+ * This query will not match "the quick brown fox", because "brown" is found
+ * within the interval of the boolean query for "quick" and "fox". The query
+ * will match "the quick fox is brown", because here "brown" is outside
+ * the minuend's interval.
+ *
+ * N.B. Positions must be included in the index for this query to work
+ *
+ * Implements the Brouwerian operator as defined in "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics"
+ *
+ * @lucene.experimental
+ * @see BrouwerianIntervalIterator
+ */
+public final class NonOverlappingQuery extends Query implements Cloneable {
+
+ private final Query minuend;
+ private final Query subtrahend;
+ private final String field;
+
+ /**
+ * Constructs a Query that matches documents containing intervals of the minuend
+ * that are not subtended by the subtrahend
+ * @param minuend the minuend Query
+ * @param subtrahend the subtrahend Query
+ */
+ public NonOverlappingQuery(Query minuend, Query subtrahend) {
+ this.minuend = minuend;
+ this.subtrahend = subtrahend;
+ if (minuend.getFields().size() != 1)
+ throw new IllegalArgumentException("Minuend query must be defined on a single field: found " + minuend.getFields());
+ if (subtrahend.getFields().size() != 1)
+ throw new IllegalArgumentException("Subtrahend query must be defined on a single field: found " + subtrahend.getFields());
+ if (!minuend.getFields().containsAll(subtrahend.getFields()))
+ throw new IllegalArgumentException("Minuend and subtrahend must be on the same field: found "
+ + minuend.getFields() + " and " + subtrahend.getFields());
+ field = minuend.getFields().iterator().next();
+ }
+
+ @Override
+ public void extractTerms(Set<Term> terms) {
+ minuend.extractTerms(terms);
+ subtrahend.extractTerms(terms);
+ }
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ Query rewritten = minuend.rewrite(reader);
+ Query subRewritten = subtrahend.rewrite(reader);
+ if (rewritten != minuend || subRewritten != subtrahend) {
+ return new NonOverlappingQuery(rewritten, subRewritten);
+ }
+ return this;
+ }
+
+ @Override
+ public Weight createWeight(IndexSearcher searcher) throws IOException {
+ return new BrouwerianQueryWeight(minuend.createWeight(searcher), subtrahend.createWeight(searcher));
+ }
+
+ class BrouwerianQueryWeight extends Weight {
+
+ private final Weight minuted;
+ private final Weight subtracted;
+
+ public BrouwerianQueryWeight(Weight minuted, Weight subtracted) {
+ this.minuted = minuted;
+ this.subtracted = subtracted;
+ }
+
+ @Override
+ public Explanation explain(AtomicReaderContext context, int doc)
+ throws IOException {
+ return minuted.explain(context, doc);
+ }
+
+ @Override
+ public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException {
+ flags = flags == PostingFeatures.DOCS_AND_FREQS ? PostingFeatures.POSITIONS : flags;
+ ScorerFactory factory = new ScorerFactory(minuted, subtracted, context, flags, acceptDocs);
+ final Scorer scorer = factory.minutedScorer();
+ final Scorer subScorer = factory.subtractedScorer();
+ if (subScorer == null) {
+ return scorer;
+ }
+ return scorer == null ? null : new BrouwerianScorer(this, scorer, subScorer, factory);
+ }
+
+ @Override
+ public Query getQuery() {
+ return NonOverlappingQuery.this;
+ }
+
+ @Override
+ public float getValueForNormalization() throws IOException {
+ return minuted.getValueForNormalization();
+ }
+
+ @Override
+ public void normalize(float norm, float topLevelBoost) {
+ minuted.normalize(norm, topLevelBoost);
+ }
+ }
+
+ static class ScorerFactory {
+ final Weight minuted;
+ final Weight subtracted;
+ final AtomicReaderContext context;
+ final PostingFeatures flags;
+ final Bits acceptDocs;
+ ScorerFactory(Weight minuted, Weight subtracted,
+ AtomicReaderContext context, PostingFeatures flags,
+ Bits acceptDocs) {
+ this.minuted = minuted;
+ this.subtracted = subtracted;
+ this.context = context;
+ this.flags = flags;
+ this.acceptDocs = acceptDocs;
+ }
+
+ public Scorer minutedScorer() throws IOException {
+ return minuted.scorer(context, flags, acceptDocs);
+ }
+
+ public Scorer subtractedScorer() throws IOException {
+ return subtracted.scorer(context, flags, acceptDocs);
+ }
+
+ }
+
+ final class BrouwerianScorer extends Scorer {
+
+ private final Scorer minuend;
+ private IntervalIterator filter;
+ private final Scorer subtracted;
+ Interval current;
+ private final ScorerFactory factory;
+
+ public BrouwerianScorer(Weight weight, Scorer minuend, Scorer subtracted, ScorerFactory factory) throws IOException {
+ super(weight);
+ this.minuend = minuend;
+ this.subtracted = subtracted;
+ this.filter = new BrouwerianIntervalIterator(minuend, false, minuend.intervals(false),
+ subtracted.intervals(false), field);
+ this.factory = factory;
+ }
+
+ @Override
+ public float score() throws IOException {
+ return minuend.score();
+ }
+
+ @Override
+ public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+ if (collectIntervals) {
+ final Scorer minuted = factory.minutedScorer();
+ final Scorer subtracted = factory.subtractedScorer();
+ final BrouwerianIntervalIterator brouwerianIntervalIterator
+ = new BrouwerianIntervalIterator(subtracted, true, minuted.intervals(true), subtracted.intervals(true), field);
+ return new IntervalIterator(this, collectIntervals) {
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ //if (docId == this.docID())
+ // return docId;
+ int mId = minuted.advance(docId);
+ if (subtracted.docID() < docId)
+ subtracted.advance(docId);
+ if (mId <= docId)
+ return brouwerianIntervalIterator.scorerAdvanced(docId);
+ return mId;
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ return brouwerianIntervalIterator.next();
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ brouwerianIntervalIterator.collect(collector);
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return brouwerianIntervalIterator.subs(inOrder);
+ }
+
+ @Override
+ public int matchDistance() {
+ return brouwerianIntervalIterator.matchDistance();
+ }
+
+ };
+ }
+
+
+
+ return new IntervalIterator(this, false) {
+ private boolean buffered = true;
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ buffered = true;
+ assert docId == filter.docID();
+ return docId;
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ if (buffered) {
+ buffered = false;
+ return current;
+ }
+ else if (current != null) {
+ return current = filter.next();
+ }
+ return null;
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ filter.collect(collector);
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return filter.subs(inOrder);
+ }
+
+ @Override
+ public int matchDistance() {
+ return filter.matchDistance();
+ }
+
+ };
+ }
+
+ @Override
+ public int docID() {
+ return minuend.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ int docId = -1;
+ while ((docId = minuend.nextDoc()) != Scorer.NO_MORE_DOCS) {
+ if (subtracted.docID() < docId)
+ subtracted.advance(docId);
+ filter.scorerAdvanced(docId);
+ if ((current = filter.next()) != null) { // just check if there is a position that matches!
+ return minuend.docID();
+ }
+ }
+ return Scorer.NO_MORE_DOCS;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ int docId = minuend.advance(target);
+ subtracted.advance(docId);
+ if (docId == Scorer.NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ }
+ do {
+ filter.scorerAdvanced(docId);
+ if ((current = filter.next()) != null) {
+ return minuend.docID();
+ }
+ } while ((docId = minuend.nextDoc()) != Scorer.NO_MORE_DOCS);
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ public long cost() {
+ return minuend.cost();
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return minuend.freq();
+ }
+
+ }
+
+ @Override
+ public String toString(String field) {
+ return "NonOverlappingQuery[" + minuend + ", " + subtrahend + "]";
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = super.hashCode();
+ result = prime * result + ((minuend == null) ? 0 : minuend.hashCode());
+ result = prime * result
+ + ((subtrahend == null) ? 0 : subtrahend.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) return true;
+ if (!super.equals(obj)) return false;
+ if (getClass() != obj.getClass()) return false;
+ NonOverlappingQuery other = (NonOverlappingQuery) obj;
+ if (minuend == null) {
+ if (other.minuend != null) return false;
+ } else if (!minuend.equals(other.minuend)) return false;
+ if (subtrahend == null) {
+ if (other.subtrahend != null) return false;
+ } else if (!subtrahend.equals(other.subtrahend)) return false;
+ return true;
+ }
+
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/OrderedConjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/OrderedConjunctionIntervalIterator.java
new file mode 100644
index 0000000..1941bbc
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/OrderedConjunctionIntervalIterator.java
@@ -0,0 +1,169 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.search.Scorer;
+
+import java.io.IOException;
+
+/**
+ * An IntervalIterator based on minimum interval semantics for the
+ * AND< operator
+ *
+ * See "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics"
+ *
+ * @lucene.experimental
+ */
+public final class OrderedConjunctionIntervalIterator extends
+ IntervalIterator {
+
+ private final IntervalIterator[] iterators;
+ private final Interval[] intervals;
+ private final int lastIter;
+ private final Interval interval;
+
+ private int index = 1;
+ private int matchDistance = 0;
+
+ private SnapshotPositionCollector snapshot = null;
+ private boolean collectLeaves = true;
+
+ /**
+ * Create an OrderedConjunctionIntervalIterator over a composite IntervalIterator
+ * @param collectIntervals true if intervals will be collected
+ * @param collectLeaves true if the subiterators' intervals should also be collected
+ * @param field the field the intervals are defined on
+ * @param other a composite IntervalIterator to wrap
+ */
+ public OrderedConjunctionIntervalIterator(boolean collectIntervals, boolean collectLeaves, String field, IntervalIterator other) {
+ this(other.scorer, collectIntervals, field, other.subs(true));
+ this.collectLeaves = collectLeaves;
+ }
+
+ public OrderedConjunctionIntervalIterator(boolean collectIntervals, String field, IntervalIterator other) {
+ this(collectIntervals, true, field, other);
+ }
+
+ /**
+ * Create an OrderedConjunctionIntervalIterator over a set of subiterators
+ * @param scorer the parent Scorer
+ * @param collectIntervals true if intervals will be collected
+ * @param field the field the intervals are defined on
+ * @param iterators the subiterators to wrap
+ */
+ public OrderedConjunctionIntervalIterator(Scorer scorer, boolean collectIntervals, String field, IntervalIterator... iterators) {
+ super(scorer, collectIntervals);
+ this.iterators = iterators;
+ assert iterators.length > 1;
+ intervals = new Interval[iterators.length];
+ lastIter = iterators.length - 1;
+ this.interval = new Interval(field);
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ if(intervals[0] == null) {
+ return null;
+ }
+ interval.setMaximum();
+ int b = Integer.MAX_VALUE;
+ while (true) {
+ while (true) {
+ final Interval previous = intervals[index - 1];
+ if (previous.end >= b) {
+ return interval.begin == Integer.MAX_VALUE ? null : interval;
+ }
+ if (index == intervals.length || intervals[index].begin > previous.end) {
+ break;
+ }
+ Interval current = intervals[index];
+ do {
+ final Interval next;
+ if (current.end >= b || (next = iterators[index].next()) == null) {
+ return interval.begin == Integer.MAX_VALUE ? null : interval;
+ }
+ current = intervals[index] = next;
+ } while (current.begin <= previous.end);
+ index++;
+ }
+ interval.update(intervals[0], intervals[lastIter]);
+ matchDistance = (intervals[lastIter].begin - lastIter) - intervals[0].end;
+ b = intervals[lastIter].begin;
+ index = 1;
+ if (collectIntervals)
+ snapshotSubPositions();
+ intervals[0] = iterators[0].next();
+ if (intervals[0] == null) {
+ return interval.begin == Integer.MAX_VALUE ? null : interval;
+ }
+ }
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return iterators;
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ assert collectIntervals;
+ if (snapshot == null) {
+ // we might not be initialized if the first interval matches
+ collectInternal(collector);
+ } else {
+ snapshot.replay(collector);
+ }
+ }
+
+ private void snapshotSubPositions() {
+ if (snapshot == null) {
+ snapshot = new SnapshotPositionCollector(iterators.length);
+ }
+ snapshot.reset();
+ collectInternal(snapshot);
+ }
+
+ private void collectInternal(IntervalCollector collector) {
+ assert collectIntervals;
+ collector.collectComposite(scorer, interval, docID());
+ if (collectLeaves) {
+ for (IntervalIterator iter : iterators) {
+ iter.collect(collector);
+ }
+ }
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ assert scorer.docID() == docId;
+ //System.out.println("OCI: scorerAdvanced start");
+ for (int i = 0; i < iterators.length; i++) {
+ //System.out.println("OCI: advancing from " + iterators[i].docID() + " to " + docId);
+ int advanceTo = iterators[i].scorerAdvanced(docId);
+ assert advanceTo == docId;
+ intervals[i] = Interval.INFINITE_INTERVAL;
+ }
+ intervals[0] = iterators[0].next();
+ index = 1;
+ return scorer.docID();
+ }
+
+ @Override
+ public int matchDistance() {
+ return matchDistance;
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/OrderedNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/intervals/OrderedNearQuery.java
new file mode 100644
index 0000000..ee9edaf
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/OrderedNearQuery.java
@@ -0,0 +1,58 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Query;
+
+/**
+ * A query that matches if a set of subqueries also match, and are within
+ * a given distance of each other within the document. The subqueries
+ * must appear in the document in order.
+ *
+ * N.B. Positions must be included in the index for this query to work
+ *
+ * Implements the AND< operator as defined in "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics"
+ *
+ * @lucene.experimental
+ */
+
+public class OrderedNearQuery extends IntervalFilterQuery {
+
+ /**
+ * Constructs an OrderedNearQuery
+ * @param slop the maximum distance between the subquery matches
+ * @param collectLeaves false if only the parent interval should be collected
+ * @param subqueries the subqueries to match.
+ */
+ public OrderedNearQuery(int slop, boolean collectLeaves, Query... subqueries) {
+ super(createFieldConjunction(subqueries),
+ new WithinOrderedFilter(ensureSingleFielded(subqueries[0]), slop, collectLeaves));
+ }
+
+ /**
+ * Constructs an OrderedNearQuery
+ * @param slop the maximum distance between the subquery matches
+ * @param subqueries the subqueries to match.
+ */
+ public OrderedNearQuery(int slop, Query... subqueries) {
+ this(slop, true, subqueries);
+ }
+
+}
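A short usage sketch for the constructors above (field and term names are illustrative, not part of this patch):

    // Matches "the quick brown fox" (one intervening term, within slop 3),
    // but not documents where "fox" precedes "quick", since order is enforced.
    Query ordered = new OrderedNearQuery(3,
        new TermQuery(new Term("body", "quick")),
        new TermQuery(new Term("body", "fox")));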
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/RangeFrequencyFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/RangeFrequencyFilter.java
new file mode 100644
index 0000000..b8eb0c5
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/RangeFrequencyFilter.java
@@ -0,0 +1,112 @@
+package org.apache.lucene.search.intervals;
+
+import java.io.IOException;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An IntervalFilter that only matches documents in which the wrapped
+ * IntervalIterator produces between minFreq and maxFreq intervals (inclusive).
+ *
+ * @lucene.experimental
+ */
+public class RangeFrequencyFilter implements IntervalFilter {
+
+ private final int maxFreq;
+ private final int minFreq;
+
+ public RangeFrequencyFilter(int minFreq, int maxFreq) {
+ if (minFreq < 0)
+ throw new IllegalArgumentException("minFreq must not be negative");
+ if (maxFreq < minFreq)
+ throw new IllegalArgumentException("maxFreq must not be less than minFreq");
+ this.maxFreq = maxFreq;
+ this.minFreq = minFreq;
+ }
+
+ @Override
+ public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) {
+ return new MaxFrequencyIntervalIterator(minFreq, maxFreq, iter, collectIntervals);
+ }
+
+ public static class MaxFrequencyIntervalIterator extends IntervalIterator {
+
+ private final IntervalIterator subIter;
+ private final Interval[] intervalCache;
+ private final int[] distanceCache;
+ private final int minFreq;
+
+ private int cachePos = -1;
+ private int freq = -1;
+
+ public MaxFrequencyIntervalIterator(int minFreq, int maxFreq, IntervalIterator iter, boolean collectIntervals) {
+ super(iter == null ? null : iter.scorer, collectIntervals);
+ this.minFreq = minFreq;
+ this.subIter = iter;
+ this.intervalCache = new Interval[maxFreq];
+ for (int i = 0; i < maxFreq; i++) {
+ this.intervalCache[i] = new Interval();
+ }
+ this.distanceCache = new int[maxFreq];
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ cachePos = -1;
+ return subIter.scorerAdvanced(docId);
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ if (cachePos == -1)
+ freq = loadIntervalCache();
+ if (freq == -1 || freq < minFreq)
+ return null;
+ cachePos++;
+ if (cachePos < freq)
+ return intervalCache[cachePos];
+ return null;
+ }
+
+ private int loadIntervalCache() throws IOException {
+ int f = 0;
+ Interval interval;
+ while ((interval = subIter.next()) != null) {
+ if (f >= intervalCache.length)
+ return -1;
+ intervalCache[f].copy(interval);
+ f++;
+ }
+ return f;
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ collector.collectComposite(null, intervalCache[cachePos], subIter.docID());
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return new IntervalIterator[]{ subIter };
+ }
+
+ @Override
+ public int matchDistance() {
+ return distanceCache[cachePos];
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "RANGEFREQ(" + minFreq + "," + maxFreq + ")";
+ }
+}
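As the iterator above shows, the filter only yields intervals when the wrapped iterator produces between minFreq and maxFreq matches in a document. An illustrative sketch, again assuming IntervalFilterQuery's (Query, IntervalFilter) constructor; names are hypothetical:

    // Match documents containing "error" between 2 and 5 times in the body field.
    Query term = new TermQuery(new Term("body", "error"));
    Query bounded = new IntervalFilterQuery(term, new RangeFrequencyFilter(2, 5));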
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/RangeIntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/RangeIntervalFilter.java
new file mode 100644
index 0000000..26fd362
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/RangeIntervalFilter.java
@@ -0,0 +1,101 @@
+package org.apache.lucene.search.intervals;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/**
+ * An IntervalFilter that restricts Intervals returned by an IntervalIterator
+ * to those which occur between a given start and end position.
+ *
+ * @lucene.experimental
+ */
+public class RangeIntervalFilter implements IntervalFilter {
+
+ private int start;
+ private int end;
+
+ /**
+ * Constructs a new RangeIntervalFilter
+ * @param start the start of the filtered range
+ * @param end the end of the filtered range
+ */
+ public RangeIntervalFilter(int start, int end) {
+ this.start = start;
+ this.end = end;
+ }
+
+ @Override
+ public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) {
+ return new RangeIntervalIterator(collectIntervals, iter);
+ }
+
+ /**
+ * Wraps an IntervalIterator ignoring Intervals that fall outside a
+ * given range.
+ */
+ private class RangeIntervalIterator extends IntervalIterator {
+
+ private final IntervalIterator iterator;
+ private Interval interval;
+
+ RangeIntervalIterator(boolean collectIntervals, IntervalIterator iter) {
+ super(iter == null ? null : iter.scorer, collectIntervals);
+ this.iterator = iter;
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ while ((interval = iterator.next()) != null) {
+ if(interval.end > end) {
+ return null;
+ } else if (interval.begin >= start) {
+ return interval;
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return new IntervalIterator[] { iterator };
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ assert collectIntervals;
+ collector.collectComposite(null, interval, iterator.docID());
+ iterator.collect(collector);
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ return iterator.scorerAdvanced(docId);
+ }
+
+ @Override
+ public int matchDistance() {
+ return iterator.matchDistance();
+ }
+
+ }
+
+ @Override
+ public String toString() {
+ return "RANGE(" + start + "," + end + ")";
+ }
+}
\ No newline at end of file
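A sketch of position-range filtering with the filter above (hypothetical names, assuming IntervalFilterQuery's (Query, IntervalFilter) constructor):

    // Only count occurrences of "lucene" whose interval falls within the first 50 positions.
    Query term = new TermQuery(new Term("body", "lucene"));
    Query nearStart = new IntervalFilterQuery(term, new RangeIntervalFilter(0, 50));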
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/SloppyIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/SloppyIntervalIterator.java
new file mode 100644
index 0000000..3a275ee
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/SloppyIntervalIterator.java
@@ -0,0 +1,235 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.search.Scorer;
+
+import java.io.IOException;
+
+/**
+ * An interval iterator that has the semantics of a sloppy phrase query.
+ */
+public class SloppyIntervalIterator extends IntervalIterator {
+ private final int maxLen;
+ private int matchDistance;
+ private final IntervalIterator iterator;
+
+ /**
+ * Create a SloppyIntervalIterator that matches subiterators within
+ * a specified maxLength
+ * @param scorer the parent Scorer
+ * @param maxLength the maximum distance between the first and last subiterator match
+ * @param collectIntervals true if intervals will be collected
+ * @param iterators the subiterators
+ * @throws IOException if a low-level I/O exception occurs
+ */
+ public SloppyIntervalIterator(Scorer scorer, int maxLength,
+ boolean collectIntervals, IntervalIterator... iterators)
+ throws IOException {
+ super(scorer, collectIntervals);
+ this.maxLen = maxLength;
+ this.iterator = new ConjunctionIntervalIterator(scorer, collectIntervals, iterators);
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ Interval current;
+ do {
+ if ((current = iterator.next()) != null) {
+ matchDistance = current.end - current.begin;
+ if (matchDistance <= maxLen) {
+// System.out.println(matchDistance);
+ break;
+ }
+ } else {
+ break;
+ }
+ } while (true);
+ return current;
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ return iterator.scorerAdvanced(docId);
+ }
+
+ public int matchDistance() {
+ return matchDistance;
+ }
+
+ public static IntervalIterator create(Scorer scorer, boolean collectIntervals,
+ IntervalIterator iterator, int... offsets) {
+ if (offsets.length == 1) {
+ return new SingleSloppyIntervalIterator(scorer, collectIntervals, iterator, offsets[0]);
+ } else {
+ return new SloppyGroupIntervalIterator(scorer, collectIntervals, iterator, offsets);
+ }
+
+ }
+
+ private final static class SingleSloppyIntervalIterator extends
+ IntervalIterator {
+ private Interval realInterval;
+ private final Interval sloppyInterval = new Interval();
+ private final IntervalIterator iterator;
+ private int offset;
+
+ public SingleSloppyIntervalIterator(Scorer scorer,
+ boolean collectIntervals, IntervalIterator iterator, int offset) {
+ super(scorer, collectIntervals);
+ this.iterator = iterator;
+ this.offset = offset;
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ return iterator.scorerAdvanced(docId);
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ if ((realInterval = iterator.next()) != null) {
+ sloppyInterval.begin = sloppyInterval.end = realInterval.begin - offset;
+ sloppyInterval.offsetBegin = realInterval.offsetBegin;
+ sloppyInterval.offsetEnd = realInterval.offsetEnd;
+ return sloppyInterval;
+ }
+ return null;
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ collector.collectLeafPosition(scorer, realInterval, docID());
+
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return null;
+ }
+
+ @Override
+ public int matchDistance() {
+ return sloppyInterval.end - sloppyInterval.begin;
+ }
+
+ }
+
+ private final static class SloppyGroupIntervalIterator extends
+ IntervalIterator {
+
+ private final Interval sloppyGroupInterval = new Interval();
+ private final int[] offsets;
+ private final Interval[] intervalPositions;
+ private final IntervalIterator groupIterator;
+ private int currentIndex;
+ private boolean initialized;
+
+ public SloppyGroupIntervalIterator(Scorer scorer, boolean collectIntervals,
+ IntervalIterator groupIterator, int... offsets) {
+ super(scorer, collectIntervals);
+ this.offsets = offsets;
+ this.groupIterator = groupIterator;
+ this.intervalPositions = new Interval[offsets.length];
+ for (int i = 0; i < intervalPositions.length; i++) {
+ intervalPositions[i] = new Interval();
+ }
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ initialized = false;
+ return groupIterator.scorerAdvanced(docId);
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ sloppyGroupInterval.begin = Integer.MAX_VALUE;
+ sloppyGroupInterval.end = Integer.MIN_VALUE;
+ if (!initialized) {
+ initialized = true;
+
+ currentIndex = 0;
+ for (int i = 0; i < offsets.length; i++) {
+ Interval current;
+ if ((current = groupIterator.next()) != null) {
+ intervalPositions[i].copy(current);
+
+ int p = current.begin - offsets[i];
+ sloppyGroupInterval.begin = Math.min(sloppyGroupInterval.begin, p);
+ sloppyGroupInterval.end = Math.max(sloppyGroupInterval.end, p);
+ } else {
+ return null;
+ }
+ }
+ sloppyGroupInterval.offsetBegin = intervalPositions[0].offsetBegin;
+ sloppyGroupInterval.offsetEnd = intervalPositions[intervalPositions.length-1].offsetEnd;
+ return sloppyGroupInterval;
+ }
+ Interval current;
+ if ((current = groupIterator.next()) != null) {
+ final int currentFirst = currentIndex++ % intervalPositions.length;
+ intervalPositions[currentFirst].copy(current);
+ int currentIdx = currentIndex;
+ for (int i = 0; i < intervalPositions.length; i++) { // find min / max
+ int idx = currentIdx++ % intervalPositions.length;
+ int p = intervalPositions[idx].begin - offsets[i];
+ sloppyGroupInterval.begin = Math.min(sloppyGroupInterval.begin, p);
+ sloppyGroupInterval.end = Math.max(sloppyGroupInterval.end, p);
+ }
+ sloppyGroupInterval.offsetBegin = intervalPositions[currentIndex % intervalPositions.length].offsetBegin;
+ sloppyGroupInterval.offsetEnd = intervalPositions[currentFirst].offsetEnd;
+ return sloppyGroupInterval;
+ }
+ return null;
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ int currentIdx = currentIndex+1;
+ for (int i = 0; i < intervalPositions.length; i++) { // find min / max
+ int idx = currentIdx++ % intervalPositions.length;
+ collector.collectLeafPosition(scorer, intervalPositions[idx],
+ docID());
+ }
+
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return new IntervalIterator[] {groupIterator};
+ }
+
+ @Override
+ public int matchDistance() {
+ return sloppyGroupInterval.end - sloppyGroupInterval.begin;
+ }
+
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ assert collectIntervals;
+ this.iterator.collect(collector);
+
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return null;
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/SnapshotPositionCollector.java b/lucene/core/src/java/org/apache/lucene/search/intervals/SnapshotPositionCollector.java
new file mode 100644
index 0000000..7a4c500
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/SnapshotPositionCollector.java
@@ -0,0 +1,114 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * An IntervalCollector that allows a snapshot of the state of an
+ * IntervalIterator to be taken before it is advanced.
+ *
+ * Conjunction iterators advance their subiterators before the consumer
+ * can call collect on the top level iterator. If intervals are to be
+ * collected, we need to record the last possible match so that we can
+ * return the correct intervals for the match.
+ *
+ * @lucene.internal
+ */
+final class SnapshotPositionCollector implements IntervalCollector {
+
+ private SingleSnapshot[] snapshots;
+ private int index = 0;
+
+ /**
+ * Create a new collector with n snapshots
+ * @param subs the number of subiterators to record
+ */
+ SnapshotPositionCollector(int subs) {
+ snapshots = new SingleSnapshot[subs];
+ }
+
+ @Override
+ public void collectLeafPosition(Scorer scorer, Interval interval,
+ int docID) {
+ collect(scorer, interval, docID, true);
+
+ }
+
+ private void collect(Scorer scorer, Interval interval, int docID,
+ boolean isLeaf) {
+ if (snapshots.length <= index) {
+ grow(ArrayUtil.oversize(index + 1,
+ (RamUsageEstimator.NUM_BYTES_OBJECT_REF * 2)
+ + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ + RamUsageEstimator.NUM_BYTES_BOOLEAN
+ + RamUsageEstimator.NUM_BYTES_INT));
+ }
+ if (snapshots[index] == null) {
+ snapshots[index] = new SingleSnapshot();
+ }
+ snapshots[index++].set(scorer, interval, isLeaf, docID);
+ }
+
+ @Override
+ public void collectComposite(Scorer scorer, Interval interval,
+ int docID) {
+ collect(scorer, interval, docID, false);
+ }
+
+ void replay(IntervalCollector collector) {
+ for (int i = 0; i < index; i++) {
+ SingleSnapshot singleSnapshot = snapshots[i];
+ if (singleSnapshot.isLeaf) {
+ collector.collectLeafPosition(singleSnapshot.scorer,
+ singleSnapshot.interval, singleSnapshot.docID);
+ } else {
+ collector.collectComposite(singleSnapshot.scorer,
+ singleSnapshot.interval, singleSnapshot.docID);
+ }
+ }
+ }
+
+ void reset() {
+ index = 0;
+ }
+
+ private void grow(int size) {
+ final SingleSnapshot[] newArray = new SingleSnapshot[size];
+ System.arraycopy(snapshots, 0, newArray, 0, index);
+ snapshots = newArray;
+ }
+
+ private static final class SingleSnapshot {
+ Scorer scorer;
+ final Interval interval = new Interval();
+ boolean isLeaf;
+ int docID;
+
+ void set(Scorer scorer, Interval interval, boolean isLeaf,
+ int docID) {
+ this.scorer = scorer;
+ this.interval.copy(interval);
+ this.isLeaf = isLeaf;
+ this.docID = docID;
+ }
+ }
+
+}
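The class javadoc above describes the snapshot/replay pattern; OrderedConjunctionIntervalIterator earlier in this patch is its consumer. A condensed sketch of that pattern:

    // Record the positions of the current match before the subiterators move on...
    if (snapshot == null) {
      snapshot = new SnapshotPositionCollector(iterators.length);
    }
    snapshot.reset();
    collectInternal(snapshot);   // routes collectLeafPosition/collectComposite calls into the snapshot
    // ...and replay them when the consumer eventually calls collect():
    snapshot.replay(collector);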
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/TermIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/TermIntervalIterator.java
new file mode 100644
index 0000000..fa5c611
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/TermIntervalIterator.java
@@ -0,0 +1,127 @@
+package org.apache.lucene.search.intervals;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.search.Scorer;
+
+import java.io.IOException;
+
+
+/**
+ * Iterates over the individual positions of a term in a document
+ */
+public final class TermIntervalIterator extends IntervalIterator {
+
+ private final Interval interval;
+ int positionsPending;
+ private final DocsAndPositionsEnum docsAndPos;
+ private int docID = -1;
+ private final String field;
+
+ /**
+ * Constructs a new TermIntervalIterator
+ * @param scorer the parent Scorer
+ * @param docsAndPos a DocsAndPositionsEnum positioned on the current document
+ * @param doPayloads true if payloads should be retrieved for the positions
+ * @param collectIntervals true if positions will be collected
+ * @param field the field of the term
+ */
+ public TermIntervalIterator(Scorer scorer, DocsAndPositionsEnum docsAndPos,
+ boolean doPayloads, boolean collectIntervals, String field) {
+ super(scorer, collectIntervals);
+ this.docsAndPos = docsAndPos;
+ this.interval = new Interval(field);
+ this.field = field;
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ if (--positionsPending >= 0) {
+ interval.begin = interval.end = docsAndPos.nextPosition();
+ interval.offsetBegin = docsAndPos.startOffset();
+ interval.offsetEnd = docsAndPos.endOffset();
+ return interval;
+ }
+ positionsPending = 0;
+ return null;
+ }
+
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return EMPTY;
+ }
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ collector.collectLeafPosition(scorer, interval, docID);
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+// interval.reset();
+ if (docsAndPos.docID() == docId) {
+ positionsPending = docsAndPos.freq();
+ } else {
+ positionsPending = -1;
+ }
+ return docID = docsAndPos.docID();
+ }
+
+ @Override
+ public String toString() {
+ return "TermPositions [interval=" + interval + ", positionsPending="
+ + positionsPending + ", docID=" + docID + "]";
+ }
+
+ @Override
+ public int matchDistance() {
+ return 0;
+ }
+// TODO not supported yet - need to figure out what that means really to support payloads
+// private static final class PayloadInterval extends Interval {
+// private int pos = -1;
+// private final DocsAndPositionsEnum payloads;
+// private final TermIntervalIterator termPos;
+//
+// public PayloadInterval(DocsAndPositionsEnum payloads, TermIntervalIterator pos) {
+// this.payloads = payloads;
+// this.termPos = pos;
+// }
+//
+// @Override
+// public BytesRef nextPayload() throws IOException {
+// if (pos == termPos.positionsPending) {
+// return null;
+// } else {
+// pos = termPos.positionsPending;
+// return payloads.getPayload();
+// }
+// }
+//
+// @Override
+// public void reset() {
+// super.reset();
+// pos = -1;
+// }
+//
+// }
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/UnorderedNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/intervals/UnorderedNearQuery.java
new file mode 100644
index 0000000..feda7b1
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/UnorderedNearQuery.java
@@ -0,0 +1,82 @@
+package org.apache.lucene.search.intervals;
+
+/**
+ * Copyright (c) 2012 Lemur Consulting Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Query;
+
+/**
+ * A query that matches if a set of subqueries also match, and are within
+ * a given distance of each other within the document. The subqueries
+ * may appear in the document in any order.
+ *
+ * N.B. Positions must be included in the index for this query to work
+ *
+ * Implements the LOWPASSk operator as defined in "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics"
+ *
+ * @lucene.experimental
+ */
+
+public class UnorderedNearQuery extends IntervalFilterQuery {
+
+ private final int slop;
+
+ /**
+ * Constructs an UnorderedNearQuery
+ * @param slop the maximum distance between the subquery matches
+ * @param collectLeaves false if only the parent interval should be collected
+ * @param subqueries the subqueries to match.
+ */
+ public UnorderedNearQuery(int slop, boolean collectLeaves, Query... subqueries) {
+ super(createFieldConjunction(subqueries), new WithinUnorderedFilter(slop + subqueries.length - 2, collectLeaves));
+ this.slop = slop;
+ }
+
+ /**
+ * Constructs an UnorderedNearQuery
+ * @param slop the maximum distance between the subquery matches
+ * @param subqueries the subqueries to match.
+ */
+ public UnorderedNearQuery(int slop, Query... subqueries) {
+ this(slop, true, subqueries);
+ }
+
+ @Override
+ public String toString() {
+ return "UnorderedNear/" + slop + ":" + super.toString("");
+ }
+
+ public static class WithinUnorderedFilter implements IntervalFilter {
+
+ final IntervalFilter innerFilter;
+ final boolean collectLeaves;
+
+ public WithinUnorderedFilter(int slop, boolean collectLeaves) {
+ this.innerFilter = new WithinIntervalFilter(slop);
+ this.collectLeaves = collectLeaves;
+ }
+
+ @Override
+ public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) {
+ return innerFilter.filter(collectIntervals,
+ new ConjunctionIntervalIterator(iter.scorer, collectIntervals, collectLeaves, iter.subs(false)));
+ }
+ }
+
+}
+
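A usage sketch mirroring OrderedNearQuery but without the ordering constraint (illustrative names, not part of this patch):

    // Matches "quick brown fox" as well as "fox ... quick", provided the terms
    // fall within the given slop of each other.
    Query unordered = new UnorderedNearQuery(4,
        new TermQuery(new Term("body", "quick")),
        new TermQuery(new Term("body", "fox")));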
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/WithinIntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/WithinIntervalFilter.java
new file mode 100644
index 0000000..02dc955
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/WithinIntervalFilter.java
@@ -0,0 +1,121 @@
+package org.apache.lucene.search.intervals;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+
+/**
+ * An IntervalFilter that restricts Intervals returned by an IntervalIterator
+ * to those which have a matchDistance less than a defined slop.
+ *
+ * @lucene.experimental
+ */
+public class WithinIntervalFilter implements IntervalFilter {
+
+ private final int slop;
+ private boolean collectLeaves = true;
+
+ /**
+ * Construct a new WithinIntervalFilter
+ * @param slop the maximum slop allowed for subintervals
+ */
+ public WithinIntervalFilter(int slop) {
+ this.slop = slop;
+ }
+
+ /**
+ * Construct a new WithinIntervalFilter
+ * @param slop the maximum slop allowed for subintervals
+ */
+ public WithinIntervalFilter(int slop, boolean collectLeaves) {
+ this.slop = slop;
+ this.collectLeaves = collectLeaves;
+ }
+
+ /**
+ * @return the slop
+ */
+ public int getSlop() {
+ return slop;
+ }
+
+ @Override
+ public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) {
+ return new WithinIntervalIterator(collectIntervals, iter);
+ }
+
+ class WithinIntervalIterator extends IntervalIterator {
+
+ private IntervalIterator iterator;
+ private Interval interval;
+
+ WithinIntervalIterator(boolean collectIntervals, IntervalIterator iter) {
+ super(iter == null ? null : iter.scorer, collectIntervals);
+ this.iterator = iter;
+ }
+
+ @Override
+ public Interval next() throws IOException {
+ while ((interval = iterator.next()) != null) {
+ if((iterator.matchDistance()) <= slop){
+ return interval;
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public IntervalIterator[] subs(boolean inOrder) {
+ return new IntervalIterator[] {iterator};
+ }
+
+
+ @Override
+ public void collect(IntervalCollector collector) {
+ assert collectIntervals;
+ collector.collectComposite(null, interval, iterator.docID());
+ if (collectLeaves)
+ iterator.collect(collector);
+ }
+
+ @Override
+ public int scorerAdvanced(int docId) throws IOException {
+ return iterator.scorerAdvanced(docId);
+ }
+
+ @Override
+ public int matchDistance() {
+ return iterator.matchDistance();
+ }
+
+ @Override
+ public String toString() {
+ return "WithinIntervalIterator[" + iterator.docID() + ":" + interval + "]";
+ }
+
+ @Override
+ public int docID() {
+ return iterator.docID();
+ }
+
+ }
+
+ @Override
+ public String toString() {
+ return "WITHIN(" + slop + ")";
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/WithinOrderedFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/WithinOrderedFilter.java
new file mode 100644
index 0000000..34a4458
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/WithinOrderedFilter.java
@@ -0,0 +1,58 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An IntervalFilter that restricts an IntervalIterator to return
+ * only Intervals that occur in order within a given distance.
+ *
+ * @see WithinIntervalFilter
+ */
+public class WithinOrderedFilter implements IntervalFilter {
+
+ private final WithinIntervalFilter innerFilter;
+ private final boolean collectLeaves;
+ private final String field;
+
+ /**
+ * Constructs a new WithinOrderedFilter with a given slop
+ * @param field the field the intervals are defined on
+ * @param slop the maximum distance allowed between subintervals
+ * @param collectLeaves false if only the parent interval should be collected
+ */
+ public WithinOrderedFilter(String field, int slop, boolean collectLeaves) {
+ this.innerFilter = new WithinIntervalFilter(slop);
+ this.collectLeaves = collectLeaves;
+ this.field = field;
+ }
+
+ public WithinOrderedFilter(String field, int slop) {
+ this(field, slop, true);
+ }
+
+ @Override
+ public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) {
+ return innerFilter.filter(collectIntervals,
+ new OrderedConjunctionIntervalIterator(collectIntervals, collectLeaves, field, iter));
+ }
+
+ @Override
+ public String toString() {
+ return "ORDEREDWITHIN(" + this.innerFilter.getSlop() + ")";
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/package.html b/lucene/core/src/java/org/apache/lucene/search/intervals/package.html
new file mode 100644
index 0000000..75eac5f
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/package.html
@@ -0,0 +1,70 @@
+<html>
+<head>
+  <title>org.apache.lucene.search.intervals</title>
+</head>
+<body>
+
+<h2>Interval Iterators</h2>
+
+<p>
+Lucene offers extensive query and scoring flexibility, including boolean queries, specialized phrase queries, wildcards and many more. The intervals package aims
+to provide a common interface to Lucene's proximity features, available on all core queries. The central class in this package is
+{@link org.apache.lucene.search.intervals.IntervalIterator IntervalIterator}, which allows iterative consumption of term positions and offsets on complex queries.
+{@link org.apache.lucene.search.Scorer Scorer} exposes direct access to the query's {@link org.apache.lucene.search.intervals.IntervalIterator IntervalIterator}, reflecting a logical view
+of the scorer on positions and offsets for each matching document.
+</p>
+
+<p>
+Intervals are entirely detached from scoring/matching documents and have no effect on query performance if proximity information or offsets are not needed or consumed. Their lazy nature requires
+the user to specify the need for positions/offsets at scorer creation time, per segment, allowing for a large number of use cases:
+</p>
+
+<ul>
+  <li>Proximity matching without scoring, i.e. if token positions are needed for filtering out documents but the actual query score should not be modified</li>
+  <li>Second-pass scoring, i.e. for high-performance proximity queries it is common practice to re-score the top N (usually a large N) results of a non-proximity query with proximity information to improve precision</li>
+  <li>Collecting an exhaustive list of intervals per query, i.e. complex queries might be interested in actual term positions across the entire query tree</li>
+  <li>Highlighting queries without re-analyzing the document or storing term vectors, provided offsets are stored in the index. Especially large documents will see a tremendous performance and space-consumption improvement over term vectors / re-analyzing</li>
+  <li>Specializing queries for exotic proximity operators based on core queries</li>
+</ul>
+
+<h2>Core Iterators and Queries</h2>
+
+<p>
+The intervals package provides a basic set of {@link org.apache.lucene.search.intervals.IntervalIterator IntervalIterator} and {@link org.apache.lucene.search.Query Query} implementations
+based on minimum interval semantics, as defined in
+"Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics".
+</p>
+
+<p>The following {@link org.apache.lucene.search.intervals.IntervalIterator IntervalIterator} implementations are provided:</p>
+<ul>
+  <li>{@link org.apache.lucene.search.intervals.BlockIntervalIterator BlockIntervalIterator} -- an iterator providing an ordered phrasal operator with given gaps between sub-iterators</li>
+  <li>{@link org.apache.lucene.search.intervals.OrderedConjunctionIntervalIterator OrderedConjunctionIntervalIterator} -- an iterator providing an ordered, non-overlapping conjunction operator</li>
+  <li>{@link org.apache.lucene.search.intervals.ConjunctionIntervalIterator ConjunctionIntervalIterator} -- an iterator providing an unordered conjunction operator</li>
+  <li>{@link org.apache.lucene.search.intervals.BrouwerianIntervalIterator BrouwerianIntervalIterator} -- an iterator computing the non-overlapping difference between two iterators</li>
+  <li>{@link org.apache.lucene.search.intervals.DisjunctionIntervalIterator DisjunctionIntervalIterator} -- an iterator providing an unordered disjunction operator</li>
+</ul>
+<p>All queries require positions to be stored in the index.</p>
+
+<p>The following Query implementations are provided:</p>
+<ul>
+  <li>{@link org.apache.lucene.search.intervals.IntervalFilterQuery IntervalFilterQuery} -- filters a Query based on the positions or ranges of its component parts</li>
+  <li>{@link org.apache.lucene.search.intervals.OrderedNearQuery OrderedNearQuery} -- filters queries based on the ordered difference between their match positions in a document</li>
+  <li>{@link org.apache.lucene.search.intervals.UnorderedNearQuery UnorderedNearQuery} -- filters queries based on the unordered difference between their match positions in a document</li>
+  <li>{@link org.apache.lucene.search.intervals.NonOverlappingQuery NonOverlappingQuery} -- filters out queries with overlapping match positions</li>
+</ul>
+<p>All queries require positions to be stored in the index.</p>
+
+</body>
+</html>
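A hedged sketch of the consumption pattern the overview describes, using the Scorer#intervals(boolean) hook and the IntervalIterator API added by this patch; the weight/context/acceptDocs plumbing is assumed to already exist in the caller:

    Scorer scorer = weight.scorer(context, Weight.PostingFeatures.POSITIONS, acceptDocs);
    IntervalIterator it = scorer.intervals(false);
    int doc;
    while ((doc = scorer.nextDoc()) != Scorer.NO_MORE_DOCS) {
      it.scorerAdvanced(doc);
      for (Interval i = it.next(); i != null; i = it.next()) {
        // i.begin / i.end are term positions; i.offsetBegin / i.offsetEnd are
        // character offsets when offsets are indexed.
      }
    }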
diff --git a/lucene/core/src/java/org/apache/lucene/search/package.html b/lucene/core/src/java/org/apache/lucene/search/package.html
index 1be51fb..33ad619 100644
--- a/lucene/core/src/java/org/apache/lucene/search/package.html
+++ b/lucene/core/src/java/org/apache/lucene/search/package.html
@@ -436,14 +436,16 @@ on the built-in available scoring models and extending or changing Similarity.
that scores via a {@link org.apache.lucene.search.similarities.Similarity Similarity} will just defer to the Similarity's implementation:
{@link org.apache.lucene.search.similarities.Similarity.SimWeight#normalize SimWeight#normalize(float,float)}.
- {@link org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.AtomicReaderContext, org.apache.lucene.util.Bits)
- scorer(AtomicReaderContext context, Bits acceptDocs)} —
+ {@link
+ org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.AtomicReaderContext,
+ PostingFeatures flags, org.apache.lucene.util.Bits)
+ scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs)} —
Construct a new {@link org.apache.lucene.search.Scorer Scorer} for this Weight. See The Scorer Class
below for help defining a Scorer. As the name implies, the Scorer is responsible for doing the actual scoring of documents
given the Query.