diff --git a/.gitignore b/.gitignore index 443dd2f..811524e 100644 --- a/.gitignore +++ b/.gitignore @@ -19,7 +19,6 @@ /prj.el /bin /bin.* -**/pom.xml /nbproject /nb-build @@ -43,4 +42,219 @@ solr/contrib/dataimporthandler/test-lib/ solr/contrib/morphlines-core/test-lib/ +# ./solr/example/example-DIH/solr/db +/solr/example/example-DIH/solr/db/data + + +# ./solr/example/example-DIH/solr/db/conf +/solr/example/example-DIH/solr/db/conf/dataimport.properties + + +# ./solr/example/example-DIH/solr/tika +/solr/example/example-DIH/solr/tika/data + + +# ./solr/site-src +/solr/site-src/build + + +# ./solr/webapp +/solr/webapp/pom.xml + + +# ./solr/test-framework +/solr/test-framework/pom.xml + + +# ./solr/contrib +/solr/contrib/pom.xml + + +# ./solr/contrib/clustering +/solr/contrib/clustering/build +/solr/contrib/clustering/*.iml +/solr/contrib/clustering/pom.xml + + +# ./solr/contrib/clustering/lib +/solr/contrib/clustering/lib/pcj-* +/solr/contrib/clustering/lib/simple-xml-* +/solr/contrib/clustering/lib/colt-* +/solr/contrib/clustering/lib/nni-* +/solr/contrib/clustering/lib/downloads + + +# ./solr/contrib/uima +/solr/contrib/uima/build +/solr/contrib/uima/*.iml +/solr/contrib/uima/pom.xml + + +# ./solr/contrib/velocity +/solr/contrib/velocity/*.iml +/solr/contrib/velocity/pom.xml + + +# ./solr/contrib/extraction +/solr/contrib/extraction/build +/solr/contrib/extraction/*.iml +/solr/contrib/extraction/pom.xml + + +# ./solr/contrib/analysis-extras +/solr/contrib/analysis-extras/build +/solr/contrib/analysis-extras/lucene-libs +/solr/contrib/analysis-extras/*.iml +/solr/contrib/analysis-extras/pom.xml + + +# ./solr/contrib/langid +/solr/contrib/langid/*.iml +/solr/contrib/langid/pom.xml + + +# ./solr/contrib/dataimporthandler-extras +/solr/contrib/dataimporthandler-extras/*.iml +/solr/contrib/dataimporthandler-extras/pom.xml + + +# ./solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/conf 
+/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/conf/dataimport.properties + + +# ./solr/contrib/dataimporthandler +/solr/contrib/dataimporthandler/target +/solr/contrib/dataimporthandler/*.iml +/solr/contrib/dataimporthandler/pom.xml + + +# ./solr/contrib/dataimporthandler/src +/solr/contrib/dataimporthandler/src/pom.xml + + +# ./solr/contrib/dataimporthandler/src/test-files +/solr/contrib/dataimporthandler/src/test-files/dataimport.properties + + +# ./solr/contrib/dataimporthandler/src/test-files/dih/solr/conf +/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport.properties + + +# ./modules +/modules/build +/modules/dist +/modules/pom.xml + + +# ./modules/facet +/modules/facet/build +/modules/facet/dist +/modules/facet/*.iml +/modules/facet/pom.xml + + +# ./modules/benchmark +/modules/benchmark/temp +/modules/benchmark/work +/modules/benchmark/build +/modules/benchmark/*.iml +/modules/benchmark/pom.xml + + +# ./modules/grouping +/modules/grouping/build +/modules/grouping/dist +/modules/grouping/*.iml +/modules/grouping/pom.xml + + +# ./modules/spatial +/modules/spatial/build +/modules/spatial/pom.xml +/modules/spatial/*.iml + + +# ./modules/analysis +/modules/analysis/build +/modules/analysis/pom.xml + + +# ./modules/analysis/icu +/modules/analysis/icu/*.iml +/modules/analysis/icu/pom.xml + + +# ./modules/analysis/uima +/modules/analysis/uima/*.iml +/modules/analysis/uima/pom.xml + + +# ./modules/analysis/morfologik +/modules/analysis/morfologik/*.iml +/modules/analysis/morfologik/pom.xml + + +# ./modules/analysis/common +/modules/analysis/common/*.iml +/modules/analysis/common/pom.xml + + +# ./modules/analysis/common/src/java/org/apache/lucene/analysis/standard +/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/Token.java +/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java 
+/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java +/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/TokenMgrError.java +/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/CharStream.java +/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerConstants.java + + +# ./modules/analysis/stempel +/modules/analysis/stempel/*.iml +/modules/analysis/stempel/pom.xml + + +# ./modules/analysis/phonetic +/modules/analysis/phonetic/*.iml +/modules/analysis/phonetic/pom.xml + + +# ./modules/analysis/smartcn +/modules/analysis/smartcn/*.iml +/modules/analysis/smartcn/pom.xml + + +# ./modules/analysis/kuromoji +/modules/analysis/kuromoji/*.iml +/modules/analysis/kuromoji/pom.xml + + +# ./modules/queries +/modules/queries/build +/modules/queries/pom.xml +/modules/queries/*.iml + + +# ./modules/join +/modules/join/build +/modules/join/dist +/modules/join/*.iml +/modules/join/pom.xml + + +# ./modules/queryparser +/modules/queryparser/pom.xml +/modules/queryparser/*.iml +/modules/queryparser/build + + +# ./modules/suggest +/modules/suggest/build +/modules/suggest/*.iml +/modules/suggest/pom.xml +/modules/suggest/dist + +/modules/spatial/build/ + +# maven-build +target/ solr/core/test-lib/ diff --git a/README.txt b/README.txt index 3599b5b..addf65c 100644 --- a/README.txt +++ b/README.txt @@ -1,13 +1,13 @@ -Apache Lucene/Solr +Apache Lucene/Solr - positions fork lucene/ is a search engine library solr/ is a search engine server that uses lucene -To compile the sources run 'ant compile' -To run all the tests run 'ant test' -To setup your ide run 'ant idea', 'ant netbeans', or 'ant eclipse' -For Maven info, see dev-tools/maven/README.maven +This version of lucene/solr is a fork based on +https://issues.apache.org/jira/browse/LUCENE-2878, that allows consumer code to +iterate through individual hit positions on a searcher match. 
-For more information on how to contribute see: -http://wiki.apache.org/lucene-java/HowToContribute -http://wiki.apache.org/solr/HowToContribute +To build: + +cd maven-build +mvn -DskipTests install diff --git a/dev-tools/idea/lucene/highlighter/highlighter.iml b/dev-tools/idea/lucene/highlighter/highlighter.iml index 0a8e679..8b6d644 100644 --- a/dev-tools/idea/lucene/highlighter/highlighter.iml +++ b/dev-tools/idea/lucene/highlighter/highlighter.iml @@ -12,6 +12,7 @@ + diff --git a/dev-tools/maven/pom.xml.template b/dev-tools/maven/pom.xml.template index 5539f18..c257b1e 100644 --- a/dev-tools/maven/pom.xml.template +++ b/dev-tools/maven/pom.xml.template @@ -176,6 +176,11 @@ org.apache.maven.plugins + maven-release-plugin + 2.5 + + + org.apache.maven.plugins maven-clean-plugin 2.5 diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java index 53b4ecd..d4e347b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java @@ -118,6 +118,12 @@ public final class StemmerOverrideFilter extends TokenFilter { /** * Returns the value mapped to the given key or null if the key is not in the FST dictionary. 
+ * @param buffer a char[] buffer containing the key + * @param bufferLen the length of the char[] buffer + * @param scratchArc a scratch Arc + * @param fstReader an fstReader + * @return a {@link BytesRef} pointing to the value or null + * @throws IOException */ public BytesRef get(char[] buffer, int bufferLen, Arc scratchArc, BytesReader fstReader) throws IOException { BytesRef pendingOutput = fst.outputs.getNoOutput(); diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index 4d7635d..b6e3af5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -17,13 +17,6 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.Set; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -32,6 +25,13 @@ import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + /** A Query that matches documents matching boolean combinations of other * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other * BooleanQuerys. 
@@ -147,6 +147,20 @@ public class BooleanQuery extends Query implements Iterable { clauses.add(clause); } + @Override + public String getField() { + if (clauses().size() == 0) + return null; + final String field = clauses.get(0).getQuery().getField(); + if (field == null) + return null; + for (BooleanClause clause : clauses) { + if (!field.equals(clause.getQuery().getField())) + return null; + } + return field; + } + /** Returns the set of clauses in this query. */ public BooleanClause[] getClauses() { return clauses.toArray(new BooleanClause[clauses.size()]); @@ -242,7 +256,7 @@ public class BooleanQuery extends Query implements Iterable { for (Iterator wIter = weights.iterator(); wIter.hasNext();) { Weight w = wIter.next(); BooleanClause c = cIter.next(); - if (w.scorer(context, context.reader().getLiveDocs()) == null) { + if (w.scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()) == null) { if (c.isRequired()) { fail = true; Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); @@ -306,12 +320,12 @@ public class BooleanQuery extends Query implements Iterable { @Override public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, - Bits acceptDocs) throws IOException { + PostingFeatures flags, Bits acceptDocs) throws IOException { if (scoreDocsInOrder || minNrShouldMatch > 1) { // TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch // but the same is even true of pure conjunctions... 
- return super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); } List prohibited = new ArrayList(); @@ -319,7 +333,7 @@ public class BooleanQuery extends Query implements Iterable { Iterator cIter = clauses.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); - BulkScorer subScorer = w.bulkScorer(context, false, acceptDocs); + BulkScorer subScorer = w.bulkScorer(context, false, flags, acceptDocs); if (subScorer == null) { if (c.isRequired()) { return null; @@ -328,7 +342,7 @@ public class BooleanQuery extends Query implements Iterable { // TODO: there are some cases where BooleanScorer // would handle conjunctions faster than // BooleanScorer2... - return super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); } else if (c.isProhibited()) { prohibited.add(subScorer); } else { @@ -340,7 +354,7 @@ public class BooleanQuery extends Query implements Iterable { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { // initially the user provided value, // but if minNrShouldMatch == optional.size(), @@ -353,7 +367,7 @@ public class BooleanQuery extends Query implements Iterable { Iterator cIter = clauses.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); - Scorer subScorer = w.scorer(context, acceptDocs); + Scorer subScorer = w.scorer(context, flags, acceptDocs); if (subScorer == null) { if (c.isRequired()) { return null; diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java index 173bb44..dbd8827 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java @@ -17,15 +17,11 
@@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.search.BooleanQuery.BooleanWeight; + import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; import java.util.List; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.search.BooleanQuery.BooleanWeight; - /* Description from Doug Cutting (excerpted from * LUCENE-1483): * @@ -96,14 +92,14 @@ final class BooleanScorer extends BulkScorer { public void setScorer(Scorer scorer) { this.scorer = scorer; } - + @Override public boolean acceptsDocsOutOfOrder() { return true; } } - + static final class Bucket { int doc = -1; // tells if bucket is valid double score; // incremental score @@ -262,7 +258,6 @@ final class BooleanScorer extends BulkScorer { return false; } - @Override public String toString() { StringBuilder buffer = new StringBuilder(); buffer.append("boolean("); diff --git a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java index c5957d8..3e6e757 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/CachingCollector.java @@ -18,6 +18,8 @@ package org.apache.lucene.search; */ import org.apache.lucene.index.AtomicReaderContext; + +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; @@ -77,9 +79,14 @@ public abstract class CachingCollector extends FilterCollector { public final int nextDoc() { throw new UnsupportedOperationException(); } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { throw new UnsupportedOperationException(); } + + @Override public long cost() { return 1; } + } + // A CachingCollector which caches scores private static class NoScoreCachingCollector 
extends CachingCollector { List acceptDocsOutOfOrders; diff --git a/lucene/core/src/java/org/apache/lucene/search/Collector.java b/lucene/core/src/java/org/apache/lucene/search/Collector.java index bb47394..9749748 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Collector.java +++ b/lucene/core/src/java/org/apache/lucene/search/Collector.java @@ -17,10 +17,10 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.AtomicReaderContext; +import java.io.IOException; + /** *

Expert: Collectors are primarily meant to be used to * gather raw results from a search, and implement sorting diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java index 3e81187..5b5b2fe 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java @@ -17,15 +17,19 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.search.intervals.CombinedIntervalIterator; +import org.apache.lucene.search.intervals.IntervalIterator; +import org.apache.lucene.util.ArrayUtil; + import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; -import org.apache.lucene.util.ArrayUtil; - /** Scorer for conjunctions, sets of queries, all of which are required. */ class ConjunctionScorer extends Scorer { + + private final Scorer[] scorersOrdered; protected int lastDoc = -1; protected final DocsAndFreqs[] docsAndFreqs; private final DocsAndFreqs lead; @@ -42,7 +46,10 @@ class ConjunctionScorer extends Scorer { for (int i = 0; i < scorers.length; i++) { docsAndFreqs[i] = new DocsAndFreqs(scorers[i]); } - // Sort the array the first time to allow the least frequent DocsEnum to + scorersOrdered = new Scorer[scorers.length]; + System.arraycopy(scorers, 0, scorersOrdered, 0, scorers.length); + + // Sort the array the first time to allow the least frequent DocsEnum to // lead the matching. 
ArrayUtil.timSort(docsAndFreqs, new Comparator() { @Override @@ -114,6 +121,16 @@ class ConjunctionScorer extends Scorer { public int freq() { return docsAndFreqs.length; } + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + if (scorersOrdered == null) { + throw new IllegalStateException("no positions requested for this scorer"); + } + // only created if needed for this scorer - no penalty for non-positional queries + return new CombinedIntervalIterator(this, collectIntervals, pullIterators(collectIntervals, scorersOrdered)); + } + @Override public long cost() { diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java index d26abe1..9cc9560 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java @@ -17,8 +17,6 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; - import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; @@ -28,8 +26,10 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; -import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; class ConstantScoreAutoRewrite extends TermCollectingRewrite { @@ -109,7 +109,7 @@ class ConstantScoreAutoRewrite extends TermCollectingRewrite { } } // Strip scores - final Query result = new ConstantScoreQuery(bq); + final Query result = new ConstantScoreQuery(query.getField(), bq); result.setBoost(query.getBoost()); return result; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index 2b7f4ed..b6ae6f0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -20,6 +20,7 @@ package org.apache.lucene.search; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; @@ -36,14 +37,20 @@ import java.util.Set; public class ConstantScoreQuery extends Query { protected final Filter filter; protected final Query query; + protected final String field; /** Strips off scores from the passed in Query. The hits will get a constant score * dependent on the boost factor of this query. 
*/ public ConstantScoreQuery(Query query) { + this(null, query); + } + + public ConstantScoreQuery(String field, Query query) { if (query == null) throw new NullPointerException("Query may not be null"); this.filter = null; this.query = query; + this.field = field; } /** Wraps a Filter as a Query. The hits will get a constant score @@ -57,6 +64,7 @@ public class ConstantScoreQuery extends Query { throw new NullPointerException("Filter may not be null"); this.filter = filter; this.query = null; + this.field = null; } /** Returns the encapsulated filter, returns {@code null} if a query is wrapped. */ @@ -70,6 +78,11 @@ public class ConstantScoreQuery extends Query { } @Override + public String getField() { + return field; + } + + @Override public Query rewrite(IndexReader reader) throws IOException { if (query != null) { Query rewritten = query.rewrite(reader); @@ -134,14 +147,14 @@ public class ConstantScoreQuery extends Query { } @Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, PostingFeatures flags, Bits acceptDocs) throws IOException { final DocIdSetIterator disi; if (filter != null) { assert query == null; - return super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); } else { assert query != null && innerWeight != null; - BulkScorer bulkScorer = innerWeight.bulkScorer(context, scoreDocsInOrder, acceptDocs); + BulkScorer bulkScorer = innerWeight.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); if (bulkScorer == null) { return null; } @@ -150,7 +163,7 @@ public class ConstantScoreQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { 
final DocIdSetIterator disi; if (filter != null) { assert query == null; @@ -161,7 +174,7 @@ public class ConstantScoreQuery extends Query { disi = dis.iterator(); } else { assert query != null && innerWeight != null; - disi = innerWeight.scorer(context, acceptDocs); + disi = innerWeight.scorer(context, flags, acceptDocs); } if (disi == null) { @@ -177,7 +190,7 @@ public class ConstantScoreQuery extends Query { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - final Scorer cs = scorer(context, context.reader().getLiveDocs()); + final Scorer cs = scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()); final boolean exists = (cs != null && cs.advance(doc) == doc); final ComplexExplanation result = new ComplexExplanation(); @@ -267,6 +280,15 @@ public class ConstantScoreQuery extends Query { public long cost() { return docIdSetIterator.cost(); } + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + if (docIdSetIterator instanceof Scorer) { + return ((Scorer) docIdSetIterator).intervals(collectIntervals); + } else { + throw new UnsupportedOperationException("positions are only supported on Scorer subclasses"); + } + } @Override public Collection getChildren() { diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index c195497..b365695 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -16,6 +16,11 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.util.Bits; + import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -23,11 +28,6 @@ import java.util.Iterator; import java.util.List; import java.util.Set; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.util.Bits; - /** * A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum * score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries. @@ -153,11 +153,11 @@ public class DisjunctionMaxQuery extends Query implements Iterable { /** Create the scorer used to score our associated DisjunctionMaxQuery */ @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { List scorers = new ArrayList<>(); for (Weight w : weights) { // we will advance() subscorers - Scorer subScorer = w.scorer(context, acceptDocs); + Scorer subScorer = w.scorer(context, flags, acceptDocs); if (subScorer != null) { scorers.add(subScorer); } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java index b5d0a0d..bf1cc47 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java @@ -16,6 +16,9 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.search.intervals.DisjunctionIntervalIterator; +import org.apache.lucene.search.intervals.IntervalIterator; + import java.io.IOException; /** @@ -46,6 +49,7 @@ final class DisjunctionMaxScorer extends DisjunctionScorer { DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier, Scorer[] subScorers) { super(weight, subScorers); this.tieBreakerMultiplier = tieBreakerMultiplier; + } @Override @@ -66,4 +70,10 @@ final class DisjunctionMaxScorer extends DisjunctionScorer { protected float getFinal() { return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier; } + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + return new DisjunctionIntervalIterator(this, collectIntervals, pullIterators(collectIntervals, subScorers)); + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index 5b7e2ff..62b633a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -25,7 +25,7 @@ import java.util.Collection; * Base class for Scorers that score disjunctions. */ abstract class DisjunctionScorer extends Scorer { - private final Scorer subScorers[]; + protected final Scorer subScorers[]; private int numScorers; /** The document number of the current match. */ diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java index f291695..6b711b2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java @@ -17,6 +17,9 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.search.intervals.DisjunctionIntervalIterator; +import org.apache.lucene.search.intervals.IntervalIterator; + import java.io.IOException; /** A Scorer for OR like queries, counterpart of ConjunctionScorer. @@ -50,4 +53,10 @@ final class DisjunctionSumScorer extends DisjunctionScorer { protected float getFinal() { return (float)score * coord[freq]; } + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + return new DisjunctionIntervalIterator(this, collectIntervals, pullIterators(collectIntervals, subScorers)); + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index 909cfe0..684215b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -17,25 +17,30 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.search.PhraseQuery.TermDocsEnumFactory; +import org.apache.lucene.search.intervals.BlockIntervalIterator; +import org.apache.lucene.search.intervals.IntervalIterator; +import org.apache.lucene.search.intervals.TermIntervalIterator; +import org.apache.lucene.search.similarities.Similarity; + import java.io.IOException; import java.util.Arrays; -import org.apache.lucene.index.*; -import org.apache.lucene.search.similarities.Similarity; - final class ExactPhraseScorer extends Scorer { private final int endMinus1; - + private final static int CHUNK = 4096; - + private int gen; private final int[] counts = new int[CHUNK]; private final int[] gens = new int[CHUNK]; - + boolean noDocs; private final long cost; private final static class ChunkState { + final TermDocsEnumFactory factory; final DocsAndPositionsEnum posEnum; final int offset; final boolean useAdvance; @@ -43,26 +48,30 @@ final class ExactPhraseScorer extends Scorer { int posLimit; int pos; int lastPos; - - public ChunkState(DocsAndPositionsEnum posEnum, int offset, boolean useAdvance) { + + public ChunkState(TermDocsEnumFactory factory, DocsAndPositionsEnum posEnum, int offset, + boolean useAdvance) throws IOException { + this.factory = factory; this.posEnum = posEnum; this.offset = offset; this.useAdvance = useAdvance; } } - + private final ChunkState[] chunkStates; - + private int docID = -1; private int freq; private final Similarity.SimScorer docScorer; - + private final String field; + ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, - Similarity.SimScorer docScorer) throws IOException { + Similarity.SimScorer docScorer, String field) throws IOException { super(weight); this.docScorer = docScorer; - + this.field = field; + chunkStates = new ChunkState[postings.length]; endMinus1 = postings.length-1; @@ -71,36 +80,37 @@ final class ExactPhraseScorer extends Scorer { cost = 
postings[0].postings.cost(); for(int i=0;i 1/5th) rarer than // the first term, then we just use .nextDoc() when - // ANDing. This buys ~15% gain for phrases where + // ANDing. This buys ~15% gain for phrases where // freq of rarest 2 terms is close: - final boolean useAdvance = postings[i].docFreq > 5*postings[0].docFreq; - chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position, useAdvance); - if (i > 0 && postings[i].postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { + final boolean useAdvance = postings[i].docFreq > 5 * postings[0].docFreq; + chunkStates[i] = new ChunkState(postings[i].factory, postings[i].postings, + -postings[i].position, useAdvance); + if (i > 0 + && postings[i].postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { noDocs = true; return; } } } - + @Override public int nextDoc() throws IOException { - while(true) { - + while (true) { + // first (rarest) term final int doc = chunkStates[0].posEnum.nextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { docID = doc; return doc; } - + // not-first terms int i = 1; - while(i < chunkStates.length) { + while (i < chunkStates.length) { final ChunkState cs = chunkStates[i]; int doc2 = cs.posEnum.docID(); if (cs.useAdvance) { @@ -109,7 +119,7 @@ final class ExactPhraseScorer extends Scorer { } } else { int iter = 0; - while(doc2 < doc) { + while (doc2 < doc) { // safety net -- fallback to .advance if we've // done too many .nextDocs if (++iter == 50) { @@ -125,12 +135,12 @@ final class ExactPhraseScorer extends Scorer { } i++; } - + if (i == chunkStates.length) { // this doc has all the terms -- now test whether // phrase occurs docID = doc; - + freq = phraseFreq(); if (freq != 0) { return docID; @@ -138,22 +148,22 @@ final class ExactPhraseScorer extends Scorer { } } } - + @Override public int advance(int target) throws IOException { - + // first term int doc = chunkStates[0].posEnum.advance(target); if (doc == DocIdSetIterator.NO_MORE_DOCS) { docID = 
DocIdSetIterator.NO_MORE_DOCS; return doc; } - - while(true) { + + while (true) { // not-first terms int i = 1; - while(i < chunkStates.length) { + while (i < chunkStates.length) { int doc2 = chunkStates[i].posEnum.docID(); if (doc2 < doc) { doc2 = chunkStates[i].posEnum.advance(doc); @@ -163,7 +173,7 @@ final class ExactPhraseScorer extends Scorer { } i++; } - + if (i == chunkStates.length) { // this doc has all the terms -- now test whether // phrase occurs @@ -173,7 +183,7 @@ final class ExactPhraseScorer extends Scorer { return docID; } } - + doc = chunkStates[0].posEnum.nextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { docID = doc; @@ -181,63 +191,63 @@ final class ExactPhraseScorer extends Scorer { } } } - + @Override public String toString() { return "ExactPhraseScorer(" + weight + ")"; } - + @Override public int freq() { return freq; } - + @Override public int docID() { return docID; } - + @Override public float score() { return docScorer.score(docID, freq); } - + private int phraseFreq() throws IOException { - + freq = 0; - + // init chunks - for(int i=0;i cs.lastPos) { cs.lastPos = cs.pos; final int posIndex = cs.pos - chunkStart; @@ -245,7 +255,7 @@ final class ExactPhraseScorer extends Scorer { assert gens[posIndex] != gen; gens[posIndex] = gen; } - + if (cs.posUpto == cs.posLimit) { end = true; break; @@ -254,13 +264,13 @@ final class ExactPhraseScorer extends Scorer { cs.pos = cs.offset + cs.posEnum.nextPosition(); } } - + // middle terms boolean any = true; - for(int t=1;t cs.lastPos) { cs.lastPos = cs.pos; final int posIndex = cs.pos - chunkStart; @@ -270,7 +280,7 @@ final class ExactPhraseScorer extends Scorer { any = true; } } - + if (cs.posUpto == cs.posLimit) { end = true; break; @@ -278,32 +288,33 @@ final class ExactPhraseScorer extends Scorer { cs.posUpto++; cs.pos = cs.offset + cs.posEnum.nextPosition(); } - + if (!any) { break; } } - + if (!any) { // petered out for this chunk chunkStart += CHUNK; chunkEnd += CHUNK; continue; } - + // 
last term - + { final ChunkState cs = chunkStates[endMinus1]; - while(cs.pos < chunkEnd) { + while (cs.pos < chunkEnd) { if (cs.pos > cs.lastPos) { cs.lastPos = cs.pos; final int posIndex = cs.pos - chunkStart; - if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1) { + if (posIndex >= 0 && gens[posIndex] == gen + && counts[posIndex] == endMinus1) { freq++; } } - + if (cs.posUpto == cs.posLimit) { end = true; break; @@ -312,15 +323,26 @@ final class ExactPhraseScorer extends Scorer { cs.pos = cs.offset + cs.posEnum.nextPosition(); } } - + chunkStart += CHUNK; chunkEnd += CHUNK; } - + return freq; } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + TermIntervalIterator[] posIters = new TermIntervalIterator[chunkStates.length]; + DocsAndPositionsEnum[] enums = new DocsAndPositionsEnum[chunkStates.length]; + for (int i = 0; i < chunkStates.length; i++) { + posIters[i] = new TermIntervalIterator(this, enums[i] = chunkStates[i].factory.docsAndPositionsEnum(), + false, collectIntervals, field); + } + return new SloppyPhraseScorer.AdvancingIntervalIterator(this, collectIntervals, enums, new BlockIntervalIterator(this, collectIntervals, posIters)); + } + + @Override public long cost() { return cost; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java index e2a50c8..b84feba 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java @@ -17,6 +17,9 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.search.intervals.IntervalIterator; + +import java.io.IOException; import java.util.Collection; /** Used by {@link BulkScorer}s that need to pass a {@link @@ -49,7 +52,12 @@ final class FakeScorer extends Scorer { public int nextDoc() { throw new UnsupportedOperationException("FakeScorer doesn't support nextDoc()"); } - + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException("FakeScorer doesn't support intervals()"); + } + @Override public float score() { return score; diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java b/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java index 247bb03..d770115 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterCollector.java @@ -1,9 +1,9 @@ package org.apache.lucene.search; -import java.io.IOException; - import org.apache.lucene.index.AtomicReaderContext; +import java.io.IOException; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterLeafCollector.java b/lucene/core/src/java/org/apache/lucene/search/FilterLeafCollector.java index e3ae9a8..1e8d818 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterLeafCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterLeafCollector.java @@ -49,6 +49,11 @@ public class FilterLeafCollector implements LeafCollector { } @Override + public Weight.PostingFeatures postingFeatures() { + return in.postingFeatures(); + } + + @Override public String toString() { return getClass().getSimpleName() + "(" + in + ")"; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java index 6b64d8a..607115e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterScorer.java @@ -17,11 +17,12 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.search.intervals.IntervalIterator; +import org.apache.lucene.util.AttributeSource; + import java.io.IOException; import java.util.Collection; -import org.apache.lucene.util.AttributeSource; - /** * A {@code FilterScorer} contains another {@code Scorer}, which it * uses as its basic source of data, possibly transforming the data along the @@ -79,4 +80,9 @@ abstract class FilterScorer extends Scorer { public AttributeSource attributes() { return in.attributes(); } + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + return in.intervals(collectIntervals); + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java b/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java index d700a30..ab854d9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java @@ -20,6 +20,8 @@ package org.apache.lucene.search; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.search.Weight.PostingFeatures; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; @@ -124,7 +126,7 @@ public class FilteredQuery extends Query { // return a filtering scorer @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { assert filter != null; DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs); @@ -133,12 +135,13 @@ public class FilteredQuery extends Query { return null; } - return strategy.filteredScorer(context, weight, filterDocIdSet); + return strategy.filteredScorer(context, weight, filterDocIdSet, flags); } // return a filtering top scorer 
@Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, PostingFeatures flags, Bits acceptDocs) throws IOException { + assert filter != null; DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs); @@ -147,7 +150,9 @@ public class FilteredQuery extends Query { return null; } - return strategy.filteredBulkScorer(context, weight, scoreDocsInOrder, filterDocIdSet); + + return strategy.filteredBulkScorer(context, weight, scoreDocsInOrder, filterDocIdSet, flags); + } }; } @@ -189,7 +194,6 @@ public class FilteredQuery extends Query { return scorerDoc = doc; } } - @Override public int docID() { return scorerDoc; @@ -209,6 +213,11 @@ public class FilteredQuery extends Query { } @Override + public IntervalIterator intervals(boolean collectIntervals) + throws IOException { + return scorer.intervals(collectIntervals); + } + public long cost() { return scorer.cost(); } @@ -319,6 +328,11 @@ public class FilteredQuery extends Query { } @Override + public IntervalIterator intervals(boolean collectIntervals) + throws IOException { + return scorer.intervals(collectIntervals); + } + public long cost() { return Math.min(primary.cost(), secondary.cost()); } @@ -480,12 +494,13 @@ public class FilteredQuery extends Query { * the {@link AtomicReaderContext} for which to return the {@link Scorer}. * @param weight the {@link FilteredQuery} {@link Weight} to create the filtered scorer. * @param docIdSet the filter {@link DocIdSet} to apply + * @param flags the low level {@link PostingFeatures} for this scorer. 
* @return a filtered scorer * * @throws IOException if an {@link IOException} occurs */ public abstract Scorer filteredScorer(AtomicReaderContext context, - Weight weight, DocIdSet docIdSet) throws IOException; + Weight weight, DocIdSet docIdSet, PostingFeatures flags) throws IOException; /** * Returns a filtered {@link BulkScorer} based on this @@ -500,8 +515,8 @@ public class FilteredQuery extends Query { * @return a filtered top scorer */ public BulkScorer filteredBulkScorer(AtomicReaderContext context, - Weight weight, boolean scoreDocsInOrder, DocIdSet docIdSet) throws IOException { - Scorer scorer = filteredScorer(context, weight, docIdSet); + Weight weight, boolean scoreDocsInOrder, DocIdSet docIdSet, PostingFeatures flags) throws IOException { + Scorer scorer = filteredScorer(context, weight, docIdSet, flags); if (scorer == null) { return null; } @@ -509,6 +524,7 @@ public class FilteredQuery extends Query { // ignore scoreDocsInOrder: return new Weight.DefaultBulkScorer(scorer); } + } /** @@ -522,7 +538,7 @@ public class FilteredQuery extends Query { public static class RandomAccessFilterStrategy extends FilterStrategy { @Override - public Scorer filteredScorer(AtomicReaderContext context, Weight weight, DocIdSet docIdSet) throws IOException { + public Scorer filteredScorer(AtomicReaderContext context, Weight weight, DocIdSet docIdSet, PostingFeatures flags) throws IOException { final DocIdSetIterator filterIter = docIdSet.iterator(); if (filterIter == null) { // this means the filter does not accept any documents. 
@@ -539,12 +555,12 @@ public class FilteredQuery extends Query { final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc); if (useRandomAccess) { // if we are using random access, we return the inner scorer, just with other acceptDocs - return weight.scorer(context, filterAcceptDocs); + return weight.scorer(context, flags, filterAcceptDocs); } else { assert firstFilterDoc > -1; // we are gonna advance() this scorer, so we set inorder=true/toplevel=false // we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice - final Scorer scorer = weight.scorer(context, null); + final Scorer scorer = weight.scorer(context, flags, null); // TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer return (scorer == null) ? null : new PrimaryAdvancedLeapFrogScorer(weight, firstFilterDoc, filterIter, scorer); } @@ -578,14 +594,14 @@ public class FilteredQuery extends Query { @Override public Scorer filteredScorer(AtomicReaderContext context, - Weight weight, DocIdSet docIdSet) throws IOException { + Weight weight, DocIdSet docIdSet, PostingFeatures flags) throws IOException { final DocIdSetIterator filterIter = docIdSet.iterator(); if (filterIter == null) { // this means the filter does not accept any documents. 
return null; } // we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice - final Scorer scorer = weight.scorer(context, null); + final Scorer scorer = weight.scorer(context, flags, null); if (scorer == null) { return null; } @@ -614,15 +630,14 @@ public class FilteredQuery extends Query { private static final class QueryFirstFilterStrategy extends FilterStrategy { @Override public Scorer filteredScorer(final AtomicReaderContext context, - Weight weight, - DocIdSet docIdSet) throws IOException { + Weight weight, DocIdSet docIdSet, PostingFeatures flags) throws IOException { Bits filterAcceptDocs = docIdSet.bits(); if (filterAcceptDocs == null) { // Filter does not provide random-access Bits; we // must fallback to leapfrog: - return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet); + return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet, flags); } - final Scorer scorer = weight.scorer(context, null); + final Scorer scorer = weight.scorer(context, flags, null); return scorer == null ? null : new QueryFirstScorer(weight, filterAcceptDocs, scorer); } @@ -631,14 +646,14 @@ public class FilteredQuery extends Query { public BulkScorer filteredBulkScorer(final AtomicReaderContext context, Weight weight, boolean scoreDocsInOrder, // ignored (we always top-score in order) - DocIdSet docIdSet) throws IOException { + DocIdSet docIdSet, PostingFeatures flags) throws IOException { Bits filterAcceptDocs = docIdSet.bits(); if (filterAcceptDocs == null) { // Filter does not provide random-access Bits; we // must fallback to leapfrog: - return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredBulkScorer(context, weight, scoreDocsInOrder, docIdSet); + return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredBulkScorer(context, weight, scoreDocsInOrder, docIdSet, flags); } - final Scorer scorer = weight.scorer(context, null); + final Scorer scorer = weight.scorer(context, flags, null); return scorer == null ? 
null : new QueryFirstBulkScorer(scorer, filterAcceptDocs); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 8f1a5f6..655003b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -17,6 +17,23 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.ReaderUtil; +import org.apache.lucene.index.StoredDocument; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.Terms; +import org.apache.lucene.search.similarities.DefaultSimilarity; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.store.NIOFSDirectory; +import org.apache.lucene.util.ThreadInterruptedException; + import java.io.IOException; import java.util.Arrays; import java.util.Iterator; @@ -32,23 +49,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DirectoryReader; // javadocs -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiFields; -import org.apache.lucene.index.IndexReaderContext; -import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.index.StoredDocument; -import org.apache.lucene.index.StoredFieldVisitor; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermContext; -import 
org.apache.lucene.index.Terms; -import org.apache.lucene.search.similarities.DefaultSimilarity; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.store.NIOFSDirectory; // javadoc -import org.apache.lucene.util.ThreadInterruptedException; -import org.apache.lucene.index.IndexWriter; // javadocs - /** Implements search over a single IndexReader. * *

Applications usually need only call the inherited @@ -608,7 +608,7 @@ public class IndexSearcher { // continue with the following leaf continue; } - BulkScorer scorer = weight.bulkScorer(ctx, !leafCollector.acceptsDocsOutOfOrder(), ctx.reader().getLiveDocs()); + BulkScorer scorer = weight.bulkScorer(ctx, !leafCollector.acceptsDocsOutOfOrder(), leafCollector.postingFeatures(), ctx.reader().getLiveDocs()); if (scorer != null) { try { scorer.score(leafCollector); diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java index 562e76d..9ea42e2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java @@ -17,10 +17,10 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.AtomicReaderContext; +import java.io.IOException; + /** *

Collector decouples the score from the collected doc: * the score computation is skipped entirely if it's not @@ -118,4 +118,9 @@ public interface LeafCollector { */ boolean acceptsDocsOutOfOrder(); + /** + * Returns the posting features required by this collector. + */ + public Weight.PostingFeatures postingFeatures(); + } diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index 8f2edd7..5fca74c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -20,6 +20,7 @@ package org.apache.lucene.search; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.Bits; @@ -79,6 +80,10 @@ public class MatchAllDocsQuery extends Query { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException("MatchAllDocsQuery doesn't support IntervalIterators"); + } + public long cost() { return maxDoc; } @@ -114,7 +119,7 @@ public class MatchAllDocsQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { return new MatchAllScorer(context.reader(), acceptDocs, this, queryWeight); } diff --git a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java index a2cb61b..3742b21 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java @@ -17,14 +17,16 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.search.intervals.CombinedIntervalIterator; +import org.apache.lucene.search.intervals.IntervalIterator; +import org.apache.lucene.util.ArrayUtil; + import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; import java.util.List; -import org.apache.lucene.util.ArrayUtil; - /** * A Scorer for OR like queries, counterpart of ConjunctionScorer. * This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers. @@ -212,6 +214,11 @@ class MinShouldMatchSumScorer extends Scorer { } } + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + return new CombinedIntervalIterator(this, collectIntervals, pullIterators(collectIntervals, sortedSubScorers)); + } + /** * Returns the score of the current document matching the query. Initially * invalid, until {@link #nextDoc()} is called the first time. diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java index 859b893..0979d00 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java @@ -17,13 +17,11 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.AtomicReaderContext; + import java.io.IOException; import java.util.Arrays; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.Scorer; - /** * A {@link Collector} which allows running a search with several * {@link Collector}s. 
It offers a static {@link #wrap} method which accepts a @@ -136,6 +134,16 @@ public class MultiCollector implements Collector { return true; } + @Override + public Weight.PostingFeatures postingFeatures() { + Weight.PostingFeatures features = Weight.PostingFeatures.DOCS_ONLY; + for (LeafCollector c : collectors) { + if (c.postingFeatures().compareTo(features) > 0) + features = c.postingFeatures(); + } + return features; + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index fe326b7..8d8686d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/* +/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,12 +17,9 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.*; - +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; @@ -31,14 +28,29 @@ import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.Similarity.SimScorer; +import org.apache.lucene.search.PhraseQuery.TermDocsEnumFactory; +import org.apache.lucene.search.Weight.PostingFeatures; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntroSorter; import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Set; + /** * MultiPhraseQuery is a generalized version of PhraseQuery, with an added * method {@link #add(Term[])}. 
@@ -179,7 +191,7 @@ public class MultiPhraseQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { assert !termArrays.isEmpty(); final AtomicReader reader = context.reader(); final Bits liveDocs = acceptDocs; @@ -199,7 +211,7 @@ public class MultiPhraseQuery extends Query { final DocsAndPositionsEnum postingsEnum; int docFreq; - + TermDocsEnumFactory factory; if (terms.length > 1) { postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum); @@ -221,6 +233,7 @@ public class MultiPhraseQuery extends Query { // None of the terms are in this reader return null; } + factory = new MultiTermDocsEnumFactory(liveDocs, context, terms, termContexts, termsEnum, flags); } else { final Term term = terms[0]; TermState termState = termContexts.get(term).get(context.ord); @@ -237,10 +250,10 @@ public class MultiPhraseQuery extends Query { throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")"); } - docFreq = termsEnum.docFreq(); + factory = new TermDocsEnumFactory(term.bytes(), termState, termsEnum, flags, acceptDocs); } - - postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms); + + postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, factory, termsEnum.docFreq() , positions.get(pos), terms); } // sort by increasing docFreq order @@ -249,20 +262,20 @@ public class MultiPhraseQuery extends Query { } if (slop == 0) { - ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context)); + ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), field); if (s.noDocs) { return null; } else { return s; } } else { - return new 
SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context)); + return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), field); } } @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, context.reader().getLiveDocs()); + Scorer scorer = scorer(context, PostingFeatures.POSITIONS, context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { @@ -406,6 +419,27 @@ public class MultiPhraseQuery extends Query { } return true; } + + private static class MultiTermDocsEnumFactory extends TermDocsEnumFactory { + + AtomicReaderContext context; + Term[] terms; + Map termContexts; + + MultiTermDocsEnumFactory(Bits liveDocs, AtomicReaderContext context, Term[] terms, + Map termContexts, TermsEnum termsEnum, PostingFeatures flags) throws IOException { + super(termsEnum, flags, liveDocs); + this.context = context; + this.terms = terms; + this.termContexts = termContexts; + } + + @Override + public DocsAndPositionsEnum docsAndPositionsEnum() throws IOException { + return new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum, flags); + } + + } } /** @@ -434,25 +468,41 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { } } - private static final class IntQueue { - private int _arraySize = 16; + // TODO: Reimplement this as int[_arraySize * 3], storing position at i * 3, + // startOffset at i * 3 + 1 and endOffset at i * 3 + 2. Will need to also + // implement a new SorterTemplate to sort the array. 
+ + private static final class PositionQueue { + private int _arraySize = 48; private int _index = 0; private int _lastIndex = 0; private int[] _array = new int[_arraySize]; - final void add(int i) { - if (_lastIndex == _arraySize) + final void add(int pos, int start, int end) { + if (_lastIndex * 3 == _arraySize) growArray(); - _array[_lastIndex++] = i; + _array[_lastIndex * 3] = pos; + _array[_lastIndex * 3 + 1] = start; + _array[_lastIndex * 3 + 2] = end; + _lastIndex += 1; } final int next() { - return _array[_index++]; + return _array[_index++ * 3]; + } + + final int startOffset() { + return _array[(_index - 1) * 3 + 1]; + } + + final int endOffset() { + return _array[(_index - 1) * 3 + 2]; } final void sort() { - Arrays.sort(_array, _index, _lastIndex); + //Arrays.sort(_array, _index, _lastIndex); + sorter.sort(_index, _lastIndex); } final void clear() { @@ -470,16 +520,54 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { _array = newArray; _arraySize *= 2; } + + private IntroSorter sorter = new IntroSorter() { + private int pivot; + + @Override + protected void swap(int i, int j) { + int ti = _array[i * 3]; + int ts = _array[i * 3 + 1]; + int te = _array[i * 3 + 2]; + _array[i * 3] = _array[j * 3]; + _array[i * 3 + 1] = _array[j * 3 + 1]; + _array[i * 3 + 2] = _array[j * 3 + 2]; + _array[j * 3] = ti; + _array[j * 3 + 1] = ts; + _array[j * 3 + 2] = te; + } + + @Override + protected int compare(int i, int j) { + return _array[i * 3] - _array[j * 3]; + } + + @Override + protected void setPivot(int i) { + pivot = _array[i * 3]; + } + + @Override + protected int comparePivot(int j) { + return pivot - _array[j * 3]; + } + }; } private int _doc; private int _freq; private DocsQueue _queue; - private IntQueue _posList; + private PositionQueue _posList; private long cost; - public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map termContexts, TermsEnum termsEnum) throws IOException { + public UnionDocsAndPositionsEnum(Bits 
liveDocs, AtomicReaderContext context, Term[] terms, + Map termContexts, TermsEnum termsEnum) throws IOException { + this(liveDocs, context, terms, termContexts, termsEnum, PostingFeatures.POSITIONS); + } + + public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map termContexts, TermsEnum termsEnum, PostingFeatures flags) throws IOException { List docsEnums = new LinkedList<>(); + for (int i = 0; i < terms.length; i++) { final Term term = terms[i]; TermState termState = termContexts.get(term).get(context.ord); @@ -498,7 +586,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { } _queue = new DocsQueue(docsEnums); - _posList = new IntQueue(); + _posList = new PositionQueue(); } @Override @@ -520,7 +608,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { final int freq = postings.freq(); for (int i = 0; i < freq; i++) { - _posList.add(postings.nextPosition()); + _posList.add(postings.nextPosition(), postings.startOffset(), postings.endOffset()); } if (postings.nextDoc() != NO_MORE_DOCS) { @@ -543,12 +631,12 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { @Override public int startOffset() { - return -1; + return _posList.startOffset(); } @Override public int endOffset() { - return -1; + return _posList.endOffset(); } @Override @@ -568,7 +656,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { } @Override - public final int freq() { + public final int freq() throws IOException { return _freq; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java index 7fb8da6..d3249ca 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java @@ -17,17 +17,17 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; - -import org.apache.lucene.index.FilteredTermsEnum; // javadocs +import org.apache.lucene.index.FilteredTermsEnum; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.SingleTermsEnum; // javadocs +import org.apache.lucene.index.SingleTermsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.AttributeSource; +import java.io.IOException; + /** * An abstract {@link Query} that matches documents * containing a subset of terms provided by a {@link @@ -62,7 +62,7 @@ import org.apache.lucene.util.AttributeSource; * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} by default. */ public abstract class MultiTermQuery extends Query { - protected final String field; + protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; /** Abstract class that defines how the query is rewritten. */ @@ -243,6 +243,8 @@ public abstract class MultiTermQuery extends Query { } }; + protected final String field; + /** * Constructs a query matching terms that cannot be represented with a single * Term. diff --git a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java index c975b01..3f38845 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java @@ -17,8 +17,10 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.Term; + import java.io.IOException; -import org.apache.lucene.index.*; /** * Position of a term in a document that takes into account the term offset within the phrase. 
@@ -44,6 +46,7 @@ final class PhrasePositions { final boolean next() throws IOException { // increments to next doc doc = postings.nextDoc(); + if (doc == DocIdSetIterator.NO_MORE_DOCS) { return false; } @@ -80,10 +83,14 @@ final class PhrasePositions { /** for debug purposes */ @Override public String toString() { - String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count; + String s = "d:"+doc+" offset:"+offset+" position:"+position+" c:"+count; if (rptGroup >=0 ) { s += " rpt:"+rptGroup+",i"+rptInd; } + s += " t: [" + terms[0]; + for (int i = 1; i < terms.length; i++) + s += "," + terms[i]; + s += "]"; return s; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index f19ae22..1a3a486 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/* +/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,28 +17,30 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Set; - +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.Similarity.SimScorer; +import org.apache.lucene.search.Weight.PostingFeatures; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Set; + /** A Query that matches documents containing a particular sequence of terms. * A PhraseQuery is built by QueryParser for input like "new york". * @@ -138,13 +140,15 @@ public class PhraseQuery extends Query { } static class PostingsAndFreq implements Comparable { + final TermDocsEnumFactory factory; final DocsAndPositionsEnum postings; final int docFreq; final int position; final Term[] terms; final int nTerms; // for faster comparisons - public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term... terms) { + public PostingsAndFreq(DocsAndPositionsEnum postings, TermDocsEnumFactory factory, int docFreq, int position, Term... 
terms) throws IOException { + this.factory = factory; this.postings = postings; this.docFreq = docFreq; this.position = position; @@ -245,7 +249,7 @@ public class PhraseQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { assert !terms.isEmpty(); final AtomicReader reader = context.reader(); final Bits liveDocs = acceptDocs; @@ -276,7 +280,8 @@ public class PhraseQuery extends Query { // term does exist, but has no positions throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")"); } - postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t); + TermDocsEnumFactory factory = new TermDocsEnumFactory(t.bytes(), state, te, flags, acceptDocs); + postingsFreqs[i] = new PostingsAndFreq(postingsEnum, factory, te.docFreq(), positions.get(i).intValue(), t); } // sort by increasing docFreq order @@ -285,7 +290,7 @@ public class PhraseQuery extends Query { } if (slop == 0) { // optimize exact case - ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context)); + ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), field); if (s.noDocs) { return null; } else { @@ -293,7 +298,7 @@ public class PhraseQuery extends Query { } } else { return - new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context)); + new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), field); } } @@ -304,7 +309,7 @@ public class PhraseQuery extends Query { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, context.reader().getLiveDocs()); + Scorer scorer = scorer(context, 
PostingFeatures.POSITIONS, context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { @@ -402,4 +407,33 @@ public class PhraseQuery extends Query { ^ positions.hashCode(); } + static class TermDocsEnumFactory { + protected final TermsEnum termsEnum; + protected final Bits liveDocs; + protected final PostingFeatures flags; + + private final BytesRef term; + private final TermState termState; + + TermDocsEnumFactory(TermsEnum termsEnum, PostingFeatures flags, Bits liveDocs) { + this(null, null, termsEnum, flags, liveDocs); + } + + TermDocsEnumFactory(BytesRef term, TermState termState, TermsEnum termsEnum, PostingFeatures flags, Bits liveDocs) { + this.termsEnum = termsEnum; + this.termState = termState; + this.liveDocs = liveDocs; + this.term = term; + this.flags = flags; + } + + + public DocsAndPositionsEnum docsAndPositionsEnum() + throws IOException { + assert term != null; + termsEnum.seekExact(term, termState); + return termsEnum.docsAndPositions(liveDocs, null, flags.docsAndPositionsFlags()); + } + + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/Query.java b/lucene/core/src/java/org/apache/lucene/search/Query.java index 92dc692..5521eba 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Query.java +++ b/lucene/core/src/java/org/apache/lucene/search/Query.java @@ -17,13 +17,12 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - -import java.util.Set; - import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import java.io.IOException; +import java.util.Set; + /** The abstract base class for queries.

Instantiable subclasses are:

    @@ -70,6 +69,10 @@ public abstract class Query implements Cloneable { return toString(""); } + public String getField() { + return null; + } + /** * Expert: Constructs an appropriate Weight implementation for this query. * diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java index 755c3cd..d2baaca 100644 --- a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java @@ -17,13 +17,13 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.AtomicReaderContext; + import java.io.IOException; import java.util.Arrays; import java.util.Comparator; import java.util.List; -import org.apache.lucene.index.AtomicReaderContext; - /** A {@link Rescorer} that uses a provided Query to assign * scores to the first-pass hits. * @@ -82,7 +82,7 @@ public abstract class QueryRescorer extends Rescorer { if (readerContext != null) { // We advanced to another segment: docBase = readerContext.docBase; - scorer = weight.scorer(readerContext, null); + scorer = weight.scorer(readerContext, Weight.PostingFeatures.DOCS_AND_FREQS, null); } if(scorer != null) { diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java b/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java index 1d6c8ff..ce65632 100644 --- a/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java @@ -17,11 +17,12 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.Weight.PostingFeatures; import org.apache.lucene.util.Bits; +import java.io.IOException; + /** * Constrains search results to only match those which also match a provided * query. 
@@ -56,7 +57,7 @@ public class QueryWrapperFilter extends Filter { return new DocIdSet() { @Override public DocIdSetIterator iterator() throws IOException { - return weight.scorer(privateContext, acceptDocs); + return weight.scorer(privateContext, PostingFeatures.DOCS_AND_FREQS, acceptDocs); } @Override public boolean isCacheable() { return false; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java index 4e2a5f1..2a5b33e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java @@ -17,6 +17,9 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.search.intervals.CombinedIntervalIterator; +import org.apache.lucene.search.intervals.IntervalIterator; + import java.io.IOException; import java.util.Collection; import java.util.Collections; @@ -130,6 +133,12 @@ class ReqExclScorer extends Scorer { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + if (reqScorer == null) + return IntervalIterator.NO_MORE_INTERVALS; + return new CombinedIntervalIterator(this, collectIntervals, reqScorer.intervals(collectIntervals)); + } + public long cost() { return reqScorer.cost(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java index d7b4d86..a77e82a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -16,6 +16,9 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.search.intervals.DisjunctionIntervalIterator; +import org.apache.lucene.search.intervals.IntervalIterator; + import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -86,6 +89,11 @@ class ReqOptSumScorer extends Scorer { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + return new DisjunctionIntervalIterator(this, collectIntervals, pullIterators(collectIntervals, reqScorer, optScorer)); + } + + @Override public int freq() throws IOException { // we might have deferred advance() score(); diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java index 929d3b9..87dabcd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java @@ -17,12 +17,13 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.intervals.IntervalIterator; + import java.io.IOException; import java.util.Collection; import java.util.Collections; -import org.apache.lucene.index.DocsEnum; - /** * Expert: Common scoring functionality for different types of queries. * @@ -53,6 +54,50 @@ public abstract class Scorer extends DocsEnum { protected Scorer(Weight weight) { this.weight = weight; } + + /** + * Expert: Retrieves an {@link IntervalIterator} for this scorer allowing + * access to position and offset intervals for each + * matching document. Call this up-front and use it as + * long as you are still using this scorer. The + * returned iterator is bound to scorer that created it; + * after {@link #nextDoc} or {@link #advance} you must + * call {@link IntervalIterator#scorerAdvanced} before + * iterating over that document's intervals. 
+ * + * @param collectIntervals + * if true the {@link IntervalIterator} can be used to + * collect all individual sub-intervals this {@link IntervalIterator} + * is composed of via + * {@link IntervalIterator#collect(org.apache.lucene.search.intervals.IntervalCollector)} + * @return an {@link IntervalIterator} over matching intervals + * @throws IOException + * if a low-level I/O error is encountered + * + * @lucene.experimental + */ + public abstract IntervalIterator intervals(boolean collectIntervals) throws IOException; + + /** + * Get the IntervalIterators from a list of scorers + * @param collectIntervals true if positions will be collected + * @param scorers the list of scorers to retrieve IntervalIterators from + * @return a list of IntervalIterators pulled from the passed in Scorers + * @throws java.io.IOException if a low-level I/O error is encountered + */ + public static IntervalIterator[] pullIterators(boolean collectIntervals, Scorer... scorers) + throws IOException { + IntervalIterator[] iterators = new IntervalIterator[scorers.length]; + for (int i = 0; i < scorers.length; i++) { + if (scorers[i] == null) { + iterators[i] = IntervalIterator.NO_MORE_INTERVALS; + } + else { + iterators[i] = scorers[i].intervals(collectIntervals); + } + } + return iterators; + } /** Returns the score of the current document matching the query. * Initially invalid, until {@link #nextDoc()} or {@link #advance(int)} diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java index 954b2bb..657dd46 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java @@ -17,20 +17,20 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.MultiTermQuery.RewriteMethod; - import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; -import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; /** * Base rewrite method that translates each term into a query, and keeps @@ -88,7 +88,7 @@ public abstract class ScoringRewrite extends TermCollectingRewr public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { final BooleanQuery bq = SCORING_BOOLEAN_QUERY_REWRITE.rewrite(reader, query); // strip the scores off - final Query result = new ConstantScoreQuery(bq); + final Query result = new ConstantScoreQuery(query.getField(), bq); result.setBoost(query.getBoost()); return result; } diff --git a/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java b/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java index 5803b2e..863dc3f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/SimpleCollector.java @@ -17,10 +17,10 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.AtomicReaderContext; +import java.io.IOException; + /** * Base {@link Collector} implementation that is used to collect all contexts. 
* @@ -42,7 +42,12 @@ public abstract class SimpleCollector implements Collector, LeafCollector { // no-op by default } - // redeclare methods so that javadocs are inherited on sub-classes + @Override + public Weight.PostingFeatures postingFeatures() { + return Weight.PostingFeatures.DOCS_AND_FREQS; + } + +// redeclare methods so that javadocs are inherited on sub-classes @Override public abstract boolean acceptsDocsOutOfOrder(); diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index 986ab06..d30049c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -17,25 +17,36 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.intervals.Interval; +import org.apache.lucene.search.intervals.IntervalCollector; +import org.apache.lucene.search.intervals.IntervalIterator; +import org.apache.lucene.search.intervals.SloppyIntervalIterator; +import org.apache.lucene.search.intervals.TermIntervalIterator; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.FixedBitSet; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; - -import org.apache.lucene.index.Term; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.util.FixedBitSet; +import java.util.List; +import java.util.Map; final class SloppyPhraseScorer extends Scorer { private PhrasePositions min, max; private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq(). 
+ private final PhraseQuery.PostingsAndFreq[] postings; private final Similarity.SimScorer docScorer; - + private final String field; + private final int slop; private final int numPostings; private final PhraseQueue pq; // for advancing min position @@ -52,9 +63,11 @@ final class SloppyPhraseScorer extends Scorer { private final long cost; SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, - int slop, Similarity.SimScorer docScorer) { + int slop, Similarity.SimScorer docScorer, String field) { super(weight); this.docScorer = docScorer; + this.postings = postings; + this.field = field; this.slop = slop; this.numPostings = postings==null ? 0 : postings.length; pq = new PhraseQueue(postings.length); @@ -602,4 +615,96 @@ final class SloppyPhraseScorer extends Scorer { @Override public String toString() { return "scorer(" + weight + ")"; } + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + Map map = new HashMap(); + List enums = new ArrayList(); + + for (int i = 0; i < postings.length; i++) { + if (postings[i].terms.length > 1) { + throw new UnsupportedOperationException("IntervalIterators for MulitPhraseQuery is not supported"); + } + Term term = postings[i].terms[0]; + IterAndOffsets iterAndOffset; + if (!map.containsKey(term)) { + DocsAndPositionsEnum docsAndPosEnum = postings[i].factory + .docsAndPositionsEnum(); + enums.add(docsAndPosEnum); + iterAndOffset = new IterAndOffsets(new TermIntervalIterator(this, docsAndPosEnum, false, + collectIntervals, field)); + map.put(term, iterAndOffset); + } else { + iterAndOffset = map.get(term); + } + iterAndOffset.offsets.add(postings[i].position); + } + Collection values = map.values(); + IntervalIterator[] iters = new IntervalIterator[values.size()]; + int i = 0; + for (IterAndOffsets iterAndOffsets : values) { + iters[i++] = SloppyIntervalIterator.create(this, collectIntervals, iterAndOffsets.iter, iterAndOffsets.toIntArray()); + } + return new 
AdvancingIntervalIterator(this, collectIntervals, enums.toArray(new DocsAndPositionsEnum[enums.size()]), new SloppyIntervalIterator(this, slop, collectIntervals, iters)); + } + + private final static class IterAndOffsets { + final List offsets = new ArrayList(); + final IntervalIterator iter; + + IterAndOffsets(IntervalIterator iter) { + this.iter = iter; + } + + int[] toIntArray() { + int[] array = new int[offsets.size()]; + for (int i = 0; i < array.length; i++) { + array[i] = offsets.get(i).intValue(); + } + return array; + } + } + + final static class AdvancingIntervalIterator extends IntervalIterator { + + public AdvancingIntervalIterator(Scorer scorer, boolean collectIntervals, final DocsAndPositionsEnum[] enums, final IntervalIterator delegate) { + super(scorer, collectIntervals); + this.enums = enums; + this.delegate = delegate; + } + + private final DocsAndPositionsEnum[] enums; + private final IntervalIterator delegate; + @Override + public int scorerAdvanced(int docId) throws IOException { + assert docId == docID(); + for (DocsAndPositionsEnum oneEnum : enums) { + int advance = oneEnum.advance(docId); + assert advance == docId; + } + delegate.scorerAdvanced(docId); + return docId; + } + + @Override + public Interval next() throws IOException { + return delegate.next(); + } + + @Override + public void collect(IntervalCollector collector) { + delegate.collect(collector); + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return delegate.subs(inOrder); + } + + @Override + public int matchDistance() { + return delegate.matchDistance(); + } + + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 5435ccd..b0a8370 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -17,71 +17,80 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Set; - +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; -/** A Query that matches documents containing a term. - This may be combined with other terms with a {@link BooleanQuery}. - */ +import java.io.IOException; +import java.util.Set; + +/** + * A Query that matches documents containing a term. This may be combined with + * other terms with a {@link BooleanQuery}. 
+ */ public class TermQuery extends Query { private final Term term; private final int docFreq; private final TermContext perReaderTermState; - + final class TermWeight extends Weight { private final Similarity similarity; private final Similarity.SimWeight stats; private final TermContext termStates; - + public TermWeight(IndexSearcher searcher, TermContext termStates) - throws IOException { + throws IOException { assert termStates != null : "TermContext must not be null"; this.termStates = termStates; this.similarity = searcher.getSimilarity(); - this.stats = similarity.computeWeight( - getBoost(), - searcher.collectionStatistics(term.field()), + this.stats = similarity.computeWeight(getBoost(), + searcher.collectionStatistics(term.field()), searcher.termStatistics(term, termStates)); } - + @Override - public String toString() { return "weight(" + TermQuery.this + ")"; } - + public String toString() { + return "weight(" + TermQuery.this + ")"; + } + @Override - public Query getQuery() { return TermQuery.this; } - + public Query getQuery() { + return TermQuery.this; + } + @Override public float getValueForNormalization() { return stats.getValueForNormalization(); } - + @Override public void normalize(float queryNorm, float topLevelBoost) { stats.normalize(queryNorm, topLevelBoost); } - + @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); final TermsEnum termsEnum = getTermsEnum(context); if (termsEnum == null) { return null; } - DocsEnum docs = termsEnum.docs(acceptDocs, null); + DocsEnum docs; + if (flags.compareTo(PostingFeatures.POSITIONS) < 0) { + docs = 
termsEnum.docs(acceptDocs, null, flags.docFlags()); + } else { + docs = termsEnum.docsAndPositions(acceptDocs, null, flags.docsAndPositionsFlags()); + } assert docs != null; return new TermScorer(this, docs, similarity.simScorer(stats, context)); } @@ -96,90 +105,104 @@ public class TermQuery extends Query { assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term; return null; } - //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null")); - final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null); + // System.out.println("LD=" + reader.getLiveDocs() + " set?=" + + // (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null")); + final TermsEnum termsEnum = context.reader().terms(term.field()) + .iterator(null); termsEnum.seekExact(term.bytes(), state); return termsEnum; } private boolean termNotInReader(AtomicReader reader, Term term) throws IOException { // only called from assert - //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString()); + // System.out.println("TQ.termNotInReader reader=" + reader + " term=" + + // field + ":" + bytes.utf8ToString()); return reader.docFreq(term) == 0; } @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, context.reader().getLiveDocs()); + Scorer scorer = scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { float freq = scorer.freq(); SimScorer docScorer = similarity.simScorer(stats, context); ComplexExplanation result = new ComplexExplanation(); - result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); - Explanation scoreExplanation = docScorer.explain(doc, new 
Explanation(freq, "termFreq=" + freq)); + result.setDescription("weight(" + getQuery() + " in " + doc + ") [" + + similarity.getClass().getSimpleName() + "], result of:"); + Explanation scoreExplanation = docScorer.explain(doc, + new Explanation(freq, "termFreq=" + freq)); result.addDetail(scoreExplanation); result.setValue(scoreExplanation.getValue()); result.setMatch(true); return result; } } - return new ComplexExplanation(false, 0.0f, "no matching term"); + return new ComplexExplanation(false, 0.0f, "no matching term"); } } - + /** Constructs a query for the term t. */ public TermQuery(Term t) { this(t, -1); } - - /** Expert: constructs a TermQuery that will use the - * provided docFreq instead of looking up the docFreq - * against the searcher. */ + + /** + * Expert: constructs a TermQuery that will use the provided docFreq instead + * of looking up the docFreq against the searcher. + */ public TermQuery(Term t, int docFreq) { term = t; this.docFreq = docFreq; perReaderTermState = null; } - /** Expert: constructs a TermQuery that will use the - * provided docFreq instead of looking up the docFreq - * against the searcher. */ + /** + * Expert: constructs a TermQuery that will use the provided docFreq instead + * of looking up the docFreq against the searcher. + */ public TermQuery(Term t, TermContext states) { assert states != null; term = t; docFreq = states.docFreq(); perReaderTermState = states; } - + /** Returns the term of this query. */ - public Term getTerm() { return term; } + public Term getTerm() { + return term; + } + + @Override + public String getField() { + return term.field(); + } @Override public Weight createWeight(IndexSearcher searcher) throws IOException { final IndexReaderContext context = searcher.getTopReaderContext(); final TermContext termState; + if (perReaderTermState == null || perReaderTermState.topReaderContext != context) { // make TermQuery single-pass if we don't have a PRTS or if the context differs! 
termState = TermContext.build(context, term); } else { - // PRTS was pre-build for this IS - termState = this.perReaderTermState; + // PRTS was pre-build for this IS + termState = this.perReaderTermState; } - + // we must not ignore the given docFreq - if set use the given value (lie) - if (docFreq != -1) - termState.setDocFreq(docFreq); + if (docFreq != -1) termState.setDocFreq(docFreq); return new TermWeight(searcher, termState); } - + @Override public void extractTerms(Set terms) { terms.add(getTerm()); } - + /** Prints a user-readable version of this query. */ @Override public String toString(String field) { @@ -192,21 +215,20 @@ public class TermQuery extends Query { buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); } - + /** Returns true iff o is equal to this. */ @Override public boolean equals(Object o) { - if (!(o instanceof TermQuery)) - return false; - TermQuery other = (TermQuery)o; + if (!(o instanceof TermQuery)) return false; + TermQuery other = (TermQuery) o; return (this.getBoost() == other.getBoost()) - && this.term.equals(other.term); + && this.term.equals(other.term); } - - /** Returns a hash code value for this object.*/ + + /** Returns a hash code value for this object. */ @Override public int hashCode() { return Float.floatToIntBits(getBoost()) ^ term.hashCode(); } - + } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 6697524..652b805 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -17,11 +17,14 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; - +import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.intervals.IntervalIterator; +import org.apache.lucene.search.intervals.TermIntervalIterator; import org.apache.lucene.search.similarities.Similarity; +import java.io.IOException; + /** Expert: A Scorer for documents matching a Term. */ final class TermScorer extends Scorer { @@ -93,4 +96,12 @@ final class TermScorer extends Scorer { /** Returns a string representation of this TermScorer. */ @Override public String toString() { return "scorer(" + weight + ")"; } + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + assert docsEnum instanceof DocsAndPositionsEnum; + String field = ((TermQuery) weight.getQuery()).getTerm().field(); + return new TermIntervalIterator(this, (DocsAndPositionsEnum) docsEnum, false, collectIntervals, field); + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index bfebeda..4c2d0ce 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -17,10 +17,10 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.AtomicReaderContext; +import java.io.IOException; + /** * A {@link Collector} implementation that collects the top-scoring hits, * returning them as a {@link TopDocs}. 
This is used by {@link IndexSearcher} to @@ -308,4 +308,5 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java b/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java index 4fc5be6..5aa8d55 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TotalHitCountCollector.java @@ -17,6 +17,7 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.search.Weight.PostingFeatures; /** * Just counts the total number of hits. @@ -36,6 +37,12 @@ public class TotalHitCountCollector extends SimpleCollector { } @Override + public PostingFeatures postingFeatures() { + // we don't need frequencies here + return PostingFeatures.DOCS_ONLY; + } + + @Override public boolean acceptsDocsOutOfOrder() { return true; } diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 8398157..b9a1c15 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -17,14 +17,18 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; - -import org.apache.lucene.index.AtomicReader; // javadocs +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReaderContext; // javadocs + +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.IndexReaderContext; // javadocs +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; +import java.io.IOException; + /** * Expert: Calculate query weights and build query scorers. *

    @@ -35,7 +39,8 @@ import org.apache.lucene.util.Bits; * {@link AtomicReader} dependent state should reside in the {@link Scorer}. *

    * Since {@link Weight} creates {@link Scorer} instances for a given - * {@link AtomicReaderContext} ({@link #scorer(AtomicReaderContext, Bits)}) + * {@link AtomicReaderContext} ({@link #scorer(AtomicReaderContext, + * PostingFeatures, Bits)}) * callers must maintain the relationship between the searcher's top-level * {@link IndexReaderContext} and the context used to create a {@link Scorer}. *

    @@ -50,7 +55,7 @@ import org.apache.lucene.util.Bits; *

  • The query normalization factor is passed to {@link #normalize(float, float)}. At * this point the weighting is complete. *
  • A Scorer is constructed by - * {@link #scorer(AtomicReaderContext, Bits)}. + * {@link #scorer(AtomicReaderContext, PostingFeatures, Bits)}. * * * @since 2.9 @@ -90,6 +95,7 @@ public abstract class Weight { * * @param context * the {@link AtomicReaderContext} for which to return the {@link Scorer}. + * @param flags the low level {@link PostingFeatures} for this scorer. * @param acceptDocs * Bits that represent the allowable docs to match (typically deleted docs * but possibly filtering other documents) @@ -97,7 +103,7 @@ public abstract class Weight { * @return a {@link Scorer} which scores documents in/out-of order. * @throws IOException if there is a low-level I/O error */ - public abstract Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException; + public abstract Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException; /** * Optional method, to return a {@link BulkScorer} to @@ -116,19 +122,15 @@ public abstract class Weight { * in-order scorer is also an out-of-order one. However, an * out-of-order scorer may not support {@link Scorer#nextDoc()} * and/or {@link Scorer#advance(int)}, therefore it is recommended to - * request an in-order scorer if use of these - * methods is required. * @param acceptDocs * Bits that represent the allowable docs to match (typically deleted docs * but possibly filtering other documents) - * - * @return a {@link BulkScorer} which scores documents and - * passes them to a collector. + * @return a {@link BulkScorer} which scores documents and passes them to a collector.
* @throws IOException if there is a low-level I/O error */ - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, PostingFeatures flags, Bits acceptDocs) throws IOException { - Scorer scorer = scorer(context, acceptDocs); + Scorer scorer = scorer(context, flags, acceptDocs); if (scorer == null) { // No docs match return null; @@ -206,6 +208,58 @@ public abstract class Weight { * NOTE: the default implementation returns false, i.e. * the Scorer scores documents in-order. */ + + /** + * Feature flags used to control low-level posting list features. These flags + * allow Collectors and scorers to specify their requirements for document + * collection and scoring ahead of time for best performance. + */ + public static enum PostingFeatures { + /**Only document IDs are required for document collection and scoring*/ + DOCS_ONLY(0, 0), + /**Document IDs and Term Frequencies are required for document collection and scoring*/ + DOCS_AND_FREQS(DocsEnum.FLAG_FREQS, 0), + /**Document IDs, Term Frequencies and Positions are required for document collection and scoring*/ + POSITIONS(DocsEnum.FLAG_FREQS, 0), + /**Document IDs, Term Frequencies, Positions and Payloads are required for document collection and scoring*/ + POSITIONS_AND_PAYLOADS(DocsEnum.FLAG_FREQS, DocsAndPositionsEnum.FLAG_PAYLOADS), + /**Document IDs, Term Frequencies, Positions and Offsets are required for document collection and scoring*/ + OFFSETS(DocsEnum.FLAG_FREQS, DocsAndPositionsEnum.FLAG_OFFSETS), + /**Document IDs, Term Frequencies, Positions, Offsets and Payloads are required for document collection and scoring*/ + OFFSETS_AND_PAYLOADS(DocsEnum.FLAG_FREQS, DocsAndPositionsEnum.FLAG_OFFSETS + | DocsAndPositionsEnum.FLAG_PAYLOADS); + + private final int docsAndPositionsFlags; + private final int docFlags; + + private PostingFeatures(int docFlags, int 
docsAndPositionsFlags) { + this.docsAndPositionsFlags = docsAndPositionsFlags; + this.docFlags = docFlags; + } + + /** + * Returns the flags for {@link DocsAndPositionsEnum}. This value should be + * passed to + * {@link TermsEnum#docsAndPositions(Bits, DocsAndPositionsEnum, int)} + * + * @return {@link DocsAndPositionsEnum} flags + */ + public int docsAndPositionsFlags() { + return docsAndPositionsFlags; + } + + /** + * Returns the flags for {@link DocsEnum}. This value should be + * passed to + * {@link TermsEnum#docs(Bits, DocsEnum, int)} + * + * @return {@link DocsEnum} flags + */ + public int docFlags() { + return docFlags; + } + } + public boolean scoresDocsOutOfOrder() { return false; } diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/BlockIntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/BlockIntervalFilter.java new file mode 100644 index 0000000..7383c20 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/BlockIntervalFilter.java @@ -0,0 +1,41 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class BlockIntervalFilter implements IntervalFilter { + + private final boolean collectLeaves; + + public BlockIntervalFilter() { + this(true); + } + + public BlockIntervalFilter(boolean collectLeaves) { + this.collectLeaves = collectLeaves; + } + + @Override + public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) { + return new BlockIntervalIterator(collectIntervals, collectLeaves, iter); + } + + @Override + public String toString() { + return "BLOCK"; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/BlockIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/BlockIntervalIterator.java new file mode 100644 index 0000000..d19fbe7 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/BlockIntervalIterator.java @@ -0,0 +1,180 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import org.apache.lucene.search.Scorer; + +import java.io.IOException; +import java.util.Arrays; + +/** + * An IntervalIterator implementing minimum interval semantics for the + * BLOCK operator + * + * See "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics" + * + * @lucene.experimental + */ +public final class BlockIntervalIterator extends IntervalIterator { + private final IntervalIterator[] iterators; + + private static final Interval INFINITE_INTERVAL = new Interval(); + private final Interval[] intervals; + private final Interval interval = new Interval(); + private final int[] gaps; + + private final int lastIter; + private boolean collectLeaves = true; + + public BlockIntervalIterator(boolean collectIntervals, boolean collectLeaves, IntervalIterator other) { + this(collectIntervals, other); + this.collectLeaves = collectLeaves; + } + + /** + * Construct a BlockIntervalIterator over a compound IntervalIterator. The + * sub-iterators must be in order and sequential for a match. + * @param collectIntervals true if intervals will be collected + * @param other the compound {@link IntervalIterator} used to extract the individual block iterators + */ + public BlockIntervalIterator(boolean collectIntervals, IntervalIterator other) { + this(collectIntervals, defaultIncrements(other.subs(true).length), other); + } + + /** + * Construct a BlockIntervalIterator over a compound IntervalIterator using + * a supplied increments array. 
+ * @param collectIntervals true if intervals will be collected + * @param increments an array of position increments between the iterators + * @param other the compound {@link IntervalIterator} used to extract the individual block iterators + */ + public BlockIntervalIterator(boolean collectIntervals, int[] increments, IntervalIterator other) { + super(other.getScorer(), collectIntervals); + assert other.subs(true) != null; + iterators = other.subs(true); + assert iterators.length > 1; + intervals = new Interval[iterators.length]; + lastIter = iterators.length - 1; + this.gaps = increments; + } + + /** + * Construct a BlockIntervalIterator over a set of subiterators using a supplied + * increments array + * @param scorer the parent Scorer + * @param increments an array of position increments between the iterators + * @param collectIntervals true if intervals will be collected + * @param iterators the subiterators + */ + public BlockIntervalIterator(Scorer scorer, int[] increments, boolean collectIntervals, + IntervalIterator... iterators) { + super(scorer, collectIntervals); + assert iterators.length > 1; + this.iterators = iterators; + intervals = new Interval[iterators.length]; + lastIter = iterators.length - 1; + this.gaps = increments; + } + + /** + * Construct a BlockIntervalIterator over a set of subiterators + * @param scorer the parent Scorer + * @param collectIntervals true if intervals will be collected + * @param iterators the subiterators + */ + public BlockIntervalIterator(Scorer scorer, boolean collectIntervals, IntervalIterator... 
iterators) { + this(scorer, defaultIncrements(iterators.length), collectIntervals, iterators); + } + + private static int[] defaultIncrements(int num) { + int[] gaps = new int[num]; + Arrays.fill(gaps, 1); + return gaps; + } + + @Override + public Interval next() throws IOException { + if ((intervals[0] = iterators[0].next()) == null) { + return null; + } + int offset = 0; + for (int i = 1; i < iterators.length;) { + final int gap = gaps[i]; + while (intervals[i].begin + gap <= intervals[i - 1].end) { + if ((intervals[i] = iterators[i].next()) == null) { + return null; + } + } + offset += gap; + if (intervals[i].begin == intervals[i - 1].end + gaps[i]) { + i++; + if (i < iterators.length && intervals[i] == INFINITE_INTERVAL) { + // advance only if really necessary + iterators[i].scorerAdvanced(docID()); + assert iterators[i].docID() == docID(); + } + } else { + do { + if ((intervals[0] = iterators[0].next()) == null) { + return null; + } + } while (intervals[0].begin < intervals[i].end - offset); + + i = 1; + } + } + interval.begin = intervals[0].begin; + interval.end = intervals[lastIter].end; + interval.offsetBegin = intervals[0].offsetBegin; + interval.offsetEnd = intervals[lastIter].offsetEnd; + interval.field = intervals[0].field; + return interval; + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return iterators; + } + + @Override + public void collect(IntervalCollector collector) { + assert collectIntervals; + collector.collectComposite(scorer, interval, docID()); + if (collectLeaves) { + for (IntervalIterator iter : iterators) { + iter.collect(collector); + } + } + } + + @Override + public int scorerAdvanced(int docId) throws IOException { + iterators[0].scorerAdvanced(docId); + assert iterators[0].docID() == docId; + iterators[1].scorerAdvanced(docId); + assert iterators[1].docID() == docId; + Arrays.fill(intervals, INFINITE_INTERVAL); + return docId; + } + + @Override + public int matchDistance() { + return 
intervals[lastIter].begin - intervals[0].end; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/BrouwerianIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/BrouwerianIntervalIterator.java new file mode 100644 index 0000000..5ee6abb --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/BrouwerianIntervalIterator.java @@ -0,0 +1,123 @@ +package org.apache.lucene.search.intervals; + +import org.apache.lucene.search.Scorer; + +import java.io.IOException; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * IntervalIterator based on minimum interval semantics for the Brouwerian + * operator. This {@link IntervalIterator} computes the difference M-S + * between the anti-chains M (minuend) and S (subtracted). + *

    + * + * + * See "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics" + */ +public class BrouwerianIntervalIterator extends IntervalIterator { + + private final IntervalIterator minuend; + private final IntervalIterator subtracted; + private Interval subtractedInterval; + private Interval currentInterval; + private final String field; + + /** + * Construct a new BrouwerianIntervalIterator over a minuend and a subtrahend + * IntervalIterator + * @param scorer the parent Scorer + * @param collectIntervals true if intervals will be collected + * @param minuend the minuend IntervalIterator + * @param subtracted the subtrahend IntervalIterator + */ + public BrouwerianIntervalIterator(Scorer scorer, boolean collectIntervals, + IntervalIterator minuend, IntervalIterator subtracted, String field) { + super(scorer, collectIntervals); + this.minuend = minuend; + this.subtracted = subtracted; + this.field = field; + } + + @Override + public int scorerAdvanced(int docId) throws IOException { + //System.out.println("Advancing to doc " + docId); + //Thread.dumpStack(); + minuend.scorerAdvanced(docId); + if (subtracted.docID() <= docId) + subtracted.scorerAdvanced(docId); + subtractedInterval = new Interval(field); + return docId; + } + + @Override + public Interval next() throws IOException { + //System.out.println("next()"); + //System.out.println("Subtractend: " + subtracted.toString()); + //System.out.println("Minuend: " + minuend.toString()); + if (subtracted.docID() != minuend.docID() || subtractedInterval == null) { + //System.out.println("No subtrahend on doc " + minuend.docID()); + currentInterval = minuend.next(); + //System.out.println("----Returning " + currentInterval); + return currentInterval; + } + while ((currentInterval = minuend.next()) != null) { + //System.out.println("next() : advancing through minuend"); + //System.out.println("Subtract intervals: " + subtractedInterval.toString()); + //System.out.println("Current interval: " + 
currentInterval.toString()); + while(subtractedInterval.lessThanExclusive(currentInterval) && (subtractedInterval = subtracted.next()) != null) { + //System.out.println("next{} : advancing through subtrahend"); + //System.out.println("Subtractend: " + subtracted.toString()); + //System.out.println("Minuend: " + minuend.toString()); + } + if (subtractedInterval == null || !currentInterval.overlaps(subtractedInterval)) { + //System.out.println("----Returning " + currentInterval); + return currentInterval; + } + } + //System.out.println("----Returning " + currentInterval); + return currentInterval; + } + + @Override + public void collect(IntervalCollector collector) { + assert collectIntervals; + collector.collectComposite(scorer, currentInterval, docID()); + minuend.collect(collector); + + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return new IntervalIterator[] {minuend, subtracted}; + } + + + @Override + public int matchDistance() { + return minuend.matchDistance(); + } + + @Override + public int docID() { + return minuend.docID(); + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/CombinedIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/CombinedIntervalIterator.java new file mode 100644 index 0000000..c98dbc3 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/CombinedIntervalIterator.java @@ -0,0 +1,132 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.PriorityQueue; + +import java.io.IOException; + +public class CombinedIntervalIterator extends IntervalIterator { + + private final IntervalPriorityQueue intervalQueue; + private final IntervalIterator[] children; + + private final Interval current = new Interval(); + + private SnapshotPositionCollector snapshot; + + public CombinedIntervalIterator(Scorer scorer, boolean collectIntervals, IntervalIterator... children) { + super(scorer, collectIntervals); + this.children = children; + intervalQueue = new IntervalPriorityQueue(children.length); + } + + @Override + public int scorerAdvanced(int docId) throws IOException { + intervalQueue.clear(); + for (IntervalIterator child : children) { + IntervalIteratorRef ref = new IntervalIteratorRef(child, docId); + if (ref.interval != null) + intervalQueue.add(ref); + } + intervalQueue.updateTop(); + return docId; + } + + @Override + public Interval next() throws IOException { + if (intervalQueue.size() == 0) + return null; + + IntervalIteratorRef top = intervalQueue.top(); + current.copy(top.interval); + if (collectIntervals) + snapShotSubPositions(); + Interval interval; + if ((interval = top.iterator.next()) != null) { + top.interval = interval; + intervalQueue.updateTop(); + } + else + intervalQueue.pop(); + + return current; + } + + private void snapShotSubPositions() { + if (snapshot == null) { + snapshot = new SnapshotPositionCollector(intervalQueue.size()); + } + snapshot.reset(); + collectInternal(snapshot); + } + + private 
void collectInternal(IntervalCollector collector) { + assert collectIntervals; + intervalQueue.top().iterator.collect(collector); + } + + @Override + public void collect(IntervalCollector collector) { + assert collectIntervals; + if (snapshot == null) { + // we might not be initialized if the first interval matches + collectInternal(collector); + } else { + snapshot.replay(collector); + } + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return children; + } + + @Override + public int matchDistance() { + return 0; + } + + public static class IntervalIteratorRef { + + final IntervalIterator iterator; + Interval interval = null; + int doc = -1; + + public IntervalIteratorRef(IntervalIterator iterator, int advanceTo) throws IOException { + this.iterator = iterator; + this.doc = this.iterator.scorerAdvanced(advanceTo); + if (this.doc == advanceTo) { + this.interval = this.iterator.next(); + } + } + } + + public static class IntervalPriorityQueue extends PriorityQueue { + + public IntervalPriorityQueue(int maxSize) { + super(maxSize); + } + + @Override + protected boolean lessThan(IntervalIteratorRef a, IntervalIteratorRef b) { + return a.doc < b.doc || a.doc == b.doc && a.interval.strictlyLessThan(b.interval); + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java new file mode 100644 index 0000000..572bf75 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/ConjunctionIntervalIterator.java @@ -0,0 +1,183 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.intervals.IntervalQueue.IntervalRef; + +import java.io.IOException; + +/** + * ConjunctionIntervalIterator based on minimal interval semantics for AND + * operator. + * + * See "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics" + * + * @lucene.experimental + */ +public final class ConjunctionIntervalIterator extends IntervalIterator { + + private final IntervalQueueAnd queue; + private final int nrMustMatch; + private SnapshotPositionCollector snapshot; + private final IntervalIterator[] iterators; + private int rightExtremeBegin; + private final boolean collectLeaves; + + /** + * Create a new ConjunctionIntervalIterator over a set of subiterators + * @param scorer the parent scorer + * @param collectIntervals true if intervals will be collected + * @param iterators a list of iterators to combine + * @throws IOException if a low level I/O exception occurs + */ + public ConjunctionIntervalIterator(Scorer scorer, boolean collectIntervals, boolean collectLeaves, + IntervalIterator... iterators) { + this(scorer, collectIntervals, collectLeaves, iterators.length, iterators); + } + + public ConjunctionIntervalIterator(Scorer scorer, boolean collectIntervals, IntervalIterator... 
iterators) { + this(scorer, collectIntervals, false, iterators); + } + + public ConjunctionIntervalIterator(Scorer scorer, boolean collectIntervals, int mm, IntervalIterator... iterators) { + this(scorer, collectIntervals, false, mm, iterators); + } + + /** + * Create a new ConjunctionIntervalIterator over a set of subiterators, + * with a minimum number of matching subiterators per document + * @param scorer the parent Scorer + * @param collectIntervals true if intervals will be collected + * @param minimuNumShouldMatch the number of subiterators that should + * match a document for a match to be returned + * @param iterators a list of iterators to combine + * @throws IOException if a low level I/O exception occurs + */ + public ConjunctionIntervalIterator(Scorer scorer, boolean collectIntervals, boolean collectLeaves, + int minimuNumShouldMatch, IntervalIterator... iterators) { + super(scorer, collectIntervals); + this.iterators = iterators; + this.queue = new IntervalQueueAnd(iterators.length); + this.nrMustMatch = minimuNumShouldMatch; + this.collectLeaves = collectLeaves; + } + + private void advance() throws IOException { + final IntervalRef top = queue.top(); + Interval interval = null; + if ((interval = iterators[top.index].next()) != null) { + top.interval = interval; + queue.updateRightExtreme(top); + queue.updateTop(); + } else { + queue.pop(); + } + } + + @Override + public Interval next() throws IOException { + + while (queue.size() >= nrMustMatch //&& queue.top().interval.field.equals(queue.currentCandidate.field) + && queue.top().interval.begin == queue.currentCandidate.begin) { + advance(); + } + if (queue.size() < nrMustMatch) { + return null; + } + do { + queue.updateCurrentCandidate(); + Interval top = queue.top().interval; + if (collectIntervals) { + snapShotSubPositions(); // this looks odd? -> see SnapShotCollector below for + // details! 
+ } + if (queue.currentCandidate.begin == top.begin //&& queue.currentCandidate.field.equals(top.field) + && queue.currentCandidate.end == top.end) { + return queue.currentCandidate; + } + rightExtremeBegin = queue.rightExtremeBegin; + advance(); + } while (queue.size() >= nrMustMatch && queue.currentCandidate.end == queue.rightExtreme); + return queue.currentCandidate; // TODO support payloads + } + + + @Override + public int scorerAdvanced(final int docId) throws IOException { + if (docId == NO_MORE_DOCS) { + return NO_MORE_DOCS; + } + queue.reset(); + for (int i = 0; i < iterators.length; i++) { + int scorerAdvanced = iterators[i].scorerAdvanced(docId); + if (scorerAdvanced != docId) + return scorerAdvanced; + assert scorerAdvanced == docId; + final Interval interval = iterators[i].next(); + if (interval != null) { + IntervalRef intervalRef = new IntervalRef(interval, i); // TODO maybe + // reuse? + queue.updateRightExtreme(intervalRef); + queue.add(intervalRef); + } + } + return docId; + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return iterators; + } + + + private void snapShotSubPositions() { + if (snapshot == null) { + snapshot = new SnapshotPositionCollector(queue.size()); + } + snapshot.reset(); + collectInternal(snapshot); + } + + private void collectInternal(IntervalCollector collector) { + assert collectIntervals; + collector.collectComposite(scorer, queue.currentCandidate, docID()); + if (collectLeaves) { + for (IntervalIterator iter : iterators) { + iter.collect(collector); + } + } + } + + @Override + public void collect(IntervalCollector collector) { + assert collectIntervals; + if (snapshot == null) { + // we might not be initialized if the first interval matches + collectInternal(collector); + } else { + snapshot.replay(collector); + } + } + + @Override + public int matchDistance() { + return (rightExtremeBegin) - (queue.currentTopEnd) -1; // align the match if pos are adjacent + } +} \ No newline at end of file diff 
--git a/lucene/core/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalIterator.java new file mode 100644 index 0000000..be0c7d4 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalIterator.java @@ -0,0 +1,113 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.intervals.IntervalQueue.IntervalRef; + +import java.io.IOException; + +/** + * DisjunctionIntervalIterator based on minimal interval semantics for OR + * operator + * + * See "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics" + * + * @lucene.experimental + */ +public final class DisjunctionIntervalIterator extends IntervalIterator { + + private final IntervalQueue queue; + private final IntervalIterator[] iterators; + + /** + * Creates a new DisjunctionIntervalIterator over a set of IntervalIterators + * @param scorer the parent Scorer + * @param collectIntervals true if intervals will be collected + * @param intervals the IntervalIterators to iterate over + * @throws IOException if a low-level I/O error is encountered + */ + public DisjunctionIntervalIterator(Scorer scorer, boolean collectIntervals, IntervalIterator... intervals) + throws IOException { + super(scorer, collectIntervals); + this.iterators = intervals; + queue = new IntervalQueueOr(intervals.length); + } + + private void advance() throws IOException { + final IntervalRef top = queue.top(); + Interval interval = null; + if ((interval = iterators[top.index].next()) != null) { + top.interval = interval; + queue.updateTop(); + } else { + queue.pop(); + } + } + + @Override + public Interval next() throws IOException { + while (queue.size() > 0 && + (queue.top().interval.field.equals(queue.currentCandidate.field)) && + (queue.top().interval.begin < queue.currentCandidate.begin || + (queue.top().interval.begin == queue.currentCandidate.begin && queue.top().interval.end <= queue.currentCandidate.end))) { + advance(); + } + if (queue.size() == 0) { + return null; + } + queue.updateCurrentCandidate(); + return queue.currentCandidate; // TODO support payloads + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return iterators; + } + + @Override + public void collect(IntervalCollector collector) { + 
assert collectIntervals; + collector.collectComposite(scorer, queue.currentCandidate, docID()); + iterators[queue.top().index].collect(collector); + } + + @Override + public int scorerAdvanced(int docId) throws IOException { + queue.reset(); + for (int i = 0; i < iterators.length; i++) { + if (iterators[i].docID() <= docId) { + int scorerAdvanced = iterators[i].scorerAdvanced(docId); + //assert iterators[i].docID() == scorerAdvanced : " " + iterators[i]; + } + if (iterators[i].docID() == docId) { + Interval interval = iterators[i].next(); + if (interval != null) + queue.add(new IntervalRef(interval, i)); + } + } + return this.docID(); + } + + @Override + public int matchDistance() { + return iterators[queue.top().index].matchDistance(); + } + +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/Interval.java b/lucene/core/src/java/org/apache/lucene/search/intervals/Interval.java new file mode 100644 index 0000000..08a02bc --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/Interval.java @@ -0,0 +1,197 @@ +package org.apache.lucene.search.intervals; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+/**
+ * Represents a section of a document that matches a query
+ */
+public class Interval implements Cloneable {
+
+  /** The position of the start of this Interval */
+  public int begin;
+
+  /** The position of the end of this Interval */
+  public int end;
+
+  /** The offset of the start of this Interval */
+  public int offsetBegin;
+
+  /** The offset of the end of this Interval */
+  public int offsetEnd;
+
+  /** The field this interval is on */
+  public String field;
+
+  /** An interval that will always compare as less than any other interval */
+  public static final Interval INFINITE_INTERVAL = new Interval();
+
+  /**
+   * Constructs a new Interval
+   * @param begin the start position
+   * @param end the end position
+   * @param offsetBegin the start offset
+   * @param offsetEnd the end offset
+   * @param field the field this interval is on
+   */
+  public Interval(int begin, int end, int offsetBegin, int offsetEnd, String field) {
+    this.begin = begin;
+    this.end = end;
+    this.offsetBegin = offsetBegin;
+    this.offsetEnd = offsetEnd;
+    this.field = field;
+  }
+
+  /**
+   * Constructs a new Interval with no initial values and an empty field
+   * name. This will always compare as less than any other Interval.
+   */
+  public Interval() {
+    this("");
+  }
+
+  /**
+   * Constructs a new Interval on the given field with no initial values:
+   * positions are Integer.MIN_VALUE and offsets are -1.
+   * @param field the field this interval is on
+   */
+  public Interval(String field) {
+    this(Integer.MIN_VALUE, Integer.MIN_VALUE, -1, -1, field);
+  }
+
+  /**
+   * Null-safe value comparison of two field names. Used by the assertions
+   * below instead of ==, which only holds for identical String instances.
+   */
+  private static boolean sameField(String a, String b) {
+    return a == null ? b == null : a.equals(b);
+  }
+
+  /**
+   * Update to span the range defined by two other Intervals.
+   * @param start the first Interval
+   * @param end the second Interval
+   */
+  public void update(Interval start, Interval end) {
+    assert sameField(start.field, end.field);
+    this.begin = start.begin;
+    this.offsetBegin = start.offsetBegin;
+    this.end = end.end;
+    this.offsetEnd = end.offsetEnd;
+  }
+
+  /**
+   * Compare with another Interval.
+   * @param other the comparator
+   * @return true if both start and end positions are less than
+   *              the comparator.
+   */
+  public boolean lessThanExclusive(Interval other) {
+    //assert field == other.field;
+    return begin < other.begin && end < other.end;
+  }
+
+  /**
+   * Compare with another Interval.
+   * @param other the comparator
+   * @return true if both start and end positions are less than
+   *              or equal to the comparator's.
+   */
+  public boolean lessThan(Interval other) {
+    //assert field == other.field;
+    return begin <= other.begin && end <= other.end;
+  }
+
+  /**
+   * Compare with another Interval
+   * @param other the comparator
+   * @return true if both start and end positions are greater than
+   *              the comparator's.
+   */
+  public boolean greaterThanExclusive(Interval other) {
+    assert sameField(field, other.field);
+    return begin > other.begin && end > other.end;
+  }
+
+  /**
+   * Compare with another Interval
+   * @param other the comparator
+   * @return true if both start and end positions are greater than
+   *              or equal to the comparator's.
+   */
+  public boolean greaterThan(Interval other) {
+    assert sameField(field, other.field);
+    return begin >= other.begin && end >= other.end;
+  }
+
+  /**
+   * Compare with another Interval
+   * @param other the comparator
+   * @return true if this Interval contains the comparator
+   */
+  public boolean contains(Interval other) {
+    assert sameField(field, other.field);
+    return begin <= other.begin && other.end <= end;
+  }
+
+  /**
+   * Compare with another Interval to find overlaps
+   * @param other the comparator
+   * @return true if one of the two intervals contains the other
+   */
+  public boolean overlaps(Interval other) {
+    //assert field == other.field;
+    return this.contains(other) || other.contains(this);
+  }
+
+  /**
+   * Compare with another Interval, ordering by field first and by position
+   * within the same field.
+   * @param other the comparator
+   * @return true if this Interval's field sorts before the comparator's, or
+   *              if both are on the same field and this Interval either
+   *              begins earlier, or begins at the same position and ends no
+   *              later than the comparator
+   */
+  public boolean strictlyLessThan(Interval other) {
+    final int fieldOrder = this.field.compareTo(other.field);
+    if (fieldOrder != 0) {
+      // positions of intervals on different fields are not comparable
+      return fieldOrder < 0;
+    }
+    return this.begin < other.begin
+        || (this.begin == other.begin && this.end <= other.end);
+  }
+
+  /**
+   * Set all values of this Interval to be equal to another's
+   * @param other the Interval to copy
+   */
+  public void copy(Interval other) {
+    begin = other.begin;
+    end = other.end;
+    offsetBegin = other.offsetBegin;
+    offsetEnd = other.offsetEnd;
+    field = other.field;
+  }
+
+  /**
+   * Set to a state that will always compare as less than any
+   * other Interval. The field is left unchanged.
+   */
+  public void reset() {
+    offsetBegin = offsetEnd = -1;
+    begin = end = Integer.MIN_VALUE;
+  }
+
+  /**
+   * Set to a state that will always compare as more than any
+   * other Interval. The field is left unchanged.
+   */
+  public void setMaximum() {
+    offsetBegin = offsetEnd = -1;
+    begin = end = Integer.MAX_VALUE;
+  }
+
+  @Override
+  public Object clone() {
+    try {
+      return super.clone();
+    } catch (CloneNotSupportedException e) {
+      // should not happen: this class implements Cloneable
+      throw new RuntimeException(e);
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "Interval [field=" + field + " begin=" + begin + "(" + offsetBegin + "), end="
+        + end + "(" + offsetEnd + ")]";
+  }
+
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalCollector.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalCollector.java
new file mode 100644
index 0000000..9ddc3f8
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalCollector.java
@@ -0,0 +1,43 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.Scorer;
+
+/**
+ * Callback interface that receives the matching {@link Interval}s of a search
+ */
+public interface IntervalCollector {
+
+  /**
+   * Receives a single term match.
+   * @param scorer the parent scorer
+   * @param interval the interval being collected
+   * @param docID the docID of the matching document
+   */
+  void collectLeafPosition(Scorer scorer, Interval interval, int docID);
+
+  /**
+   * Receives a composite interval, which may itself have sub-intervals.
+   * @param scorer the parent scorer
+   * @param interval the interval being collected
+   * @param docID the docID of the matching document
+   */
+  void collectComposite(Scorer scorer, Interval interval, int docID);
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalFilter.java
new file mode 100644
index 0000000..9e8531a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalFilter.java
@@ -0,0 +1,36 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Wraps an {@link IntervalIterator} in a filtering {@link IntervalIterator}
+ *
+ * @see IntervalFilterQuery
+ */
+public interface IntervalFilter {
+
+  /**
+   * Applies this filter to the given IntervalIterator.
+   * @param collectIntervals true if the returned {@link IntervalIterator} will
+   *                         be passed to an {@link IntervalCollector}
+   * @param iter the {@link IntervalIterator} to filter
+   * @return a filtered {@link IntervalIterator}
+   */
+  IntervalIterator filter(boolean collectIntervals, IntervalIterator iter);
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalFilterQuery.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalFilterQuery.java
new file mode 100644
index 0000000..2725b0a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalFilterQuery.java
@@ -0,0 +1,419 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ComplexExplanation;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.Weight.PostingFeatures;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.Bits;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.TreeSet;
+
+/**
+ * A Query that filters the results of an inner {@link Query} using an
+ * {@link IntervalFilter}.
+ *
+ * @see OrderedNearQuery
+ * @see UnorderedNearQuery
+ * @see NonOverlappingQuery
+ *
+ * @lucene.experimental
+ */
+public class IntervalFilterQuery extends Query implements Cloneable {
+
+  /**
+   * Builds a BooleanQuery in which every subquery is a MUST clause, after
+   * checking that all subqueries report the same non-null field.
+   * @param subqueries the queries to combine
+   * @return the conjunction of the subqueries
+   * @throws IllegalArgumentException if no subqueries are given, if the first
+   *         subquery has a null field, or if the subqueries are not all on
+   *         the same field
+   */
+  public static BooleanQuery createFieldConjunction(Query... subqueries) {
+    if (subqueries.length == 0)
+      throw new IllegalArgumentException("Cannot create empty conjunction");
+    String field = subqueries[0].getField();
+    if (field == null)
+      throw new IllegalArgumentException("Cannot create interval conjunction over null field");
+    BooleanQuery bq = new BooleanQuery();
+    for (Query query : subqueries) {
+      if (!field.equals(query.getField()))
+        throw new IllegalArgumentException("Cannot create interval conjunction over multiple fields: found "
+            + field + " and " + query.getField());
+      bq.add(query, BooleanClause.Occur.MUST);
+    }
+    return bq;
+  }
+
+  /**
+   * Builds a BooleanQuery in which every subquery is a MUST clause, without
+   * any check on the fields of the subqueries.
+   * @param subqueries the queries to combine
+   * @return the conjunction of the subqueries
+   */
+  public static BooleanQuery createConjunction(Query... subqueries) {
+    BooleanQuery bq = new BooleanQuery();
+    for (Query query : subqueries) {
+      bq.add(query, BooleanClause.Occur.MUST);
+    }
+    return bq;
+  }
+
+  private Query inner;
+  private final IntervalFilter filter;
+
+  /**
+   * Constructs a query using an inner query and an IntervalFilter
+   * @param inner the query to wrap
+   * @param filter the filter to restrict results by
+   */
+  public IntervalFilterQuery(Query inner, IntervalFilter filter) {
+    this.inner = inner;
+    this.filter = filter;
+  }
+
+  @Override
+  public void extractTerms(Set<Term> terms) {
+    inner.extractTerms(terms);
+  }
+
+  @Override
+  public String getField() {
+    return inner.getField();
+  }
+
+  /**
+   * Rewrites the inner query.
+   * @return this query if the inner query did not rewrite, otherwise a clone
+   *         wrapping the rewritten inner query
+   */
+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    Query rewritten = inner.rewrite(reader);
+    if (rewritten == inner) {
+      return this; // no clauses rewrote
+    }
+    IntervalFilterQuery clone = (IntervalFilterQuery) this.clone();
+    clone.inner = rewritten;
+    return clone; // some clauses rewrote
+  }
+
+  @Override
+  public Weight createWeight(IndexSearcher searcher) throws IOException {
+    return new IntervalFilterWeight(inner.createWeight(searcher), searcher);
+  }
+
+  /**
+   * Weight that delegates document matching to the inner query's Weight and
+   * scores with statistics gathered from the inner query's terms.
+   */
+  class IntervalFilterWeight extends Weight {
+
+    private final Weight other;
+    private final Similarity similarity;
+    /** null when the inner query has no terms; scorer() then returns null */
+    private final Similarity.SimWeight stats;
+
+    public IntervalFilterWeight(Weight other, IndexSearcher searcher) throws IOException {
+      this.other = other;
+      this.similarity = searcher.getSimilarity();
+      this.stats = getSimWeight(other.getQuery(), searcher);
+    }
+
+    /**
+     * Computes the similarity weight over all terms extracted from the query.
+     * @return the weight, or null if the query has no terms
+     */
+    private Similarity.SimWeight getSimWeight(Query query, IndexSearcher searcher) throws IOException {
+      TreeSet<Term> terms = new TreeSet<Term>();
+      query.extractTerms(terms);
+      if (terms.size() == 0)
+        return null;
+      int i = 0;
+      TermStatistics[] termStats = new TermStatistics[terms.size()];
+      for (Term term : terms) {
+        TermContext state = TermContext.build(searcher.getTopReaderContext(), term);
+        termStats[i] = searcher.termStatistics(term, state);
+        i++;
+      }
+      final String field = terms.first().field(); // nocommit - should we be checking all filtered terms
+                                                  // are on the same field?
+      return similarity.computeWeight(query.getBoost(), searcher.collectionStatistics(field), termStats);
+
+    }
+
+    @Override
+    public Explanation explain(AtomicReaderContext context, int doc)
+        throws IOException {
+      Scorer scorer = scorer(context, PostingFeatures.POSITIONS,
+          context.reader().getLiveDocs());
+      if (scorer != null) {
+        int newDoc = scorer.advance(doc);
+        if (newDoc == doc) {
+          float freq = scorer.freq();
+          Similarity.SimScorer docScorer = similarity.simScorer(stats, context);
+          ComplexExplanation result = new ComplexExplanation();
+          result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
+          Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
+          result.addDetail(scoreExplanation);
+          result.setValue(scoreExplanation.getValue());
+          result.setMatch(true);
+          return result;
+        }
+      }
+      return new ComplexExplanation(false, 0.0f,
+          "No matching term within position filter");
+    }
+
+    @Override
+    public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException {
+      if (stats == null)
+        return null;
+      // the interval filter needs positions, so upgrade a plain docs-and-freqs request
+      flags = flags == PostingFeatures.DOCS_AND_FREQS ? PostingFeatures.POSITIONS : flags;
+      ScorerFactory factory = new ScorerFactory(other, context, flags, acceptDocs);
+      final Scorer scorer = factory.scorer();
+      if (scorer == null) {
+        // nothing to filter: skip building a SimScorer that would go unused
+        return null;
+      }
+      Similarity.SimScorer docScorer = similarity.simScorer(stats, context);
+      return new IntervalFilterScorer(this, scorer, factory, docScorer);
+    }
+
+    @Override
+    public Query getQuery() {
+      return IntervalFilterQuery.this;
+    }
+
+    @Override
+    public float getValueForNormalization() throws IOException {
+      return stats == null ? 1.0f : stats.getValueForNormalization();
+    }
+
+    @Override
+    public void normalize(float norm, float topLevelBoost) {
+      if (stats != null)
+        stats.normalize(norm, topLevelBoost);
+    }
+  }
+
+  /**
+   * Remembers the arguments of a Weight.scorer() call so that an additional
+   * Scorer can be created later with the same arguments.
+   */
+  static class ScorerFactory {
+    final Weight weight;
+    final AtomicReaderContext context;
+    final PostingFeatures flags;
+    final Bits acceptDocs;
+    ScorerFactory(Weight weight,
+        AtomicReaderContext context, PostingFeatures flags,
+        Bits acceptDocs) {
+      this.weight = weight;
+      this.context = context;
+      this.flags = flags;
+      this.acceptDocs = acceptDocs;
+    }
+
+    public Scorer scorer() throws IOException {
+      return weight.scorer(context, flags, acceptDocs);
+    }
+
+  }
+
+  /**
+   * Scorer that only matches documents of the wrapped Scorer for which the
+   * filtered IntervalIterator returns at least one interval.
+   */
+  final class IntervalFilterScorer extends Scorer {
+
+    private final Scorer other;
+    private IntervalIterator filter;
+    /** first filtered interval of the current document, set by nextDoc()/advance() */
+    private Interval current;
+    private final ScorerFactory factory;
+    private final Similarity.SimScorer docScorer;
+
+    public IntervalFilterScorer(Weight weight, Scorer other, ScorerFactory factory,
+        Similarity.SimScorer docScorer) throws IOException {
+      super(weight);
+      this.other = other;
+      this.factory = factory;
+      this.filter = IntervalFilterQuery.this.filter.filter(false, other.intervals(false));
+      this.docScorer = docScorer;
+    }
+
+    @Override
+    public float score() throws IOException {
+      return docScorer.score(docID(), freq());
+    }
+
+    @Override
+    public IntervalIterator intervals(boolean collectIntervals) throws IOException {
+      if (collectIntervals) {
+        // collection runs over a separate scorer created from the factory
+        final Scorer collectingScorer = factory.scorer();
+        final IntervalIterator filter = IntervalFilterQuery.this.filter.filter(true, collectingScorer.intervals(true));
+        return new IntervalIterator(this, true) {
+
+          @Override
+          public int scorerAdvanced(int docId) throws IOException {
+            //System.out.println("IntervalIterator: advancing from " + collectingScorer.docID() + " to " + docId);
+            if (collectingScorer.docID() >= docId) {
+              return collectingScorer.docID();
+            }
+            int target = collectingScorer.advance(docId);
+            if (target == NO_MORE_DOCS)
+              return NO_MORE_DOCS;
+            return filter.scorerAdvanced(target);
+          }
+
+          @Override
+          public Interval next() throws IOException {
+            return filter.next();
+          }
+
+          @Override
+          public void collect(IntervalCollector collector) {
+            filter.collect(collector);
+          }
+
+          @Override
+          public IntervalIterator[] subs(boolean inOrder) {
+            return filter.subs(inOrder);
+          }
+
+          @Override
+          public int matchDistance() {
+            return filter.matchDistance();
+          }
+
+          @Override
+          public int docID() {
+            return filter.docID();
+          }
+
+          @Override
+          public String toString() {
+            return IntervalFilterQuery.this.toString(null) + "[" + filter + "]";
+          }
+
+        };
+      }
+
+      return new IntervalIterator(this, collectIntervals) {
+        // true while the interval already read by nextDoc()/advance() has not been handed out
+        private boolean buffered = true;
+        @Override
+        public int scorerAdvanced(int docId) throws IOException {
+          buffered = true;
+          assert docId == filter.docID();
+          return docId;
+        }
+
+        @Override
+        public Interval next() throws IOException {
+          if (buffered) {
+            buffered = false;
+            return current;
+          }
+          else if (current != null) {
+            return current = filter.next();
+          }
+          return null;
+        }
+
+        @Override
+        public void collect(IntervalCollector collector) {
+          filter.collect(collector);
+        }
+
+        @Override
+        public IntervalIterator[] subs(boolean inOrder) {
+          return filter.subs(inOrder);
+        }
+
+        @Override
+        public int matchDistance() {
+          return filter.matchDistance();
+        }
+
+      };
+    }
+
+    @Override
+    public int docID() {
+      return other.docID();
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      int docId = -1;
+      while ((docId = other.nextDoc()) != Scorer.NO_MORE_DOCS) {
+        filter.scorerAdvanced(docId);
+        if ((current = filter.next()) != null) { // just check if there is at least one interval that matches!
+          return other.docID();
+        }
+      }
+      return Scorer.NO_MORE_DOCS;
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      int docId = other.advance(target);
+      if (docId == Scorer.NO_MORE_DOCS) {
+        return NO_MORE_DOCS;
+      }
+      do {
+        filter.scorerAdvanced(docId);
+        if ((current = filter.next()) != null) {
+          return other.docID();
+        }
+      } while ((docId = other.nextDoc()) != Scorer.NO_MORE_DOCS);
+      return NO_MORE_DOCS;
+    }
+
+    @Override
+    public long cost() {
+      return other.cost();
+    }
+
+    @Override
+    public int freq() throws IOException {
+      return 1; // nocommit how to calculate frequency?
+    }
+
+    /**
+     * Sums the slop factor of the current match and of every remaining
+     * filtered interval; this consumes the filter's intervals.
+     */
+    public float sloppyFreq() throws IOException {
+      float freq = 0.0f;
+      do {
+        int d = filter.matchDistance();
+        freq += docScorer.computeSlopFactor(d);
+      }
+      while (filter.next() != null);
+      return freq;
+    }
+
+  }
+
+  @Override
+  public String toString(String field) {
+    return "Filtered/" + filter.toString() + "(" + inner.toString() + ")";
+  }
+
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = super.hashCode();
+    result = prime * result + ((filter == null) ? 0 : filter.hashCode());
+    result = prime * result + ((inner == null) ? 0 : inner.hashCode());
+    return result;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) return true;
+    if (!super.equals(obj)) return false;
+    if (getClass() != obj.getClass()) return false;
+    IntervalFilterQuery other = (IntervalFilterQuery) obj;
+    if (filter == null) {
+      if (other.filter != null) return false;
+    } else if (!filter.equals(other.filter)) return false;
+    if (inner == null) {
+      if (other.inner != null) return false;
+    } else if (!inner.equals(other.inner)) return false;
+    return true;
+  }
+
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalIterator.java
new file mode 100644
index 0000000..5e53e9e
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalIterator.java
@@ -0,0 +1,148 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.search.Scorer;
+
+import java.io.IOException;
+
+/**
+ * Walks over the {@link Interval}s matched by a {@link Scorer}
+ *
+ * @lucene.experimental
+ */
+public abstract class IntervalIterator {
+
+  /** A shared zero-length array of IntervalIterators */
+  public static final IntervalIterator[] EMPTY = new IntervalIterator[0];
+
+  /** An IntervalIterator that never returns an Interval */
+  public static final IntervalIterator NO_MORE_INTERVALS = new EmptyIntervalIterator();
+
+  /** Sentinel docID meaning there are no more documents */
+  public static final int NO_MORE_DOCS = Integer.MAX_VALUE;
+
+  protected final Scorer scorer;
+  protected final boolean collectIntervals;
+
+  /**
+   * Creates an IntervalIterator on top of a {@link Scorer}
+   * @param scorer the {@link Scorer} positions are pulled from
+   * @param collectIntervals true if positions will be collected
+   */
+  public IntervalIterator(Scorer scorer, boolean collectIntervals) {
+    this.scorer = scorer;
+    this.collectIntervals = collectIntervals;
+  }
+
+  /**
+   * Notification that the parent scorer has been advanced. When the scorer
+   * ends up on docId, following calls to next() return the Intervals of that
+   * document; when it does not, no Intervals are available.
+   * @param docId the document the parent scorer was advanced to
+   * @return the docId the scorer is currently positioned at
+   * @throws IOException if a low-level I/O error is encountered
+   */
+  public abstract int scorerAdvanced(int docId) throws IOException;
+
+  /**
+   * Moves to the next Interval on the current document.
+   * @return the next Interval, or null when no Intervals remain
+   * @throws IOException if a low-level I/O error is encountered
+   */
+  public abstract Interval next() throws IOException;
+
+  /**
+   * When intervals are being collected, this is called once for each
+   * Interval the iterator returns. It requires the iterator to have been
+   * constructed with collectIntervals=true.
+   * @param collector the {@link IntervalCollector} receiving the
+   *                  Interval positions
+   * @see Scorer#intervals(boolean)
+   */
+  public abstract void collect(IntervalCollector collector);
+
+  /**
+   * Returns the sub-iterators, if any
+   *
+   * @param inOrder
+   *          true if the sub-iterators should come back in the same order the
+   *          queries were provided
+   */
+  public abstract IntervalIterator[] subs(boolean inOrder);
+
+  /**
+   * Returns the distance between matching subintervals
+   */
+  public abstract int matchDistance();
+
+  /**
+   * Returns the docID the underlying scorer is on
+   */
+  public int docID() {
+    return scorer.docID();
+  }
+
+  /**
+   * Returns the {@link Scorer} this iterator was built on
+   */
+  public Scorer getScorer() {
+    return scorer;
+  }
+
+  /**
+   * An iterator that has no intervals and reports NO_MORE_DOCS
+   */
+  private static final class EmptyIntervalIterator extends
+      IntervalIterator {
+
+    public EmptyIntervalIterator() {
+      super(null, false);
+    }
+
+    @Override
+    public int scorerAdvanced(int docId) throws IOException {
+      return IntervalIterator.NO_MORE_DOCS;
+    }
+
+    @Override
+    public Interval next() throws IOException {
+      return null;
+    }
+
+    @Override
+    public void collect(IntervalCollector collector) {
+      // nothing to collect
+    }
+
+    @Override
+    public IntervalIterator[] subs(boolean inOrder) {
+      return EMPTY;
+    }
+
+    @Override
+    public int matchDistance() {
+      return Integer.MAX_VALUE;
+    }
+
+    @Override
+    public int docID() {
+      return IntervalIterator.NO_MORE_DOCS;
+    }
+
+  }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueue.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueue.java
new file mode 100644
index 0000000..76bc8cb
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueue.java
@@ -0,0 +1,71 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.search.intervals.IntervalQueue.IntervalRef;
+import org.apache.lucene.util.PriorityQueue;
+
+/**
+ * Abstract base class for calculating minimal spanning intervals with Queues.
+ * The queue holds {@link IntervalRef}s; subclasses define their ordering.
+ * @see IntervalQueueAnd
+ *
+ * @lucene.experimental
+ * @lucene.internal
+ */
+abstract class IntervalQueue extends PriorityQueue<IntervalRef> {
+  /**
+   * The current interval spanning the queue
+   */
+  final Interval currentCandidate;
+
+  /**
+   * Creates a new {@link IntervalQueue} with a fixed size
+   * @param size the size of the queue
+   */
+  public IntervalQueue(int size) {
+    super(size);
+    currentCandidate = new Interval();
+  }
+
+  /**
+   * Clears and resets the queue to its initial values;
+   */
+  void reset() {
+    clear();
+    currentCandidate.reset();
+  }
+
+  /**
+   * Called by the consumer each time the head of the queue was updated
+   */
+  abstract void updateCurrentCandidate();
+
+  /**
+   * Holds a reference to an interval and the index it was queued with.
+   */
+  final static class IntervalRef {
+    Interval interval;
+    final int index;
+
+    IntervalRef(Interval interval, int index) {
+      this.interval = interval;
+      this.index = index;
+    }
+  }
+
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueueAnd.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueueAnd.java
new file mode 100644
index 0000000..3d35d70
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueueAnd.java
@@ -0,0 +1,90 @@
+package org.apache.lucene.search.intervals;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Queue used to compute minimal spanning intervals for conjunctions
+ * @lucene.experimental
+ */
+final class IntervalQueueAnd extends IntervalQueue {
+
+  /** the current right extreme positions of the queue */
+  int rightExtreme = Integer.MIN_VALUE;
+  /** the current right extreme offset of the queue */
+  int rightExtremeOffset = Integer.MIN_VALUE;
+  /** the current right extreme begin position */
+  int rightExtremeBegin;
+  /** the end of the interval on top of the queue */
+  int currentTopEnd;
+
+  /**
+   * Creates a new {@link IntervalQueueAnd} with a fixed size
+   * @param size the size of the queue
+   */
+  IntervalQueueAnd(int size) {
+    super(size);
+  }
+
+  @Override
+  void reset () {
+    super.reset();
+    rightExtremeOffset = Integer.MIN_VALUE;
+    rightExtreme = Integer.MIN_VALUE;
+  }
+
+  /**
+   * Moves the right extreme of this queue to the given interval unless the
+   * interval ends before the current right extreme.
+   *
+   * @param intervalRef the interval to compare
+   */
+  void updateRightExtreme(IntervalRef intervalRef) {
+    final Interval candidate = intervalRef.interval;
+    if (rightExtreme > candidate.end) {
+      return;
+    }
+    rightExtreme = candidate.end;
+    rightExtremeOffset = candidate.offsetEnd;
+    rightExtremeBegin = candidate.begin;
+  }
+
+  /**
+   * Makes the current candidate span from the begin of the interval on top
+   * of the queue to the right extreme, and records that interval's end.
+   */
+  @Override
+  void updateCurrentCandidate() {
+    final Interval head = top().interval;
+    currentCandidate.field = head.field;
+    currentCandidate.begin = head.begin;
+    currentCandidate.offsetBegin = head.offsetBegin;
+    currentCandidate.end = rightExtreme;
+    currentCandidate.offsetEnd = rightExtremeOffset;
+    currentTopEnd = head.end;
+  }
+
+  @Override
+  protected boolean lessThan(IntervalRef left, IntervalRef right) {
+    final Interval a = left.interval;
+    final Interval b = right.interval;
+    // an interval without a field sorts first
+    if (a.field == null) {
+      return true;
+    }
+    if (b.field == null) {
+      return false;
+    }
+    if (!a.field.equals(b.field)) {
+      return a.field.compareTo(b.field) < 0;
+    }
+    return a.begin < b.begin
+        || (a.begin == b.begin && a.end > b.end)
+        || a.offsetBegin < b.offsetBegin;
+  }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueueOr.java b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueueOr.java
new file mode 100644
index 0000000..a99d91a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/intervals/IntervalQueueOr.java
@@ -0,0 +1,51 @@
+package org.apache.lucene.search.intervals;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +/** + * Queue class for calculating minimal spanning disjunction intervals. + * The current candidate is always a copy of the interval on top of the queue. + * @lucene.experimental + */ +final class IntervalQueueOr extends IntervalQueue { + + /** + * Creates a new {@link IntervalQueueOr} with a fixed size + * @param size the size of the queue + */ + IntervalQueueOr(int size) { + super(size); + } + /** + * Copies the interval on top of the queue into {@code currentCandidate}. + */ + @Override + void updateCurrentCandidate() { + currentCandidate.copy(top().interval); + } + /** + * Heap order. An interval without a field sorts first. Intervals on the same field are + * ordered by smaller begin, then (for equal begin) by smaller end; intervals on different + * fields are ordered by field name. + */ + @Override + protected boolean lessThan(IntervalRef left, IntervalRef right) { + final Interval a = left.interval; + final Interval b = right.interval; + if (a.field == null) + return true; + if (b.field == null) + return false; + if (a.field.equals(b.field)) + return a.begin < b.begin || (a.begin == b.begin && a.end < b.end); + return (a.field.compareTo(b.field)) < 0; + //return a.end < b.end || (a.end == b.end && a.begin >= b.begin); + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/MinFrequencyFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/MinFrequencyFilter.java new file mode 100644 index 0000000..98f6fc4 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/MinFrequencyFilter.java @@ -0,0 +1,106 @@ +package org.apache.lucene.search.intervals; + +import java.io.IOException; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + /** + * An {@link IntervalFilter} that only lets a document's intervals through when the wrapped + * iterator produces at least {@code minFreq} of them. + */ +public class MinFrequencyFilter implements IntervalFilter { + + private final int minFreq; + /** + * @param minFreq the number of intervals a document must have before any is returned + */ + public MinFrequencyFilter(int minFreq) { + this.minFreq = minFreq; + } + + @Override + public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) { + return new MinFrequencyIntervalIterator(minFreq, iter, collectIntervals); + } + /** + * Buffers the first {@code minFreq} intervals of the current document. If the wrapped + * iterator runs dry before the buffer is full nothing is returned; otherwise the buffered + * intervals are replayed and iteration then continues directly on the wrapped iterator. + */ + public static class MinFrequencyIntervalIterator extends IntervalIterator { + + private final IntervalIterator subIter; + /* copies of the first minFreq intervals of the current document */ + private final Interval[] intervalCache; + /* match distance reported by subIter for each cached interval */ + private final int[] distanceCache; + + /* index of the cached interval returned last; -1 until the cache has been served from */ + private int cachePos = -1; + /* number of intervals buffered by the last cache load */ + private int freq = -1; + + public MinFrequencyIntervalIterator(int minFreq, IntervalIterator iter, boolean collectIntervals) { + super(iter == null ? null : iter.scorer, collectIntervals); + this.subIter = iter; + this.intervalCache = new Interval[minFreq]; + for (int i = 0; i < minFreq; i++) { + this.intervalCache[i] = new Interval(); + } + this.distanceCache = new int[minFreq]; + } + + @Override + public int scorerAdvanced(int docId) throws IOException { + cachePos = -1; + return subIter.scorerAdvanced(docId); + } + /** + * Returns the next interval, or {@code null} when the document has fewer than + * {@code minFreq} intervals or the wrapped iterator is exhausted. + */ + @Override + public Interval next() throws IOException { + if (cachePos == -1) + freq = loadIntervalCache(); + if (freq < intervalCache.length) + return null; + cachePos++; + if (cachePos < intervalCache.length) + return intervalCache[cachePos]; + return subIter.next(); + } + /** + * Fills the cache from the wrapped iterator, remembering for every buffered interval the + * match distance the wrapped iterator reports for it, so that {@link #matchDistance()} + * can answer for replayed intervals. + * @return the number of intervals buffered + */ + private int loadIntervalCache() throws IOException { + int f = 0; + Interval interval; + while (f < intervalCache.length && (interval = subIter.next()) != null) { + intervalCache[f].copy(interval); + distanceCache[f] = subIter.matchDistance(); + f++; + } + return f; + } + + @Override + public void collect(IntervalCollector collector) { + if (cachePos < distanceCache.length) + collector.collectComposite(null, intervalCache[cachePos], subIter.docID()); + subIter.collect(collector); + } + + @Override + public IntervalIterator[] subs(boolean 
inOrder) { + return new IntervalIterator[]{ subIter }; + } + /** + * Returns the distance recorded for the cached interval currently being replayed, or the + * wrapped iterator's own distance once the cache has been passed. + */ + @Override + public int matchDistance() { + if (cachePos < distanceCache.length) + return distanceCache[cachePos]; + return subIter.matchDistance(); + } + } + + @Override + public String toString() { + return "MINFREQ(" + minFreq + ")"; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/NonOverlappingQuery.java b/lucene/core/src/java/org/apache/lucene/search/intervals/NonOverlappingQuery.java new file mode 100644 index 0000000..5144afc --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/NonOverlappingQuery.java @@ -0,0 +1,364 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.Weight.PostingFeatures; +import org.apache.lucene.util.Bits; + +import java.io.IOException; +import java.util.Set; + +/** + * A Query that matches documents containing an interval (the minuend) that + * does not contain another interval (the subtrahend). + * + * As an example, given the following {@link org.apache.lucene.search.BooleanQuery}: + *

    + *   BooleanQuery bq = new BooleanQuery();
    + *   bq.add(new TermQuery(new Term(field, "quick")), BooleanQuery.Occur.MUST);
    + *   bq.add(new TermQuery(new Term(field, "fox")), BooleanQuery.Occur.MUST);
    + * 
    + * + * The document "the quick brown fox" will be matched by this query. But + * create a NonOverlappingQuery using this query as a minuend: + *
    + *   NonOverlappingQuery brq = new NonOverlappingQuery(bq, new TermQuery(new Term(field, "brown")));
    + * 
    + * + * This query will not match "the quick brown fox", because "brown" is found + * within the interval of the boolean query for "quick" and "fox. The query + * will match "the quick fox is brown", because here "brown" is outside + * the minuend's interval. + * + * N.B. Positions must be included in the index for this query to work + * + * Implements the Brouwerian operator as defined in "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics" + * + * @lucene.experimental + * @see BrouwerianIntervalIterator + */ +public final class NonOverlappingQuery extends Query implements Cloneable { + + private final Query minuend; + private final Query subtrahend; + private final String field; + + /** + * Constructs a Query that matches documents containing intervals of the minuend + * that are not subtended by the subtrahend + * @param minuend the minuend Query + * @param subtrahend the subtrahend Query + */ + public NonOverlappingQuery(Query minuend, Query subtrahend) { + this.minuend = minuend; + this.subtrahend = subtrahend; + this.field = minuend.getField(); + if (minuend.getField() == null) + throw new IllegalArgumentException("Minuend query must have a field declared"); + if (subtrahend.getField() == null) + throw new IllegalArgumentException("Subtrahend query must have a field declared"); + if (!minuend.getField().equals(subtrahend.getField())) + throw new IllegalArgumentException("Minuend and subtrahend must be on the same field"); + } + + @Override + public void extractTerms(Set terms) { + minuend.extractTerms(terms); + subtrahend.extractTerms(terms); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Query rewritten = minuend.rewrite(reader); + Query subRewritten = subtrahend.rewrite(reader); + if (rewritten != minuend || subRewritten != subtrahend) { + return new NonOverlappingQuery(rewritten, subRewritten); + } + return this; + } + + @Override + public Weight createWeight(IndexSearcher searcher) throws 
IOException { + return new BrouwerianQueryWeight(minuend.createWeight(searcher), subtrahend.createWeight(searcher)); + } + + class BrouwerianQueryWeight extends Weight { + + private final Weight minuted; + private final Weight subtracted; + + public BrouwerianQueryWeight(Weight minuted, Weight subtracted) { + this.minuted = minuted; + this.subtracted = subtracted; + } + + @Override + public Explanation explain(AtomicReaderContext context, int doc) + throws IOException { + return minuted.explain(context, doc); + } + + @Override + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { + flags = flags == PostingFeatures.DOCS_AND_FREQS ? PostingFeatures.POSITIONS : flags; + ScorerFactory factory = new ScorerFactory(minuted, subtracted, context, flags, acceptDocs); + final Scorer scorer = factory.minutedScorer(); + final Scorer subScorer = factory.subtractedScorer(); + if (subScorer == null) { + return scorer; + } + return scorer == null ? 
null : new BrouwerianScorer(this, scorer, subScorer, factory); + } + + @Override + public Query getQuery() { + return NonOverlappingQuery.this; + } + + @Override + public float getValueForNormalization() throws IOException { + return minuted.getValueForNormalization(); + } + + @Override + public void normalize(float norm, float topLevelBoost) { + minuted.normalize(norm, topLevelBoost); + } + } + + static class ScorerFactory { + final Weight minuted; + final Weight subtracted; + final AtomicReaderContext context; + final PostingFeatures flags; + final Bits acceptDocs; + ScorerFactory(Weight minuted, Weight subtracted, + AtomicReaderContext context, PostingFeatures flags, + Bits acceptDocs) { + this.minuted = minuted; + this.subtracted = subtracted; + this.context = context; + this.flags = flags; + this.acceptDocs = acceptDocs; + } + + public Scorer minutedScorer() throws IOException { + return minuted.scorer(context, flags, acceptDocs); + } + + public Scorer subtractedScorer() throws IOException { + return subtracted.scorer(context, flags, acceptDocs); + } + + } + + final class BrouwerianScorer extends Scorer { + + private final Scorer minuend; + private IntervalIterator filter; + private final Scorer subtracted; + Interval current; + private final ScorerFactory factory; + + public BrouwerianScorer(Weight weight, Scorer minuend, Scorer subtracted, ScorerFactory factory) throws IOException { + super(weight); + this.minuend = minuend; + this.subtracted = subtracted; + this.filter = new BrouwerianIntervalIterator(minuend, false, minuend.intervals(false), + subtracted.intervals(false), field); + this.factory = factory; + } + + @Override + public float score() throws IOException { + return minuend.score(); + } + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + if (collectIntervals) { + final Scorer minuted = factory.minutedScorer(); + final Scorer subtracted = factory.subtractedScorer(); + final BrouwerianIntervalIterator 
brouwerianIntervalIterator + = new BrouwerianIntervalIterator(subtracted, true, minuted.intervals(true), subtracted.intervals(true), field); + return new IntervalIterator(this, collectIntervals) { + + @Override + public int scorerAdvanced(int docId) throws IOException { + //if (docId == this.docID()) + // return docId; + int mId = minuted.advance(docId); + if (subtracted.docID() < docId) + subtracted.advance(docId); + if (mId <= docId) + return brouwerianIntervalIterator.scorerAdvanced(docId); + return mId; + } + + @Override + public Interval next() throws IOException { + return brouwerianIntervalIterator.next(); + } + + @Override + public void collect(IntervalCollector collector) { + brouwerianIntervalIterator.collect(collector); + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return brouwerianIntervalIterator.subs(inOrder); + } + + @Override + public int matchDistance() { + return brouwerianIntervalIterator.matchDistance(); + } + + }; + } + + + + return new IntervalIterator(this, false) { + private boolean buffered = true; + @Override + public int scorerAdvanced(int docId) throws IOException { + buffered = true; + assert docId == filter.docID(); + return docId; + } + + @Override + public Interval next() throws IOException { + if (buffered) { + buffered = false; + return current; + } + else if (current != null) { + return current = filter.next(); + } + return null; + } + + @Override + public void collect(IntervalCollector collector) { + filter.collect(collector); + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return filter.subs(inOrder); + } + + @Override + public int matchDistance() { + return filter.matchDistance(); + } + + }; + } + + @Override + public int docID() { + return minuend.docID(); + } + + @Override + public int nextDoc() throws IOException { + int docId = -1; + while ((docId = minuend.nextDoc()) != Scorer.NO_MORE_DOCS) { + if (subtracted.docID() < docId) + subtracted.advance(docId); + 
filter.scorerAdvanced(docId); + if ((current = filter.next()) != null) { // just check if there is a position that matches! + return minuend.docID(); + } + } + return Scorer.NO_MORE_DOCS; + } + + @Override + public int advance(int target) throws IOException { + int docId = minuend.advance(target); + subtracted.advance(docId); + if (docId == Scorer.NO_MORE_DOCS) { + return NO_MORE_DOCS; + } + do { + filter.scorerAdvanced(docId); + if ((current = filter.next()) != null) { + return minuend.docID(); + } + } while ((docId = minuend.nextDoc()) != Scorer.NO_MORE_DOCS); + return NO_MORE_DOCS; + } + + @Override + public long cost() { + return minuend.cost(); + } + + @Override + public int freq() throws IOException { + return minuend.freq(); + } + + } + + @Override + public String toString(String field) { + return "NonOverlappingQuery[" + minuend + ", " + subtrahend + "]"; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((minuend == null) ? 0 : minuend.hashCode()); + result = prime * result + + ((subtrahend == null) ? 
0 : subtrahend.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!super.equals(obj)) return false; + if (getClass() != obj.getClass()) return false; + NonOverlappingQuery other = (NonOverlappingQuery) obj; + if (minuend == null) { + if (other.minuend != null) return false; + } else if (!minuend.equals(other.minuend)) return false; + if (subtrahend == null) { + if (other.subtrahend != null) return false; + } else if (!subtrahend.equals(other.subtrahend)) return false; + return true; + } + +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/OrderedConjunctionIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/OrderedConjunctionIntervalIterator.java new file mode 100644 index 0000000..1941bbc --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/OrderedConjunctionIntervalIterator.java @@ -0,0 +1,169 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import org.apache.lucene.search.Scorer; + +import java.io.IOException; + +/** + * An IntervalIterator based on minimum interval semantics for the + * AND< operator + * + * See "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics" + * + * @lucene.experimental + */ +public final class OrderedConjunctionIntervalIterator extends + IntervalIterator { + + private final IntervalIterator[] iterators; + private final Interval[] intervals; + private final int lastIter; + private final Interval interval; + + private int index = 1; + private int matchDistance = 0; + + private SnapshotPositionCollector snapshot = null; + private boolean collectLeaves = true; + + /** + * Create an OrderedConjunctionIntervalIterator over a composite IntervalIterator + * @param collectIntervals true if intervals will be collected + * @param collectLeaves true if the sub-iterators should be collected together with + * the composite interval + * @param field the field used to create the result interval + * @param other a composite IntervalIterator to wrap; its scorer and {@code subs(true)} are used + */ + public OrderedConjunctionIntervalIterator(boolean collectIntervals, boolean collectLeaves, String field, IntervalIterator other) { + this(other.scorer, collectIntervals, field, other.subs(true)); + this.collectLeaves = collectLeaves; + } + /** + * Same as the four-argument constructor with {@code collectLeaves} set to true. + * @param collectIntervals true if intervals will be collected + * @param field the field used to create the result interval + * @param other a composite IntervalIterator to wrap + */ + public OrderedConjunctionIntervalIterator(boolean collectIntervals, String field, IntervalIterator other) { + this(collectIntervals, true, field, other); + } + + /** + * Create an OrderedConjunctionIntervalIterator over a set of subiterators + * @param scorer the parent Scorer + * @param collectIntervals true if intervals will be collected + * @param field the field used to create the result interval + * @param iterators the sub-iterators to wrap; more than one is expected (asserted) + */ + public OrderedConjunctionIntervalIterator(Scorer scorer, boolean collectIntervals, String field, IntervalIterator... 
iterators) { + super(scorer, collectIntervals); + this.iterators = iterators; + assert iterators.length > 1; + intervals = new Interval[iterators.length]; + lastIter = iterators.length - 1; + this.interval = new Interval(field); + } + /** + * Returns the next interval in which the sub-iterators match in order, or {@code null} + * when no candidate was found before a sub-iterator ran out. Each time a candidate is + * recorded, {@code matchDistance} is set to + * {@code (begin of the last sub-interval - lastIter) - end of the first sub-interval}, + * and the sub-positions are snapshotted when intervals are collected. + */ + @Override + public Interval next() throws IOException { + if(intervals[0] == null) { + return null; + } + interval.setMaximum(); + int b = Integer.MAX_VALUE; + while (true) { + while (true) { + final Interval previous = intervals[index - 1]; + if (previous.end >= b) { + return interval.begin == Integer.MAX_VALUE ? null : interval; + } + if (index == intervals.length || intervals[index].begin > previous.end) { + break; + } + Interval current = intervals[index]; + do { + final Interval next; + if (current.end >= b || (next = iterators[index].next()) == null) { + return interval.begin == Integer.MAX_VALUE ? null : interval; + } + current = intervals[index] = next; + } while (current.begin <= previous.end); + index++; + } + interval.update(intervals[0], intervals[lastIter]); + matchDistance = (intervals[lastIter].begin - lastIter) - intervals[0].end; + b = intervals[lastIter].begin; + index = 1; + if (collectIntervals) + snapshotSubPositions(); + intervals[0] = iterators[0].next(); + if (intervals[0] == null) { + return interval.begin == Integer.MAX_VALUE ? 
null : interval; + } + } + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return iterators; + } + /** + * Replays the positions snapshotted by the last {@link #next()}, or collects directly + * from the sub-iterators when no snapshot has been taken yet. + */ + @Override + public void collect(IntervalCollector collector) { + assert collectIntervals; + if (snapshot == null) { + // we might not be initialized if the first interval matches + collectInternal(collector); + } else { + snapshot.replay(collector); + } + } + + private void snapshotSubPositions() { + if (snapshot == null) { + snapshot = new SnapshotPositionCollector(iterators.length); + } + snapshot.reset(); + collectInternal(snapshot); + } + + private void collectInternal(IntervalCollector collector) { + assert collectIntervals; + collector.collectComposite(scorer, interval, docID()); + if (collectLeaves) { + for (IntervalIterator iter : iterators) { + iter.collect(collector); + } + } + } + /** + * Positions every sub-iterator on {@code docId}, marks all sub-intervals as infinite and + * pre-fetches the first interval of the first sub-iterator. + */ + @Override + public int scorerAdvanced(int docId) throws IOException { + assert scorer.docID() == docId; + //System.out.println("OCI: scorerAdvanced start"); + for (int i = 0; i < iterators.length; i++) { + //System.out.println("OCI: advancing from " + iterators[i].docID() + " to " + docId); + int advanceTo = iterators[i].scorerAdvanced(docId); + assert advanceTo == docId; + intervals[i] = Interval.INFINITE_INTERVAL; + } + intervals[0] = iterators[0].next(); + index = 1; + return scorer.docID(); + } + + @Override + public int matchDistance() { + return matchDistance; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/OrderedNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/intervals/OrderedNearQuery.java new file mode 100644 index 0000000..8863761 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/OrderedNearQuery.java @@ -0,0 +1,57 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.Query; + +/** + * A query that matches if all of its subqueries match and are within + * a given distance of each other within the document. The subqueries + * must appear in the document in order. + * + * N.B. Positions must be included in the index for this query to work + * + * Implements the AND< operator as defined in "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics" + * + * @lucene.experimental + */ + +public class OrderedNearQuery extends IntervalFilterQuery { + + /** + * Constructs an OrderedNearQuery. The field handed to the {@link WithinOrderedFilter} + * is the field of the first subquery, so at least one subquery must be supplied. + * @param slop the maximum distance between the subquery matches + * @param collectLeaves false if only the master interval should be collected + * @param subqueries the subqueries to match. + */ + public OrderedNearQuery(int slop, boolean collectLeaves, Query... subqueries) { + super(createFieldConjunction(subqueries), new WithinOrderedFilter(subqueries[0].getField(), slop, collectLeaves)); + } + + /** + * Constructs an OrderedNearQuery that also collects the leaf intervals + * @param slop the maximum distance between the subquery matches + * @param subqueries the subqueries to match. + */ + public OrderedNearQuery(int slop, Query... 
subqueries) { + this(slop, true, subqueries); + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/RangeFrequencyFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/RangeFrequencyFilter.java new file mode 100644 index 0000000..b8eb0c5 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/RangeFrequencyFilter.java @@ -0,0 +1,112 @@ +package org.apache.lucene.search.intervals; + +import java.io.IOException; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + /** + * An {@link IntervalFilter} that only lets a document's intervals through when the wrapped + * iterator produces between {@code minFreq} and {@code maxFreq} of them (both inclusive). + */ +public class RangeFrequencyFilter implements IntervalFilter { + + private final int maxFreq; + private final int minFreq; + /** + * @param minFreq the smallest accepted number of intervals per document; may be 0 + * @param maxFreq the largest accepted number of intervals per document; may equal minFreq + * @throws IllegalArgumentException if minFreq is negative or maxFreq is less than minFreq + */ + public RangeFrequencyFilter(int minFreq, int maxFreq) { + if (minFreq < 0) + throw new IllegalArgumentException("minFreq must not be negative"); + if (maxFreq < minFreq) + throw new IllegalArgumentException("maxFreq must not be less than minFreq"); + this.maxFreq = maxFreq; + this.minFreq = minFreq; + } + + @Override + public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) { + return new MaxFrequencyIntervalIterator(minFreq, maxFreq, iter, collectIntervals); + } + /** + * Buffers every interval of the current document, up to {@code maxFreq}. The buffered + * intervals are replayed only when the wrapped iterator produced at least {@code minFreq} + * and at most {@code maxFreq} of them. + */ + public static class MaxFrequencyIntervalIterator extends IntervalIterator { + + private final IntervalIterator subIter; + /* copies of the intervals of the current document */ + private final Interval[] intervalCache; + /* match distance reported by subIter for each cached interval */ + private final int[] distanceCache; + private final int minFreq; + + /* index of the cached interval returned last; -1 until the cache has been served from */ + private int cachePos = -1; + /* number of buffered intervals, or -1 when the document exceeded the cache size */ + private int freq = -1; + + public MaxFrequencyIntervalIterator(int minFreq, int maxFreq, IntervalIterator iter, boolean collectIntervals) { + super(iter == null ? 
null : iter.scorer, collectIntervals); + this.minFreq = minFreq; + this.subIter = iter; + this.intervalCache = new Interval[maxFreq]; + for (int i = 0; i < maxFreq; i++) { + this.intervalCache[i] = new Interval(); + } + this.distanceCache = new int[maxFreq]; + } + + @Override + public int scorerAdvanced(int docId) throws IOException { + cachePos = -1; + return subIter.scorerAdvanced(docId); + } + /** + * Returns the next buffered interval, or {@code null} when the document's interval count + * is outside the accepted range or the buffer has been fully replayed. + * NOTE(review): when the count is rejected, {@code cachePos} stays at -1, so a further call + * loads the cache again from whatever the wrapped iterator has left - confirm that callers + * stop after the first {@code null}. + */ + @Override + public Interval next() throws IOException { + if (cachePos == -1) + freq = loadIntervalCache(); + if (freq == -1 || freq < minFreq) + return null; + cachePos++; + if (cachePos < freq) + return intervalCache[cachePos]; + return null; + } + /** + * Drains the wrapped iterator into the cache, remembering for every buffered interval the + * match distance the wrapped iterator reports for it, so that {@link #matchDistance()} + * can answer for replayed intervals. + * @return the number of intervals buffered, or -1 if there were more than the cache holds + */ + private int loadIntervalCache() throws IOException { + int f = 0; + Interval interval; + while ((interval = subIter.next()) != null) { + if (f >= intervalCache.length) + return -1; + intervalCache[f].copy(interval); + distanceCache[f] = subIter.matchDistance(); + f++; + } + return f; + } + + @Override + public void collect(IntervalCollector collector) { + collector.collectComposite(null, intervalCache[cachePos], subIter.docID()); + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return new IntervalIterator[]{ subIter }; + } + /** + * Returns the distance recorded for the cached interval that was returned last. + */ + @Override + public int matchDistance() { + return distanceCache[cachePos]; + } + } + + @Override + public String toString() { + return "RANGEFREQ(" + minFreq + "," + maxFreq + ")"; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/RangeIntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/RangeIntervalFilter.java new file mode 100644 index 0000000..26fd362 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/RangeIntervalFilter.java @@ -0,0 +1,101 @@ +package org.apache.lucene.search.intervals; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +/** + * An IntervalFilter that restricts Intervals returned by an IntervalIterator + * to those which occur between a given start and end position: an interval is + * returned when its begin is at least {@code start} and its end is at most {@code end}. + * + * @lucene.experimental + */ +public class RangeIntervalFilter implements IntervalFilter { + + private int start; + private int end; + + /** + * Constructs a new RangeIntervalFilter + * @param start the start of the filtered range (inclusive) + * @param end the end of the filtered range (inclusive) + */ + public RangeIntervalFilter(int start, int end) { + this.start = start; + this.end = end; + } + + @Override + public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) { + return new RangeIntervalIterator(collectIntervals, iter); + } + + /** + * Wraps an IntervalIterator ignoring Intervals that fall outside a + * given range. + */ + private class RangeIntervalIterator extends IntervalIterator { + + private final IntervalIterator iterator; + private Interval interval; + + RangeIntervalIterator(boolean collectIntervals, IntervalIterator iter) { + super(iter == null ? 
null : iter.scorer, collectIntervals); + this.iterator = iter; + } + /** + * Skips intervals that begin before {@code start}. Returns {@code null} as soon as an + * interval ends after {@code end}, or when the wrapped iterator is exhausted. + */ + @Override + public Interval next() throws IOException { + while ((interval = iterator.next()) != null) { + if(interval.end > end) { + return null; + } else if (interval.begin >= start) { + return interval; + } + } + return null; + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return new IntervalIterator[] { iterator }; + } + /** + * Collects the interval returned by the last {@link #next()} as a composite and then + * lets the wrapped iterator collect. + */ + @Override + public void collect(IntervalCollector collector) { + assert collectIntervals; + collector.collectComposite(null, interval, iterator.docID()); + iterator.collect(collector); + } + + @Override + public int scorerAdvanced(int docId) throws IOException { + return iterator.scorerAdvanced(docId); + } + + @Override + public int matchDistance() { + return iterator.matchDistance(); + } + + } + + @Override + public String toString() { + return "RANGE(" + start + "," + end + ")"; + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/SloppyIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/SloppyIntervalIterator.java new file mode 100644 index 0000000..3a275ee --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/SloppyIntervalIterator.java @@ -0,0 +1,235 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import org.apache.lucene.search.Scorer; + +import java.io.IOException; + +/** + * An interval iterator that has the semantics of a sloppy phrase query: intervals + * produced by a {@link ConjunctionIntervalIterator} over the sub-iterators are returned + * only when {@code end - begin} is at most the configured maximum length. + */ +public class SloppyIntervalIterator extends IntervalIterator { + private final int maxLen; + private int matchDistance; + private final IntervalIterator iterator; + + /** + * Create a SloppyIntervalIterator that matches subiterators within + * a specified maxLength + * @param scorer the parent Scorer + * @param maxLength the maximum distance between the first and last subiterator match + * @param collectIntervals true if intervals will be collected + * @param iterators the subiterators + * @throws IOException if a low-level I/O exception occurs + */ + public SloppyIntervalIterator(Scorer scorer, int maxLength, + boolean collectIntervals, IntervalIterator... 
iterators) + throws IOException { + super(scorer, collectIntervals); + this.maxLen = maxLength; + this.iterator = new ConjunctionIntervalIterator(scorer, collectIntervals, iterators); + } + /** + * Returns the next conjunction interval whose length ({@code end - begin}) does not + * exceed {@code maxLen}, or {@code null} when the conjunction is exhausted. The length + * of the last inspected interval is kept as the match distance. + */ + @Override + public Interval next() throws IOException { + Interval current; + do { + if ((current = iterator.next()) != null) { + matchDistance = current.end - current.begin; + if (matchDistance <= maxLen) { +// System.out.println(matchDistance); + break; + } + } else { + break; + } + } while (true); + return current; + } + + @Override + public int scorerAdvanced(int docId) throws IOException { + return iterator.scorerAdvanced(docId); + } + + public int matchDistance() { + return matchDistance; + } + /** + * Wraps {@code iterator} so that reported positions are shifted by the given offsets: + * a single offset yields an iterator shifting every interval by that offset, several + * offsets yield an iterator that groups {@code offsets.length} consecutive intervals. + */ + public static IntervalIterator create(Scorer scorer, boolean collectIntervals, + IntervalIterator iterator, int... offsets) { + if (offsets.length == 1) { + return new SingleSlopplyIntervalIterator(scorer, collectIntervals, iterator, offsets[0]); + } else { + return new SloppyGroupIntervalIterator(scorer, collectIntervals, iterator, offsets); + } + + } + /** + * Reports each interval of the wrapped iterator as a single position, + * {@code begin - offset}, while keeping the original character offsets. + */ + private final static class SingleSlopplyIntervalIterator extends + IntervalIterator { + private Interval realInterval; + private final Interval sloppyInterval = new Interval(); + private final IntervalIterator iterator; + private int offset; + + public SingleSlopplyIntervalIterator(Scorer scorer, + boolean collectIntervals, IntervalIterator iterator, int offset) { + super(scorer, collectIntervals); + this.iterator = iterator; + this.offset = offset; + } + + @Override + public int scorerAdvanced(int docId) throws IOException { + return iterator.scorerAdvanced(docId); + } + + @Override + public Interval next() throws IOException { + if ((realInterval = iterator.next()) != null) { + sloppyInterval.begin = sloppyInterval.end = realInterval.begin - offset; + sloppyInterval.offsetBegin = realInterval.offsetBegin; + sloppyInterval.offsetEnd = realInterval.offsetEnd; + return sloppyInterval; + } + return null; + } + + @Override + 
public void collect(IntervalCollector collector) { + collector.collectLeafPosition(scorer, realInterval, docID()); + + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return null; + } + + @Override + public int matchDistance() { + return sloppyInterval.end - sloppyInterval.begin; + } + + } + /** + * Keeps the last {@code offsets.length} intervals of the wrapped iterator in a ring + * buffer and reports the minimum and maximum of their offset-adjusted begin positions + * as one interval. + */ + private final static class SloppyGroupIntervalIterator extends + IntervalIterator { + + private final Interval sloppyGroupInterval = new Interval(); + private final int[] offsets; + private final Interval[] intervalPositions; + private final IntervalIterator groupIterator; + private int currentIndex; + private boolean initialized; + + public SloppyGroupIntervalIterator(Scorer scorer, boolean collectIntervals, + IntervalIterator groupIterator, int... offsets) { + super(scorer, collectIntervals); + this.offsets = offsets; + this.groupIterator = groupIterator; + this.intervalPositions = new Interval[offsets.length]; + for (int i = 0; i < intervalPositions.length; i++) { + intervalPositions[i] = new Interval(); + } + } + + @Override + public int scorerAdvanced(int docId) throws IOException { + initialized = false; + return groupIterator.scorerAdvanced(docId); + } + /** + * The first call after {@code scorerAdvanced} fills the whole buffer and returns + * {@code null} if the wrapped iterator cannot supply {@code offsets.length} intervals. + * Later calls replace the oldest buffered interval with the next one and recompute + * the group interval; {@code null} is returned when the wrapped iterator is exhausted. + */ + @Override + public Interval next() throws IOException { + sloppyGroupInterval.begin = Integer.MAX_VALUE; + sloppyGroupInterval.end = Integer.MIN_VALUE; + if (!initialized) { + initialized = true; + + currentIndex = 0; + for (int i = 0; i < offsets.length; i++) { + Interval current; + if ((current = groupIterator.next()) != null) { + intervalPositions[i].copy(current); + + int p = current.begin - offsets[i]; + sloppyGroupInterval.begin = Math.min(sloppyGroupInterval.begin, p); + sloppyGroupInterval.end = Math.max(sloppyGroupInterval.end, p); + } else { + return null; + } + } + sloppyGroupInterval.offsetBegin = intervalPositions[0].offsetBegin; + sloppyGroupInterval.offsetEnd = intervalPositions[intervalPositions.length-1].offsetEnd; + return sloppyGroupInterval; + } + Interval current; + if 
((current = groupIterator.next()) != null) { + final int currentFirst = currentIndex++ % intervalPositions.length; + intervalPositions[currentFirst].copy(current); + int currentIdx = currentIndex; + for (int i = 0; i < intervalPositions.length; i++) { // find min / max + int idx = currentIdx++ % intervalPositions.length; + int p = intervalPositions[idx].begin - offsets[i]; + sloppyGroupInterval.begin = Math.min(sloppyGroupInterval.begin, p); + sloppyGroupInterval.end = Math.max(sloppyGroupInterval.end, p); + } + sloppyGroupInterval.offsetBegin = intervalPositions[currentIndex % intervalPositions.length].offsetBegin; + sloppyGroupInterval.offsetEnd = intervalPositions[currentFirst].offsetEnd; + return sloppyGroupInterval; + } + return null; + } + + @Override + public void collect(IntervalCollector collector) { + int currentIdx = currentIndex+1; + for (int i = 0; i < intervalPositions.length; i++) { // hand every buffered position to the collector + int idx = currentIdx++ % intervalPositions.length; + collector.collectLeafPosition(scorer, intervalPositions[idx], + docID()); + } + + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return new IntervalIterator[] {groupIterator}; + } + + @Override + public int matchDistance() { + return sloppyGroupInterval.end - sloppyGroupInterval.begin; + } + + } + + @Override + public void collect(IntervalCollector collector) { + assert collectIntervals; + this.iterator.collect(collector); + + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return null; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/SnapshotPositionCollector.java b/lucene/core/src/java/org/apache/lucene/search/intervals/SnapshotPositionCollector.java new file mode 100644 index 0000000..7a4c500 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/SnapshotPositionCollector.java @@ -0,0 +1,114 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one 
or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * An IntervalCollector that allows a snapshot of the state of an + * IntervalIterator to be taken before it is advanced. + * + * Conjunction iterators advance their subiterators before the consumer + * can call collect on the top level iterator. If intervals are to be + * collected, we need to record the last possible match so that we can + * return the correct intervals for the match. 
+ * + * @lucene.internal + */ +final class SnapshotPositionCollector implements IntervalCollector { + + private SingleSnapshot[] snapshots; + private int index = 0; + + /** + * Create a new collector with n snapshots + * @param subs the number of subiterators to record + */ + SnapshotPositionCollector(int subs) { + snapshots = new SingleSnapshot[subs]; + } + + @Override + public void collectLeafPosition(Scorer scorer, Interval interval, + int docID) { + collect(scorer, interval, docID, true); + + } + + private void collect(Scorer scorer, Interval interval, int docID, + boolean isLeaf) { + if (snapshots.length <= index) { + grow(ArrayUtil.oversize(index + 1, + (RamUsageEstimator.NUM_BYTES_OBJECT_REF * 2) + + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + + RamUsageEstimator.NUM_BYTES_BOOLEAN + + RamUsageEstimator.NUM_BYTES_INT)); + } + if (snapshots[index] == null) { + snapshots[index] = new SingleSnapshot(); + } + snapshots[index++].set(scorer, interval, isLeaf, docID); + } + + @Override + public void collectComposite(Scorer scorer, Interval interval, + int docID) { + collect(scorer, interval, docID, false); + } + + void replay(IntervalCollector collector) { + for (int i = 0; i < index; i++) { + SingleSnapshot singleSnapshot = snapshots[i]; + if (singleSnapshot.isLeaf) { + collector.collectLeafPosition(singleSnapshot.scorer, + singleSnapshot.interval, singleSnapshot.docID); + } else { + collector.collectComposite(singleSnapshot.scorer, + singleSnapshot.interval, singleSnapshot.docID); + } + } + } + + void reset() { + index = 0; + } + + private void grow(int size) { + final SingleSnapshot[] newArray = new SingleSnapshot[size]; + System.arraycopy(snapshots, 0, newArray, 0, index); + snapshots = newArray; + } + + private static final class SingleSnapshot { + Scorer scorer; + final Interval interval = new Interval(); + boolean isLeaf; + int docID; + + void set(Scorer scorer, Interval interval, boolean isLeaf, + int docID) { + this.scorer = scorer; + 
this.interval.copy(interval); + this.isLeaf = isLeaf; + this.docID = docID; + } + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/TermIntervalIterator.java b/lucene/core/src/java/org/apache/lucene/search/intervals/TermIntervalIterator.java new file mode 100644 index 0000000..fa5c611 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/TermIntervalIterator.java @@ -0,0 +1,127 @@ +package org.apache.lucene.search.intervals; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.search.Scorer; + +import java.io.IOException; + + +/** + * Iterates over the individual positions of a term in a document + */ +public final class TermIntervalIterator extends IntervalIterator { + + private final Interval interval; + int positionsPending; + private final DocsAndPositionsEnum docsAndPos; + private int docID = -1; + private final String field; + + /** + * Constructs a new TermIntervalIterator + * @param scorer the parent Scorer + * @param docsAndPos a DocsAndPositionsEnum positioned on the current document + * @param doPayloads true if payloads should be retrieved for the positions + * @param collectIntervals true if positions will be collected + */ + public TermIntervalIterator(Scorer scorer, DocsAndPositionsEnum docsAndPos, + boolean doPayloads, boolean collectIntervals, String field) { + super(scorer, collectIntervals); + this.docsAndPos = docsAndPos; + this.interval = new Interval(field); + this.field = field; + } + + @Override + public Interval next() throws IOException { + if (--positionsPending >= 0) { + interval.begin = interval.end = docsAndPos.nextPosition(); + interval.offsetBegin = docsAndPos.startOffset(); + interval.offsetEnd = docsAndPos.endOffset(); + return interval; + } + positionsPending = 0; + return null; + } + + @Override + public int docID() { + return docID; + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return EMPTY; + } + + @Override + public void collect(IntervalCollector collector) { + collector.collectLeafPosition(scorer, interval, docID); + } + + @Override + public int scorerAdvanced(int docId) throws IOException { +// interval.reset(); + if (docsAndPos.docID() == docId) { + positionsPending = docsAndPos.freq(); + } else { + positionsPending = -1; + } + return docID = docsAndPos.docID(); + } + + @Override + public String toString() { + return "TermPositions [interval=" + interval + ", positionsPending=" + + 
positionsPending + ", docID=" + docID + "]"; + } + + @Override + public int matchDistance() { + return 0; + } +// TODO not supported yet - need to figure out what that means really to support payloads +// private static final class PayloadInterval extends Interval { +// private int pos = -1; +// private final DocsAndPositionsEnum payloads; +// private final TermIntervalIterator termPos; +// +// public PayloadInterval(DocsAndPositionsEnum payloads, TermIntervalIterator pos) { +// this.payloads = payloads; +// this.termPos = pos; +// } +// +// @Override +// public BytesRef nextPayload() throws IOException { +// if (pos == termPos.positionsPending) { +// return null; +// } else { +// pos = termPos.positionsPending; +// return payloads.getPayload(); +// } +// } +// +// @Override +// public void reset() { +// super.reset(); +// pos = -1; +// } +// +// } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/UnorderedNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/intervals/UnorderedNearQuery.java new file mode 100644 index 0000000..feda7b1 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/UnorderedNearQuery.java @@ -0,0 +1,82 @@ +package org.apache.lucene.search.intervals; + +/** + * Copyright (c) 2012 Lemur Consulting Ltd. + *

    + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.Query; + +/** + * A query that matches if a set of subqueries also match, and are within + * a given distance of each other within the document. The subqueries + * may appear in the document in any order. + * + * N.B. Positions must be included in the index for this query to work + * + * Implements the LOWPASSk operator as defined in "Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics" + * + * @lucene.experimental + */ + +public class UnorderedNearQuery extends IntervalFilterQuery { + + private final int slop; + + /** + * Constructs an OrderedNearQuery + * @param slop the maximum distance between the subquery matches + * @param collectLeaves false if only the parent interval should be collected + * @param subqueries the subqueries to match. + */ + public UnorderedNearQuery(int slop, boolean collectLeaves, Query... subqueries) { + super(createFieldConjunction(subqueries), new WithinUnorderedFilter(slop + subqueries.length - 2, collectLeaves)); + this.slop = slop; + } + + /** + * Constructs an OrderedNearQuery + * @param slop the maximum distance between the subquery matches + * @param subqueries the subqueries to match. + */ + public UnorderedNearQuery(int slop, Query... 
subqueries) { + this(slop, true, subqueries); + } + + @Override + public String toString() { + return "UnorderedNear/" + slop + ":" + super.toString(""); + } + + public static class WithinUnorderedFilter implements IntervalFilter { + + final IntervalFilter innerFilter; + final boolean collectLeaves; + + public WithinUnorderedFilter(int slop, boolean collectLeaves) { + this.innerFilter = new WithinIntervalFilter(slop); + this.collectLeaves = collectLeaves; + } + + @Override + public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) { + return innerFilter.filter(collectIntervals, + new ConjunctionIntervalIterator(iter.scorer, collectIntervals, collectLeaves, iter.subs(false))); + } + } + +} + diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/WithinIntervalFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/WithinIntervalFilter.java new file mode 100644 index 0000000..02dc955 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/WithinIntervalFilter.java @@ -0,0 +1,121 @@ +package org.apache.lucene.search.intervals; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; + + +/** + * An IntervalFilter that restricts Intervals returned by an IntervalIterator + * to those which have a matchDistance less than a defined slop. + * + * @lucene.experimental + */ +public class WithinIntervalFilter implements IntervalFilter { + + private final int slop; + private boolean collectLeaves = true; + + /** + * Construct a new WithinIntervalFilter + * @param slop the maximum slop allowed for subintervals + */ + public WithinIntervalFilter(int slop) { + this.slop = slop; + } + + /** + * Construct a new WithinIntervalFilter + * @param slop the maximum slop allowed for subintervals + */ + public WithinIntervalFilter(int slop, boolean collectLeaves) { + this.slop = slop; + this.collectLeaves = collectLeaves; + } + + /** + * @return the slop + */ + public int getSlop() { + return slop; + } + + @Override + public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) { + return new WithinIntervalIterator(collectIntervals, iter); + } + + class WithinIntervalIterator extends IntervalIterator { + + private IntervalIterator iterator; + private Interval interval; + + WithinIntervalIterator(boolean collectIntervals, IntervalIterator iter) { + super(iter == null ? 
null : iter.scorer, collectIntervals); + this.iterator = iter; + } + + @Override + public Interval next() throws IOException { + while ((interval = iterator.next()) != null) { + if((iterator.matchDistance()) <= slop){ + return interval; + } + } + return null; + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return new IntervalIterator[] {iterator}; + } + + + @Override + public void collect(IntervalCollector collector) { + assert collectIntervals; + collector.collectComposite(null, interval, iterator.docID()); + if (collectLeaves) + iterator.collect(collector); + } + + @Override + public int scorerAdvanced(int docId) throws IOException { + return iterator.scorerAdvanced(docId); + } + + @Override + public int matchDistance() { + return iterator.matchDistance(); + } + + @Override + public String toString() { + return "WithinIntervalIterator[" + iterator.docID() + ":" + interval + "]"; + } + + @Override + public int docID() { + return iterator.docID(); + } + + } + + @Override + public String toString() { + return "WITHIN(" + slop + ")"; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/WithinOrderedFilter.java b/lucene/core/src/java/org/apache/lucene/search/intervals/WithinOrderedFilter.java new file mode 100644 index 0000000..34a4458 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/WithinOrderedFilter.java @@ -0,0 +1,58 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An IntervalFilter that restricts an IntervalIterator to return + * only Intervals that occur in order within a given distance. + * + * @see WithinIntervalFilter + */ +public class WithinOrderedFilter implements IntervalFilter { + + private final WithinIntervalFilter innerFilter; + private final boolean collectLeaves; + private final String field; + + /** + * Constructs a new WithinOrderedFilter with a given slop + * @param slop The maximum distance allowed between subintervals + * @param collectLeaves false if only the parent interval should be collected + */ + public WithinOrderedFilter(String field, int slop, boolean collectLeaves) { + this.innerFilter = new WithinIntervalFilter(slop); + this.collectLeaves = collectLeaves; + this.field = field; + } + + public WithinOrderedFilter(String field, int slop) { + this(field, slop, true); + } + + @Override + public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) { + return innerFilter.filter(collectIntervals, + new OrderedConjunctionIntervalIterator(collectIntervals, collectLeaves, field, iter)); + } + + @Override + public String toString() { + return "ORDEREDWITHIN(" + this.innerFilter.getSlop() + ")"; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/search/intervals/package.html b/lucene/core/src/java/org/apache/lucene/search/intervals/package.html new file mode 100644 index 0000000..75eac5f --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/intervals/package.html @@ -0,0 +1,70 @@ + + + + + org.apache.lucene.search.intervals + + +

    Interval Iterators

    +

    +Lucene offers extensive query and scoring flexibility including boolean queries, specialized phrase queries, wildcards and many more. The intervals package aims +to provide a common interface to Lucene's proximity features available on all core queries. The central class in this package is +{@link org.apache.lucene.search.intervals.IntervalIterator IntervalIterator}, which allows iterative consumption of term positions and offsets on complex queries. +{@link org.apache.lucene.search.Scorer Scorer} exposes direct access to the queries' {@link org.apache.lucene.search.intervals.IntervalIterator IntervalIterator} reflecting a logical view +of the scorer on positions and offsets for each matching document.

    +

    +Intervals are entirely detached from scoring/matching documents and have no effect on query performance if proximity information or offsets are not needed or consumed. Their lazy nature requires +the user to specify the need for positions/offsets at scorer creation time per segment, allowing for a large number of use cases: +

      +
    • Proximity matching without scoring, i.e. if token positions are needed for filtering out documents but the actual query score should not be modified
    • +
    • Second pass scoring, i.e. for high-performance proximity queries it is common practice to re-score the top N (usually a large N) results of a non-proximity query with proximity information to improve precision.
    • +
    • Collecting an exhaustive list of intervals per query, i.e. complex queries might be interested in actual term positions across the entire query tree
    • +
    • Highlighting queries without re-analyzing the document or storing term vectors if offsets are stored in the index. Especially large documents will see a tremendous performance and space-consumption improvement over term-vectors / re-analyzing
    • +
    • Specializing queries for exotic proximity operators based on core queries
    • +
    + +

    Core Iterators and Queries

    + +The intervals package provides a basic set of {@link org.apache.lucene.search.intervals.IntervalIterator IntervalIterator} and {@link org.apache.lucene.search.Query Query} implementations +based on minimal interval semantics, as defined in +"Efficient Optimally Lazy Algorithms for Minimal-Interval Semantics" +

    + The following {@link org.apache.lucene.search.intervals.IntervalIterator IntervalIterator} implementations are provided: +

      +
    1. {@link org.apache.lucene.search.intervals.BlockIntervalIterator - BlockIntervalIterator} -- an iterator providing an ordered phrasal operator with given gaps between sub-iterators
    2. +
    3. {@link org.apache.lucene.search.intervals.OrderedConjunctionIntervalIterator - OrderedConjunctionIntervalIterator} -- an iterator providing an ordered non-overlapping conjunction operator
    4. +
    5. {@link org.apache.lucene.search.intervals.ConjunctionIntervalIterator - ConjunctionIntervalIterator} -- an iterator providing an unordered conjunction operator
    6. +
    7. {@link org.apache.lucene.search.intervals.BrouwerianIntervalIterator - BrouwerianIntervalIterator} -- an iterator computing the non-overlapping difference between two iterators
    8. +
    9. {@link org.apache.lucene.search.intervals.DisjunctionIntervalIterator - DisjunctionIntervalIterator} -- an iterator providing an unordered disjunction operator
    10. +
    + All iterators require positions to be stored in the index. +

    + +

    + The following Query implementations are provided: +

      +
    1. {@link org.apache.lucene.search.intervals.IntervalFilterQuery - IntervalFilterQuery} -- Filters a Query based on the positions or ranges of its component parts
    2. +
    3. {@link org.apache.lucene.search.intervals.OrderedNearQuery - OrderedNearQuery} -- Filters queries based on the ordered difference between their match positions in a document
    4. +
    5. {@link org.apache.lucene.search.intervals.UnorderedNearQuery - UnorderedNearQuery} -- Filters queries based on the unordered difference between their match positions in a document
    6. +
    7. {@link org.apache.lucene.search.intervals.NonOverlappingQuery - NonOverlappingQuery} -- Filters out queries with overlapping match positions
    8. +
    + All queries require positions to be stored in the index. +

    + + diff --git a/lucene/core/src/java/org/apache/lucene/search/package.html b/lucene/core/src/java/org/apache/lucene/search/package.html index 1be51fb..33ad619 100644 --- a/lucene/core/src/java/org/apache/lucene/search/package.html +++ b/lucene/core/src/java/org/apache/lucene/search/package.html @@ -436,14 +436,16 @@ on the built-in available scoring models and extending or changing Similarity. that scores via a {@link org.apache.lucene.search.similarities.Similarity Similarity} will just defer to the Similarity's implementation: {@link org.apache.lucene.search.similarities.Similarity.SimWeight#normalize SimWeight#normalize(float,float)}.
  • - {@link org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.AtomicReaderContext, org.apache.lucene.util.Bits) - scorer(AtomicReaderContext context, Bits acceptDocs)} — + {@link + org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.AtomicReaderContext, + org.apache.lucene.search.Weight.PostingFeatures, org.apache.lucene.util.Bits) + scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs)} — Construct a new {@link org.apache.lucene.search.Scorer Scorer} for this Weight. See The Scorer Class below for help defining a Scorer. As the name implies, the Scorer is responsible for doing the actual scoring of documents given the Query.
  • - {@link org.apache.lucene.search.Weight#bulkScorer(org.apache.lucene.index.AtomicReaderContext, boolean, org.apache.lucene.util.Bits) + {@link org.apache.lucene.search.Weight#bulkScorer(org.apache.lucene.index.AtomicReaderContext, boolean, org.apache.lucene.search.Weight.PostingFeatures, org.apache.lucene.util.Bits) scorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs)} — Construct a new {@link org.apache.lucene.search.BulkScorer BulkScorer} for this Weight. See The BulkScorer Class below for help defining a BulkScorer. This is an optional method, and most queries do not implement it. diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java index d2e924e..6702db3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java @@ -148,14 +148,14 @@ public class PayloadNearQuery extends SpanNearQuery { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity, similarity.simScorer(stats, context)); } @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - PayloadNearSpanScorer scorer = (PayloadNearSpanScorer) scorer(context, context.reader().getLiveDocs()); + PayloadNearSpanScorer scorer = (PayloadNearSpanScorer) scorer(context, PostingFeatures.POSITIONS, context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java 
b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java index 04ecd80..292d121 100644 --- a/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java @@ -79,7 +79,7 @@ public class PayloadTermQuery extends SpanTermQuery { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), this, similarity.simScorer(stats, context)); } @@ -176,7 +176,7 @@ public class PayloadTermQuery extends SpanTermQuery { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, context.reader().getLiveDocs()); + PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, PostingFeatures.POSITIONS, context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java index 74a098d..5b20392 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java @@ -17,12 +17,13 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; - -import org.apache.lucene.search.Weight; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.search.similarities.Similarity; +import java.io.IOException; + /** * Public for extension only. 
*/ @@ -103,7 +104,12 @@ public class SpanScorer extends Scorer { public float sloppyFreq() throws IOException { return freq; } - + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + return null; + } + @Override public long cost() { return spans.cost(); diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index 0b20cdb..12704c1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -81,7 +81,7 @@ public class SpanWeight extends Weight { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { if (stats == null) { return null; } else { @@ -91,7 +91,7 @@ public class SpanWeight extends Weight { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - SpanScorer scorer = (SpanScorer) scorer(context, context.reader().getLiveDocs()); + SpanScorer scorer = (SpanScorer) scorer(context, PostingFeatures.POSITIONS, context.reader().getLiveDocs()); if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { diff --git a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java index 8f6f8fc..a448f4c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -20,14 +20,17 @@ package org.apache.lucene.search; import java.io.IOException; import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import 
org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.util.PriorityQueue; +import java.io.IOException; + /** * Holds all implementations of classes in the o.a.l.search package as a * back-compatibility test. It does not run any tests per-se, however if @@ -227,7 +230,12 @@ final class JustCompileSearch { public int advance(int target) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } - + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + @Override public long cost() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); @@ -318,7 +326,7 @@ final class JustCompileSearch { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanCoord.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanCoord.java index 41b2fbe..2be691a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanCoord.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanCoord.java @@ -17,9 +17,6 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.concurrent.atomic.AtomicBoolean; - import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; @@ -36,6 +33,9 @@ import org.apache.lucene.util.LuceneTestCase; import org.junit.AfterClass; import org.junit.BeforeClass; +import java.io.IOException; +import java.util.concurrent.atomic.AtomicBoolean; + /** * Tests coord() computation by BooleanQuery */ @@ -707,7 +707,7 @@ public class TestBooleanCoord extends LuceneTestCase { private void assertScore(final float expected, Query query) throws Exception { // test in-order Weight weight = searcher.createNormalizedWeight(query); - Scorer scorer = weight.scorer(reader.leaves().get(0), null); + Scorer scorer = weight.scorer(reader.leaves().get(0), Weight.PostingFeatures.DOCS_AND_FREQS, null); assertTrue(scorer.docID() == -1 || scorer.docID() == DocIdSetIterator.NO_MORE_DOCS); assertEquals(0, scorer.nextDoc()); assertEquals(expected, scorer.score(), 0.0001f); @@ -715,7 +715,7 @@ public class TestBooleanCoord extends LuceneTestCase { // test out-of-order (if supported) if (weight.scoresDocsOutOfOrder()) { final AtomicBoolean seen = new AtomicBoolean(false); - BulkScorer bulkScorer = weight.bulkScorer(reader.leaves().get(0), false, null); + BulkScorer bulkScorer = weight.bulkScorer(reader.leaves().get(0), false, Weight.PostingFeatures.DOCS_AND_FREQS, null); assertNotNull(bulkScorer); bulkScorer.score(new LeafCollector() { Scorer scorer; @@ -737,6 +737,11 @@ public class TestBooleanCoord extends LuceneTestCase { public boolean acceptsDocsOutOfOrder() { return true; } + + @Override + public Weight.PostingFeatures postingFeatures() { + return Weight.PostingFeatures.DOCS_AND_FREQS; + } }, 1); assertTrue(seen.get()); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanOr.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanOr.java index 7950703..90a8828 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestBooleanOr.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanOr.java @@ -15,9 +15,6 @@ package org.apache.lucene.search; * See the License for the specific language governing permissions and * limitations under the License. */ -import java.io.IOException; -import java.util.concurrent.atomic.AtomicInteger; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.TextField; @@ -25,11 +22,15 @@ import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.search.Weight.PostingFeatures; import org.apache.lucene.store.Directory; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; +import java.io.IOException; +import java.util.concurrent.atomic.AtomicInteger; + public class TestBooleanOr extends LuceneTestCase { private static String FIELD_T = "T"; @@ -183,7 +184,7 @@ public class TestBooleanOr extends LuceneTestCase { Weight w = s.createNormalizedWeight(bq); assertEquals(1, s.getIndexReader().leaves().size()); - BulkScorer scorer = w.bulkScorer(s.getIndexReader().leaves().get(0), false, null); + BulkScorer scorer = w.bulkScorer(s.getIndexReader().leaves().get(0), false, PostingFeatures.DOCS_AND_FREQS, null); final FixedBitSet hits = new FixedBitSet(docCount); final AtomicInteger end = new AtomicInteger(); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java index 55ed7e6..bd395f5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java @@ -17,14 +17,6 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; @@ -38,6 +30,9 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.search.Weight.PostingFeatures; +import org.apache.lucene.search.intervals.IntervalFilterQuery; +import org.apache.lucene.search.intervals.WithinIntervalFilter; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; @@ -46,6 +41,14 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NamedThreadFactory; import org.apache.lucene.util.TestUtil; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + public class TestBooleanQuery extends LuceneTestCase { public void testEquality() throws Exception { @@ -235,7 +238,7 @@ public class TestBooleanQuery extends LuceneTestCase { Weight weight = s.createNormalizedWeight(q); - Scorer scorer = weight.scorer(s.leafContexts.get(0), null); + Scorer scorer = weight.scorer(s.leafContexts.get(0), PostingFeatures.DOCS_AND_FREQS, null); // First pass: just use .nextDoc() to gather all hits final List hits = new ArrayList<>(); @@ -252,7 +255,7 @@ public class TestBooleanQuery extends LuceneTestCase { for(int iter2=0;iter2<10;iter2++) { weight = s.createNormalizedWeight(q); - scorer = weight.scorer(s.leafContexts.get(0), null); + scorer = 
weight.scorer(s.leafContexts.get(0), PostingFeatures.DOCS_AND_FREQS, null); if (VERBOSE) { System.out.println(" iter2=" + iter2); @@ -290,6 +293,51 @@ public class TestBooleanQuery extends LuceneTestCase { r.close(); d.close(); } + + public void testConjunctionPositions() throws IOException { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + { + Document doc = new Document(); + doc.add(newField( + "field", + "Pease porridge hot! Pease porridge cold! Pease porridge in the pot nine days old! Some like it hot, some" + + " like it cold, Some like it in the pot nine days old! Pease porridge hot! Pease porridge cold!", + TextField.TYPE_STORED)); + writer.addDocument(doc); + } + + { + Document doc = new Document(); + doc.add(newField( + "field", + "Pease porridge cold! Pease porridge hot! Pease porridge in the pot nine days old! Some like it cold, some" + + " like it hot, Some like it in the pot nine days old! Pease porridge cold! 
Pease porridge hot!", + TextField.TYPE_STORED)); + writer.addDocument(doc); + } + + IndexReader reader = writer.getReader(); + IndexSearcher searcher = new IndexSearcher(reader); + writer.close(); + + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("field", "porridge")), BooleanClause.Occur.MUST); + bq.add(new TermQuery(new Term("field", "pease")),BooleanClause.Occur.MUST); + bq.add(new TermQuery(new Term("field", "hot!")), BooleanClause.Occur.MUST); + + { + IntervalFilterQuery filter = new IntervalFilterQuery(bq, new WithinIntervalFilter(3)); + TopDocs search = searcher.search(filter, 10); + ScoreDoc[] scoreDocs = search.scoreDocs; + assertEquals(2, search.totalHits); + assertEquals(0, scoreDocs[0].doc); + assertEquals(1, scoreDocs[1].doc); + } + reader.close(); + directory.close(); + } // LUCENE-4477 / LUCENE-4401: public void testBooleanSpanQuery() throws Exception { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java index 358a513..390d7fd 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -17,12 +17,6 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; @@ -35,6 +29,12 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + public class TestBooleanScorer extends LuceneTestCase { private static final String FIELD = "category"; @@ -207,12 +207,12 @@ public class TestBooleanScorer extends LuceneTestCase { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) { throw new UnsupportedOperationException(); } @Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, PostingFeatures flags, Bits acceptDocs) { return new BulkScorer() { @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java index e842909..4993f45 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java @@ -17,10 +17,11 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; - +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; + public class TestCachingCollector extends LuceneTestCase { private static final double ONE_BYTE = 1.0 / (1024 * 1024); // 1 byte out of MB @@ -45,6 +46,11 @@ public class TestCachingCollector extends LuceneTestCase { @Override public int advance(int target) throws IOException { return 0; } + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + return IntervalIterator.NO_MORE_INTERVALS; + } @Override public long cost() { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java index 7b92605..a9e1a31 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java @@ -34,6 +34,7 @@ import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.StoredDocument; import org.apache.lucene.index.Term; +import org.apache.lucene.search.Weight.PostingFeatures; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.spans.SpanQuery; @@ -180,7 +181,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { assertTrue(s.getTopReaderContext() instanceof AtomicReaderContext); final Weight dw = s.createNormalizedWeight(dq); AtomicReaderContext context = (AtomicReaderContext)s.getTopReaderContext(); - final Scorer ds = dw.scorer(context, context.reader().getLiveDocs()); + final Scorer ds = dw.scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()); final boolean skipOk = ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS; if (skipOk) { fail("firsttime skipTo found a match? 
... " @@ -196,7 +197,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { QueryUtils.check(random(), dq, s); final Weight dw = s.createNormalizedWeight(dq); AtomicReaderContext context = (AtomicReaderContext)s.getTopReaderContext(); - final Scorer ds = dw.scorer(context, context.reader().getLiveDocs()); + final Scorer ds = dw.scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()); assertTrue("firsttime skipTo found no match", ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS); assertEquals("found wrong docid", "d4", r.document(ds.docID()).get("id")); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java index 33d1a57..03009d6 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java @@ -17,14 +17,6 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedSetDocValuesField; @@ -36,6 +28,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.search.BooleanQuery.BooleanWeight; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.search.similarities.Similarity.SimWeight; @@ -47,6 +40,14 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.junit.AfterClass; import org.junit.BeforeClass; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + /** tests BooleanScorer2's minShouldMatch */ @SuppressCodecs({"Lucene40", "Lucene41"}) public class TestMinShouldMatch2 extends LuceneTestCase { @@ -126,7 +127,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase { if (slow) { return new SlowMinShouldMatchScorer(weight, reader, searcher); } else { - return weight.scorer(reader.getContext(), null); + return weight.scorer(reader.getContext(), Weight.PostingFeatures.DOCS_ONLY, null); } } @@ -306,6 +307,11 @@ public class TestMinShouldMatch2 extends LuceneTestCase { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public float score() throws IOException { assert score != 0 : currentMatched; return (float)score * ((BooleanWeight) weight).coord(currentMatched, ((BooleanWeight) 
weight).maxCoord); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java index c856c69..5cdf28b 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java @@ -20,10 +20,13 @@ package org.apache.lucene.search; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.document.Document; +import java.io.IOException; + public class TestPositiveScoresOnlyCollector extends LuceneTestCase { private static final class SimpleScorer extends Scorer { @@ -51,7 +54,12 @@ public class TestPositiveScoresOnlyCollector extends LuceneTestCase { idx = target; return idx < scores.length ? idx : NO_MORE_DOCS; } - + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(); + } + @Override public long cost() { return scores.length; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java index 28f78b0..79a60d5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java @@ -17,11 +17,6 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Arrays; -import java.util.Comparator; -import java.util.Set; - import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericDocValuesField; @@ -30,6 +25,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; @@ -39,6 +35,11 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; +import java.io.IOException; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Set; + public class TestQueryRescorer extends LuceneTestCase { private IndexSearcher getSearcher(IndexReader r) { @@ -443,7 +444,7 @@ public class TestQueryRescorer extends LuceneTestCase { } @Override - public Scorer scorer(final AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(final AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { return new Scorer(null) { int docID = -1; @@ -479,6 +480,11 @@ public class TestQueryRescorer extends LuceneTestCase { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public float score() throws IOException { int num = idToNum[Integer.parseInt(context.reader().document(docID).get("id"))]; if (reverse) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java index c79c843..0c1048b 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java @@ -17,15 +17,16 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; + public class TestScoreCachingWrappingScorer extends LuceneTestCase { private static final class SimpleScorer extends Scorer { @@ -58,7 +59,12 @@ public class TestScoreCachingWrappingScorer extends LuceneTestCase { doc = target; return doc < scores.length ? doc : NO_MORE_DOCS; } - + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(); + } + @Override public long cost() { return scores.length; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java index 5ea7e12..e703aa1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java @@ -17,10 +17,6 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -29,10 +25,15 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Term; +import org.apache.lucene.search.Weight.PostingFeatures; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + public class TestTermScorer extends LuceneTestCase { protected Directory directory; private static final String FIELD = "field"; @@ -78,7 +79,7 @@ public class TestTermScorer extends LuceneTestCase { Weight weight = indexSearcher.createNormalizedWeight(termQuery); assertTrue(indexSearcher.getTopReaderContext() instanceof AtomicReaderContext); AtomicReaderContext context = (AtomicReaderContext)indexSearcher.getTopReaderContext(); - BulkScorer ts = weight.bulkScorer(context, true, context.reader().getLiveDocs()); + BulkScorer ts = weight.bulkScorer(context, true, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()); // we have 2 documents with the term all in them, one document for all the // other values final List docs = new ArrayList<>(); @@ -140,7 +141,7 @@ public class TestTermScorer extends LuceneTestCase { Weight weight = indexSearcher.createNormalizedWeight(termQuery); assertTrue(indexSearcher.getTopReaderContext() instanceof AtomicReaderContext); AtomicReaderContext context = (AtomicReaderContext) indexSearcher.getTopReaderContext(); - Scorer ts = weight.scorer(context, context.reader().getLiveDocs()); + Scorer ts = weight.scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()); assertTrue("next did not return a 
doc", ts.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertTrue("score is not correct", ts.score() == 1.6931472f); @@ -159,7 +160,7 @@ public class TestTermScorer extends LuceneTestCase { Weight weight = indexSearcher.createNormalizedWeight(termQuery); assertTrue(indexSearcher.getTopReaderContext() instanceof AtomicReaderContext); AtomicReaderContext context = (AtomicReaderContext) indexSearcher.getTopReaderContext(); - Scorer ts = weight.scorer(context, context.reader().getLiveDocs()); + Scorer ts = weight.scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()); assertTrue("Didn't skip", ts.advance(3) != DocIdSetIterator.NO_MORE_DOCS); // The next doc should be doc 5 assertTrue("doc should be number 5", ts.docID() == 5); diff --git a/lucene/core/src/test/org/apache/lucene/search/intervals/IntervalTestBase.java b/lucene/core/src/test/org/apache/lucene/search/intervals/IntervalTestBase.java new file mode 100644 index 0000000..f34e887 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/intervals/IntervalTestBase.java @@ -0,0 +1,287 @@ +package org.apache.lucene.search.intervals; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.Weight; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.After; +import org.junit.Assert; 
+import org.junit.Before; + +import java.io.IOException; +import java.util.Iterator; +import java.util.Locale; +import java.util.Set; +import java.util.TreeSet; + +/** + * Copyright (c) 2012 Lemur Consulting Ltd. + *

    + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public abstract class IntervalTestBase extends LuceneTestCase { + + protected Directory directory; + protected IndexReader reader; + protected IndexSearcher searcher; + + public static void checkFieldIntervals(Query q, IndexSearcher searcher, Object[][] expectedResults) throws IOException { + + MatchCollector m = new MatchCollector(); + searcher.search(q, m); + + Assert.assertEquals("Incorrect number of hits", expectedResults.length, m.getHitCount()); + Iterator matchIt = m.getMatches().iterator(); + for (int i = 0; i < expectedResults.length; i++) { + Object docMatches[] = expectedResults[i]; + int docid = (Integer) docMatches[0]; + for (int j = 1; j < docMatches.length; j += 3) { + String expectation = "Expected match at docid " + docid + ", position " + docMatches[j]; + Assert.assertTrue(expectation, matchIt.hasNext()); + Match match = matchIt.next(); + System.out.println(match); + Assert.assertEquals("Incorrect docid", docid, match.docid); + Assert.assertEquals("Incorrect field match", docMatches[j], match.field); + Assert.assertEquals("Incorrect match start position", docMatches[j + 1], match.start); + Assert.assertEquals("Incorrect match end position", docMatches[j + 2], match.end); + } + } + + } + + /** + * Run a query against a searcher, and check that the collected intervals from the query match + * the expected results. + * @param q the query + * @param searcher the searcher + * @param expectedResults an int[][] detailing the expected results, in the format + * { { docid1, startoffset1, endoffset1, startoffset2, endoffset2, ... }, + * { docid2, startoffset1, endoffset1, startoffset2, endoffset2, ...}, ... 
} + * @throws IOException + */ + public static void checkIntervalOffsets(Query q, IndexSearcher searcher, int[][] expectedResults) throws IOException { + + MatchCollector m = new MatchCollector(); + searcher.search(q, m); + + Assert.assertEquals("Incorrect number of hits", expectedResults.length, m.getHitCount()); + Iterator matchIt = m.getMatches().iterator(); + for (int i = 0; i < expectedResults.length; i++) { + int docMatches[] = expectedResults[i]; + int docid = docMatches[0]; + for (int j = 1; j < docMatches.length; j += 2) { + String expectation = "Expected match at docid " + docid + ", position " + docMatches[j]; + Assert.assertTrue(expectation, matchIt.hasNext()); + Match match = matchIt.next(); + System.err.println(match); + Assert.assertEquals("Incorrect docid", match.docid, docid); + Assert.assertEquals("Incorrect match offset", docMatches[j], match.startOffset); + Assert.assertEquals("Incorrect match end offset", docMatches[j + 1], match.endOffset); + } + } + Assert.assertFalse("Unexpected matches!", matchIt.hasNext()); + + } + + /** + * Run a query against a searcher, and check that the collected intervals from the query match + * the expected results. + * @param q the query + * @param searcher the searcher + * @param expectedResults an int[][] detailing the expected results, in the format + * { { docid1, startpos1, endpos1, startpos2, endpos2, ... }, + * { docid2, startpos1, endpos1, startpos2, endpos2, ...}, ... 
}
+   * @throws IOException
+   */
+  public static void checkIntervals(Query q, IndexSearcher searcher, int[][] expectedResults) throws IOException {
+
+    MatchCollector m = new MatchCollector();
+    searcher.search(q, m);
+
+    Assert.assertEquals("Incorrect number of hits", expectedResults.length, m.getHitCount());
+    // The collector stores matches in a TreeSet, so iteration follows Match.compareTo.
+    Iterator<Match> matchIt = m.getMatches().iterator();
+    for (int[] docMatches : expectedResults) {
+      // Row layout: { docid, start1, end1, start2, end2, ... }
+      int docid = docMatches[0];
+      for (int j = 1; j < docMatches.length; j += 2) {
+        String expectation = "Expected match at docid " + docid + ", position " + docMatches[j];
+        Assert.assertTrue(expectation, matchIt.hasNext());
+        Match match = matchIt.next();
+        Assert.assertEquals("Incorrect docid", docid, match.docid);
+        Assert.assertEquals("Incorrect match start position for doc " + docid, docMatches[j], match.start);
+        Assert.assertEquals("Incorrect match end position for doc " + docid, docMatches[j + 1], match.end);
+      }
+    }
+    Assert.assertFalse("Unexpected matches!", matchIt.hasNext());
+
+  }
+
+  /**
+   * Asserts that the top hits for {@code q} are exactly {@code expectedDocs},
+   * in that order, then runs {@code CheckHits.checkExplanations} on the query.
+   *
+   * @param q            the query to run
+   * @param searcher     the searcher to run it against
+   * @param expectedDocs expected document ids, in the order they should be returned
+   * @throws IOException if the search fails
+   */
+  public static void checkScores(Query q, IndexSearcher searcher, int... expectedDocs) throws IOException {
+    TopDocs hits = searcher.search(q, 1000);
+    Assert.assertEquals("Wrong number of hits", expectedDocs.length, hits.totalHits);
+    for (int i = 0; i < expectedDocs.length; i++) {
+      Assert.assertEquals("Docs not scored in order", expectedDocs[i], hits.scoreDocs[i].doc);
+    }
+    // Use the shared constant rather than repeating the "field" literal.
+    CheckHits.checkExplanations(q, FIELD, searcher);
+  }
+
+  /** Adds the documents a concrete test wants indexed; called once from {@link #setUp()}. */
+  protected abstract void addDocs(RandomIndexWriter writer) throws IOException;
+
+  /**
+   * Builds a fresh index from {@link #addDocs(RandomIndexWriter)} using a
+   * {@code MockAnalyzer} and the "Asserting" codec, and opens a searcher on it.
+   */
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    directory = newDirectory();
+    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    //config.setCodec(Codec.forName("SimpleText"));
+    config.setCodec(Codec.forName("Asserting"));
+    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);
+    addDocs(writer);
+    reader = writer.getReader();
+    writer.close();
+    searcher = new IndexSearcher(reader);
+  }
+
+  /** Closes the reader and directory opened by {@link #setUp()}. */
+  @After
+  public void tearDown() throws Exception {
+    reader.close();
+    directory.close();
+    super.tearDown();
+  }
+
+  /** Name of the field targeted by the query factory methods below. */
+  public static final String FIELD = "field";
+
+  /** Returns a term query for {@code text} on {@link #FIELD}. */
+  public TermQuery makeTermQuery(String text) {
+    return new TermQuery(new Term(FIELD, text));
+  }
+
+  /** Returns a boolean query with every sub-query added as a SHOULD clause. */
+  protected Query makeOrQuery(Query... queries) {
+    BooleanQuery q = new BooleanQuery();
+    for (Query subquery : queries) {
+      q.add(subquery, BooleanClause.Occur.SHOULD);
+    }
+    return q;
+  }
+
+  /** Returns a boolean query with every sub-query added as a MUST clause. */
+  protected Query makeAndQuery(Query... queries) {
+    BooleanQuery q = new BooleanQuery();
+    for (Query subquery : queries) {
+      q.add(subquery, BooleanClause.Occur.MUST);
+    }
+    return q;
+  }
+
+  /**
+   * Immutable snapshot of one collected interval: document, field, position
+   * range, offset range, and whether it was reported as a composite interval.
+   */
+  public static class Match implements Comparable<Match> {
+
+    public final int docid;
+    public final int start;
+    public final int end;
+    public final int startOffset;
+    public final int endOffset;
+    public final String field;
+    public final boolean composite;
+
+    public Match(int docid, Interval interval, boolean composite) {
+      this.docid = docid;
+      this.start = interval.begin;
+      this.end = interval.end;
+      this.startOffset = interval.offsetBegin;
+      this.endOffset = interval.offsetEnd;
+      this.field = interval.field;
+      this.composite = composite;
+    }
+
+    /**
+     * Orders by docid, then field, then ascending start, then descending end.
+     */
+    @Override
+    public int compareTo(Match o) {
+      if (this.docid != o.docid)
+        return this.docid - o.docid;
+      // Compare field names by content. The identity test is only a shortcut:
+      // two equal-but-distinct String instances must fall through to the
+      // position comparison instead of returning 0 here, which would make the
+      // TreeSet treat different matches as duplicates.
+      if (this.field != o.field) {
+        int byField = this.field.compareTo(o.field);
+        if (byField != 0)
+          return byField;
+      }
+      if (this.start != o.start)
+        return this.start - o.start;
+      return o.end - this.end;
+    }
+
+    @Override
+    public String toString() {
+      return String.format(Locale.ROOT, "%d::%s:%d[%d]->%d[%d]%s",
+          docid, field, start, startOffset, end, endOffset, composite ? "C" : "");
+    }
+  }
+
+  /**
+   * Collector that counts hits and records every interval reported for each
+   * hit as a {@link Match}, kept sorted by {@link Match#compareTo(Match)}.
+   */
+  public static class MatchCollector extends SimpleCollector implements IntervalCollector {
+
+    private IntervalIterator intervals;
+    private Interval current;
+    private final Set<Match> matches = new TreeSet<Match>();
+    private int hitCount;
+
+    @Override
+    public void setScorer(Scorer scorer) throws IOException {
+      this.intervals = scorer.intervals(true);
+    }
+
+    @Override
+    public void collect(int doc) throws IOException {
+      hitCount++;
+      intervals.scorerAdvanced(doc);
+      // Drain every interval for this document; each one is pushed back to
+      // this collector through collectLeafPosition / collectComposite.
+      while ((current = intervals.next()) != null) {
+        intervals.collect(this);
+      }
+    }
+
+    @Override
+    public boolean acceptsDocsOutOfOrder() {
+      return false;
+    }
+
+    @Override
+    public void collectLeafPosition(Scorer scorer, Interval interval, int docID) {
+      matches.add(new Match(docID, interval, false));
+    }
+
+    @Override
+    public void collectComposite(Scorer scorer, Interval interval, int docID) {
+      matches.add(new Match(docID, interval, true));
+    }
+
+    @Override
+    public Weight.PostingFeatures postingFeatures() {
+      return Weight.PostingFeatures.OFFSETS;
+    }
+
+    /** Returns the collected matches in {@link Match#compareTo(Match)} order. */
+    public Set<Match> getMatches() {
+      return matches;
+    }
+
+    /** Returns the number of documents passed to {@link #collect(int)}. */
+    public int getHitCount() {
+      return hitCount;
+    }
+  }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/intervals/TestBasicIntervals.java b/lucene/core/src/test/org/apache/lucene/search/intervals/TestBasicIntervals.java
new file mode 100644
index 0000000..fb57468
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/intervals/TestBasicIntervals.java
@@ -0,0 +1,274 @@
+package org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+
+import java.io.IOException;
+
+/**
+ * Interval expectations for ordered/unordered near queries and simple
+ * disjunctions over a small fixed corpus (see {@code docFields}).
+ */
+public class TestBasicIntervals extends IntervalTestBase {
+
+  /** Indexes one unstored text document per entry of {@code docFields}, in array order. */
+  @Override
+  protected void addDocs(RandomIndexWriter writer) throws IOException {
+    for (int i = 0; i < docFields.length; i++) {
+      Document document = new Document();
+      document.add(newField(FIELD, docFields[i], TextField.TYPE_NOT_STORED));
+      writer.addDocument(document);
+    }
+  }
+
+  // Corpus; the trailing number on each row is its index in the array.
+  private String[] docFields = {
+      "w1 w2 w3 w4 w5", //0
+      "w1 w3 w2 w3",//1
+      "w1 xx w2 yy w3",//2
+      "w1 w3 xx w2 yy w3",//3
+      "u2 u2 u1", //4
+      "u2 xx u2 u1",//5
+      "u2 u2 xx u1", //6
+      "u2 xx u2 yy u1", //7
+      "u2 xx u1 u2",//8
+      "u1 u2 xx u2",//9
+      "u2 u1 xx u2",//10
+      "t1 t2 t1 t3 t2 t3",//11
+      "a b x x c"};//12
+
+  // or(unordered(a, c), unordered(b, c)), both with slop 6
+  public void testOverlappingWithinDisjunctions() throws Exception {
+    Query aNearC = new UnorderedNearQuery(6, false, makeTermQuery("a"), makeTermQuery("c"));
+    Query bNearC = new UnorderedNearQuery(6, false, makeTermQuery("b"), makeTermQuery("c"));
+    int[][] expected = {
+        { 12, 0, 4, 1, 4 }
+    };
+    checkIntervals(makeOrQuery(aNearC, bNearC), searcher, expected);
+  }
+
+  // or(unordered(a, b), unordered(a, c)), both with slop 6
+  public void testSameStartPositionWithinDisjunctions() throws Exception {
+    Query aNearB = new UnorderedNearQuery(6, false, makeTermQuery("a"), makeTermQuery("b"));
+    Query aNearC = new UnorderedNearQuery(6, false, makeTermQuery("a"), makeTermQuery("c"));
+    int[][] expected = {
+        { 12, 0, 4, 0, 1 }
+    };
+    checkIntervals(makeOrQuery(aNearB, aNearC), searcher, expected);
+  }
+
+  // ordered(w1, w2, w3), slop 0
+  public void testNearOrdered01() throws Exception {
+    Query query = new OrderedNearQuery(0, false, makeTermQuery("w1"), makeTermQuery("w2"), makeTermQuery("w3"));
+    int[][] expected = {
+        { 0, 0, 2 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(w1, w2, w3), slop 1
+  public void testNearOrdered02() throws Exception {
+    Query query = new OrderedNearQuery(1, false, makeTermQuery("w1"), makeTermQuery("w2"), makeTermQuery("w3"));
+    int[][] expected = {
+        { 0, 0, 2 },
+        { 1, 0, 3 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(w1, w2, w3), slop 2
+  public void testNearOrdered03() throws Exception {
+    Query query = new OrderedNearQuery(2, false, makeTermQuery("w1"), makeTermQuery("w2"), makeTermQuery("w3"));
+    int[][] expected = {
+        { 0, 0, 2 },
+        { 1, 0, 3 },
+        { 2, 0, 4 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(w1, w2, w3), slop 3
+  public void testNearOrdered04() throws Exception {
+    Query query = new OrderedNearQuery(3, false, makeTermQuery("w1"), makeTermQuery("w2"), makeTermQuery("w3"));
+    int[][] expected = {
+        { 0, 0, 2 },
+        { 1, 0, 3 },
+        { 2, 0, 4 },
+        { 3, 0, 5 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(w1, w2, w3), slop 4
+  public void testNearOrdered05() throws Exception {
+    Query query = new OrderedNearQuery(4, false, makeTermQuery("w1"), makeTermQuery("w2"), makeTermQuery("w3"));
+    int[][] expected = {
+        { 0, 0, 2 },
+        { 1, 0, 3 },
+        { 2, 0, 4 },
+        { 3, 0, 5 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(w1, w3, w3), slop 0: no hits expected
+  public void testNearOrderedEqual01() throws Exception {
+    Query query = new OrderedNearQuery(0, false, makeTermQuery("w1"), makeTermQuery("w3"), makeTermQuery("w3"));
+    int[][] expected = {};
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(w1, w3, w3), slop 1
+  public void testNearOrderedEqual02() throws Exception {
+    Query query = new OrderedNearQuery(1, false, makeTermQuery("w1"), makeTermQuery("w3"), makeTermQuery("w3"));
+    int[][] expected = {
+        { 1, 0, 3 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(w1, w3, w3), slop 2
+  public void testNearOrderedEqual03() throws Exception {
+    Query query = new OrderedNearQuery(2, false, makeTermQuery("w1"), makeTermQuery("w3"), makeTermQuery("w3"));
+    int[][] expected = {
+        { 1, 0, 3 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(w1, w3, w3), slop 3
+  public void testNearOrderedEqual04() throws Exception {
+    Query query = new OrderedNearQuery(3, false, makeTermQuery("w1"), makeTermQuery("w3"), makeTermQuery("w3"));
+    int[][] expected = {
+        { 1, 0, 3 },
+        { 3, 0, 5 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(u2, u2, u1), slop 0
+  public void testNearOrderedEqual11() throws Exception {
+    Query query = new OrderedNearQuery(0, false, makeTermQuery("u2"), makeTermQuery("u2"), makeTermQuery("u1"));
+    int[][] expected = {
+        { 4, 0, 2 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(u2, u2, u1), slop 1
+  public void testNearOrderedEqual13() throws Exception {
+    Query query = new OrderedNearQuery(1, false, makeTermQuery("u2"), makeTermQuery("u2"), makeTermQuery("u1"));
+    int[][] expected = {
+        { 4, 0, 2 },
+        { 5, 0, 3 },
+        { 6, 0, 3 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(u2, u2, u1), slop 2
+  public void testNearOrderedEqual14() throws Exception {
+    Query query = new OrderedNearQuery(2, false, makeTermQuery("u2"), makeTermQuery("u2"), makeTermQuery("u1"));
+    int[][] expected = {
+        { 4, 0, 2 },
+        { 5, 0, 3 },
+        { 6, 0, 3 },
+        { 7, 0, 4 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(u2, u2, u1), slop 3
+  public void testNearOrderedEqual15() throws Exception {
+    Query query = new OrderedNearQuery(3, false, makeTermQuery("u2"), makeTermQuery("u2"), makeTermQuery("u1"));
+    int[][] expected = {
+        { 4, 0, 2 },
+        { 5, 0, 3 },
+        { 6, 0, 3 },
+        { 7, 0, 4 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ordered(t1, t2, t3), slop 3: two overlapping intervals in doc 11
+  public void testNearOrderedOverlap() throws Exception {
+    Query query = new OrderedNearQuery(3, false, makeTermQuery("t1"), makeTermQuery("t2"), makeTermQuery("t3"));
+    int[][] expected = {
+        { 11, 0, 3, 2, 5 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // unordered(u1, u2), slop 0
+  public void testNearUnordered() throws Exception {
+    Query query = new UnorderedNearQuery(0, false, makeTermQuery("u1"), makeTermQuery("u2"));
+    int[][] expected = {
+        { 4, 1, 2 },
+        { 5, 2, 3 },
+        { 8, 2, 3 },
+        { 9, 0, 1 },
+        { 10, 0, 1 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // ((u1 near u2) near xx)
+  public void testNestedNear() throws Exception {
+    Query near = new UnorderedNearQuery(0, false, makeTermQuery("u1"), makeTermQuery("u2"));
+    BooleanQuery conjunction = new BooleanQuery();
+    conjunction.add(near, Occur.MUST);
+    conjunction.add(makeTermQuery("xx"), Occur.MUST);
+
+    int[][] expected = {
+        { 5, 1, 1, 2, 3 },
+        { 8, 1, 1, 2, 3 },
+        { 9, 0, 1, 2, 2 },
+        { 10, 0, 1, 2, 2 }
+    };
+    checkIntervals(conjunction, searcher, expected);
+  }
+
+  // or(w5)
+  public void testOrSingle() throws Exception {
+    Query query = makeOrQuery(makeTermQuery("w5"));
+    int[][] expected = {
+        { 0, 4, 4 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // or(w5, xx)
+  public void testOrPartialMatch() throws Exception {
+    Query query = makeOrQuery(makeTermQuery("w5"), makeTermQuery("xx"));
+    int[][] expected = {
+        { 0, 4, 4 },
+        { 2, 1, 1 },
+        { 3, 2, 2 },
+        { 5, 1, 1 },
+        { 6, 2, 2 },
+        { 7, 1, 1 },
+        { 8, 1, 1 },
+        { 9, 2, 2 },
+        { 10, 2, 2 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // or(w5, yy)
+  public void testOrDisjunctionMatch() throws Exception {
+    Query query = makeOrQuery(makeTermQuery("w5"), makeTermQuery("yy"));
+    int[][] expected = {
+        { 0, 4, 4 },
+        { 2, 3, 3 },
+        { 3, 4, 4 },
+        { 7, 3, 3 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+  // or(t1, t2, t3) over "t1 t2 t1 t3 t2 t3": every position of doc 11 is expected
+  public void testOrSingleDocument() throws Exception {
+    Query query = makeOrQuery(makeTermQuery("t1"), makeTermQuery("t2"), makeTermQuery("t3"));
+    int[][] expected = {
+        { 11, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5 }
+    };
+    checkIntervals(query, searcher, expected);
+  }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/intervals/TestBlockIntervalIterator.java b/lucene/core/src/test/org/apache/lucene/search/intervals/TestBlockIntervalIterator.java
new file mode 100644
index 0000000..82afff9
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/intervals/TestBlockIntervalIterator.java
@@ -0,0 +1,100 @@
+package org.apache.lucene.search.intervals;
+/**
+ * Licensed to the Apache
Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+import java.io.IOException;
+
+/**
+ * Interval expectations for conjunctions wrapped in an
+ * {@code IntervalFilterQuery} with a {@code BlockIntervalFilter}.
+ */
+public class TestBlockIntervalIterator extends IntervalTestBase {
+
+  /** Indexes two stored documents: the rhyme, and the rhyme with hot/cold swapped. */
+  @Override
+  protected void addDocs(RandomIndexWriter writer) throws IOException {
+    addStoredDoc(writer,
+        "Pease porridge hot! Pease porridge cold! Pease porridge in the pot nine days old! Some like it hot, some"
+            + " like it cold, Some like it in the pot nine days old! Pease porridge hot! Pease porridge cold!");
+    addStoredDoc(writer,
+        "Pease porridge cold! Pease porridge hot! Pease porridge in the pot nine days old! Some like it cold, some"
+            + " like it hot, Some like it in the pot nine days old! Pease porridge cold! Pease porridge hot!");
+  }
+
+  /** Adds one document holding {@code text} in a stored text field named "field". */
+  private void addStoredDoc(RandomIndexWriter writer, String text) throws IOException {
+    Document doc = new Document();
+    doc.add(newField("field", text, TextField.TYPE_STORED));
+    writer.addDocument(doc);
+  }
+
+  // block(and(pease, porridge, hot!)), built with BlockIntervalFilter(false)
+  public void testMatchingBlockIntervalFilter() throws IOException {
+
+    Query query = makeAndQuery(
+        new TermQuery(new Term("field", "pease")),
+        new TermQuery(new Term("field", "porridge")),
+        new TermQuery(new Term("field", "hot!"))
+    );
+    IntervalFilterQuery filterQuery = new IntervalFilterQuery(query, new BlockIntervalFilter(false));
+
+    checkIntervals(filterQuery, searcher, new int[][]{
+        { 0, 0, 2, 31, 33 },
+        { 1, 3, 5, 34, 36 }
+    });
+
+  }
+
+  // block(and(pease, porridge, hot!, pease, porridge, cold!)): only doc 0 is expected
+  public void testPartialMatchingBlockIntervalFilter() throws IOException {
+
+    Query query = makeAndQuery(
+        new TermQuery(new Term("field", "pease")),
+        new TermQuery(new Term("field", "porridge")),
+        new TermQuery(new Term("field", "hot!")),
+        new TermQuery(new Term("field", "pease")),
+        new TermQuery(new Term("field", "porridge")),
+        new TermQuery(new Term("field", "cold!"))
+    );
+    IntervalFilterQuery filterQuery = new IntervalFilterQuery(query, new BlockIntervalFilter(false));
+
+    checkIntervals(filterQuery, searcher, new int[][]{
+        { 0, 0, 5, 31, 36 },
+    });
+
+
+  }
+
+  // block(and(pease, hot!)) with the no-arg BlockIntervalFilter: no hits expected
+  public void testNonMatchingBlockIntervalFilter() throws IOException {
+
+    Query query = makeAndQuery(
+        new TermQuery(new Term("field", "pease")),
+        new TermQuery(new Term("field", "hot!"))
+    );
+    IntervalFilterQuery filterQuery = new IntervalFilterQuery(query, new BlockIntervalFilter());
+
+    checkIntervals(filterQuery, searcher, new int[][]{});
+
+  }
+
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/intervals/TestBrouwerianQuery.java b/lucene/core/src/test/org/apache/lucene/search/intervals/TestBrouwerianQuery.java
new file mode 100644
index 0000000..d6300c5
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/intervals/TestBrouwerianQuery.java
@@ -0,0 +1,116 @@
+package org.apache.lucene.search.intervals;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.Query;
+
+import java.io.IOException;
+
+/**
+ * Interval expectations for {@code NonOverlappingQuery}: each test pairs a
+ * main query with a second query passed as the constructor's other argument.
+ */
+public class TestBrouwerianQuery extends IntervalTestBase {
+
+  /** Indexes two stored documents in the "field" field. */
+  @Override
+  protected void addDocs(RandomIndexWriter writer) throws IOException {
+    addStoredDoc(writer, "The quick brown fox jumps over the lazy dog");
+    addStoredDoc(writer,
+        "The quick brown duck jumps over the lazy dog with the quick brown fox jumps and then it jumps again");
+  }
+
+  /** Adds one document holding {@code text} in a stored text field named "field". */
+  private void addStoredDoc(RandomIndexWriter writer, String text) throws IOException {
+    Document doc = new Document();
+    doc.add(newField("field", text, TextField.TYPE_STORED));
+    writer.addDocument(doc);
+  }
+
+  // NonOverlappingQuery(ordered(the, quick, jumps) slop 2, duck)
+  public void testBrouwerianBooleanQuery() throws IOException {
+
+    Query query = new OrderedNearQuery(2, false, makeTermQuery("the"),
+        makeTermQuery("quick"), makeTermQuery("jumps"));
+    Query sub = makeTermQuery("duck");
+    NonOverlappingQuery q = new NonOverlappingQuery(query, sub);
+
+    checkIntervals(q, searcher, new int[][]{
+        { 0, 0, 4 },
+        { 1, 10, 14 }
+    });
+  }
+
+  // Same main query, but the second query's term ("blox") is not in the corpus.
+  public void testBrouwerianBooleanQueryExcludedDoesNotExist() throws IOException {
+
+    Query query = new OrderedNearQuery(2, false, makeTermQuery("the"),
+        makeTermQuery("quick"), makeTermQuery("jumps"));
+    Query sub = makeTermQuery("blox");
+    NonOverlappingQuery q = new NonOverlappingQuery(query, sub);
+
+    checkIntervals(q, searcher, new int[][]{
+        { 0, 0, 4 },
+        { 1, 0, 4, 10, 14 }
+    });
+  }
+
+  public void testBrouwerianOverlapQuery() throws IOException {
+    // We want to find 'jumps NOT WITHIN 2 positions of duck'
+    Query sub = new UnorderedNearQuery(2, false, makeTermQuery("jumps"), makeTermQuery("duck"));
+    Query query = makeTermQuery("jumps");
+    NonOverlappingQuery q = new NonOverlappingQuery(query, sub);
+
+    checkIntervals(q, searcher, new int[][]{
+        { 0, 4, 4 },
+        { 1, 14, 14, 18, 18 }
+    });
+  }
+
+  // NonOverlappingQuery(dog, unordered(dog, over) slop 2): no hits expected
+  public void testBrouwerianNonExistentOverlapQuery() throws IOException {
+    Query sub = new UnorderedNearQuery(2, false, makeTermQuery("dog"), makeTermQuery("over"));
+    Query query = makeTermQuery("dog");
+    NonOverlappingQuery q = new NonOverlappingQuery(query, sub);
+
+    checkIntervals(q, searcher, new int[][]{});
+  }
+
+  // Same as above with slop 1: "dog" is expected at position 8 in both documents
+  public void testBrouwerianExistentOverlapQuery() throws IOException {
+    Query sub = new UnorderedNearQuery(1, false, makeTermQuery("dog"), makeTermQuery("over"));
+    Query query = makeTermQuery("dog");
+    NonOverlappingQuery q = new NonOverlappingQuery(query, sub);
+
+    checkIntervals(q, searcher, new int[][]{
+        { 0, 8, 8 },
+        { 1, 8, 8 }
+    });
+  }
+
+  // NonOverlappingQuery(jumps, unordered(jumps, or(fox, duck)) slop 1)
+  public void testBrouwerianDisjunction() throws IOException {
+    Query sub = new UnorderedNearQuery(1, false, makeTermQuery("jumps"),
+        makeOrQuery(makeTermQuery("fox"), makeTermQuery("duck")));
+    Query query = makeTermQuery("jumps");
+    NonOverlappingQuery q = new NonOverlappingQuery(query, sub);
+
+    checkIntervals(q, searcher, new int[][]{
+        { 1, 18, 18 }
+    });
+  }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/intervals/TestConjunctionIntervalIterator.java b/lucene/core/src/test/org/apache/lucene/search/intervals/TestConjunctionIntervalIterator.java
new file mode 100644
index 0000000..4fbc94d
--- /dev/null
+++ 
b/lucene/core/src/test/org/apache/lucene/search/intervals/TestConjunctionIntervalIterator.java
@@ -0,0 +1,87 @@
+package org.apache.lucene.search.intervals;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+
+import java.io.IOException;
+
+/** Interval expectations for ordered, unordered and excluding conjunctions. */
+public class TestConjunctionIntervalIterator extends IntervalTestBase {
+
+  /** Indexes two stored documents: the rhyme, and the rhyme with hot/cold swapped. */
+  @Override
+  protected void addDocs(RandomIndexWriter writer) throws IOException {
+    addStoredDoc(writer,
+        "Pease porridge hot! Pease porridge cold! Pease porridge in the pot nine days old! Some like it hot, some"
+            + " like it cold, Some like it in the pot nine days old! Pease porridge hot! Pease porridge cold!");
+    addStoredDoc(writer,
+        "Pease porridge cold! Pease porridge hot! Pease porridge in the pot nine days old! Some like it cold, some"
+            + " like it hot, Some like it in the pot nine days old! Pease porridge cold! Pease porridge hot!");
+  }
+
+  /** Adds one document holding {@code text} in a stored text field named "field". */
+  private void addStoredDoc(RandomIndexWriter writer, String text) throws IOException {
+    Document doc = new Document();
+    doc.add(newField("field", text, TextField.TYPE_STORED));
+    writer.addDocument(doc);
+  }
+
+  // ordered(pease, porridge, hot!), slop 0
+  public void testConjunctionOrderedQuery() throws IOException {
+    Query q = new OrderedNearQuery(0, false, makeTermQuery("pease"),
+        makeTermQuery("porridge"), makeTermQuery("hot!"));
+    checkIntervals(q, searcher, new int[][]{
+        { 0, 0, 2, 31, 33 },
+        { 1, 3, 5, 34, 36 }
+    });
+  }
+
+  // unordered(pease, porridge, hot!), slop 0
+  public void testConjunctionUnorderedQuery() throws IOException {
+    Query q = new UnorderedNearQuery(0, false, makeTermQuery("pease"),
+        makeTermQuery("porridge"), makeTermQuery("hot!"));
+    checkIntervals(q, searcher, new int[][]{
+        { 0, 0, 2, 1, 3, 2, 4, 31, 33, 32, 34, 33, 35 },
+        { 1, 3, 5, 4, 6, 5, 7, 34, 36 }
+    });
+  }
+
+  // andnot(andnot(pease, or(porridge, flurble)), or(foo, bar))
+  public void testConjunctionExclusionQuery() throws IOException {
+    // Inner exclusion: pease MUST, or(porridge, flurble) MUST_NOT.
+    // makeOrQuery adds each argument as a SHOULD clause of a new BooleanQuery.
+    BooleanQuery andnotinner = new BooleanQuery();
+    andnotinner.add(makeTermQuery("pease"), Occur.MUST);
+    andnotinner.add(makeOrQuery(makeTermQuery("porridge"), makeTermQuery("flurble")), Occur.MUST_NOT);
+
+    // Outer exclusion: the inner query MUST, or(foo, bar) MUST_NOT.
+    BooleanQuery outer = new BooleanQuery();
+    outer.add(andnotinner, Occur.MUST);
+    outer.add(makeOrQuery(makeTermQuery("foo"), makeTermQuery("bar")), Occur.MUST_NOT);
+
+    checkIntervals(outer, searcher, new int[][]{});
+  }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/intervals/TestDisjunctionIntervalIterator.java b/lucene/core/src/test/org/apache/lucene/search/intervals/TestDisjunctionIntervalIterator.java
new file mode 100644
index 0000000..8b2e7d5
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/intervals/TestDisjunctionIntervalIterator.java
@@ -0,0 +1,131 @@
+package 
org.apache.lucene.search.intervals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+import java.io.IOException;
+
+/** Interval expectations for disjunctions, alone and combined with conjunctions and range filters. */
+public class TestDisjunctionIntervalIterator extends IntervalTestBase {
+
+  /**
+   * Indexes three documents in the "field" field: the rhyme (stored), the
+   * rhyme with hot/cold swapped (stored), and a short unstored sentence.
+   */
+  @Override
+  protected void addDocs(RandomIndexWriter writer) throws IOException {
+    {
+      Document doc = new Document();
+      doc.add(newField(
+          "field",
+          "Pease porridge hot! Pease porridge cold! Pease porridge in the pot nine days old! Some like it hot, some"
+              + " like it cold, Some like it in the pot nine days old! Pease porridge hot! Pease porridge cold!",
+          TextField.TYPE_STORED));
+      writer.addDocument(doc);
+    }
+
+    {
+      Document doc = new Document();
+      doc.add(newField(
+          "field",
+          "Pease porridge cold! Pease porridge hot! Pease porridge in the pot nine days old! Some like it cold, some"
+              + " like it hot, Some like it in the pot nine days old! Pease porridge cold! Pease porridge hot!",
+          TextField.TYPE_STORED));
+      writer.addDocument(doc);
+    }
+
+    {
+      Document doc = new Document();
+      doc.add(newField("field", "The quick brown fox jumps over the lazy porridge", TextField.TYPE_NOT_STORED));
+      writer.addDocument(doc);
+    }
+  }
+
+  // or(and(hot!, porridge), and(fox, porridge))
+  public void testDisjunctionOverConjunctions() throws IOException {
+
+    BooleanQuery conj1 = new BooleanQuery();
+    conj1.add(makeTermQuery("hot!"), Occur.MUST);
+    conj1.add(makeTermQuery("porridge"), Occur.MUST);
+
+    BooleanQuery conj2 = new BooleanQuery();
+    conj2.add(makeTermQuery("fox"), Occur.MUST);
+    conj2.add(makeTermQuery("porridge"), Occur.MUST);
+
+    BooleanQuery disj = new BooleanQuery();
+    disj.add(conj1, Occur.SHOULD);
+    disj.add(conj2, Occur.SHOULD);
+
+    checkIntervals(disj, searcher, new int[][]{
+        { 0, 1, 1, 2, 2, 4, 4, 7, 7, 32, 32, 33, 33, 35, 35 },
+        { 1, 1, 1, 4, 4, 5, 5, 7, 7, 32, 32, 35, 35, 36, 36 },
+        { 2, 3, 3, 8, 8 },
+    });
+
+  }
+
+  // or(porridge, pease, hot!) wrapped in a RangeIntervalFilter(0, 2)
+  public void testDisjunctionRangePositionsBooleanQuery() throws IOException {
+
+    Query query = makeOrQuery(
+        new TermQuery(new Term("field", "porridge")),
+        new TermQuery(new Term("field", "pease")),
+        new TermQuery(new Term("field", "hot!"))
+    );
+
+    IntervalFilterQuery rangeFilter = new IntervalFilterQuery(query, new RangeIntervalFilter(0, 2));
+    checkIntervals(rangeFilter, searcher, new int[][]{
+        { 0, 0, 0, 1, 1, 2, 2 },
+        { 1, 0, 0, 1, 1 }
+    });
+
+  }
+
+  // or(porridge, fox)
+  public void testDisjunctionPartialMatchQuery() throws IOException {
+
+    BooleanQuery query = new BooleanQuery();
+    query.add(new BooleanClause(new TermQuery(new Term("field", "porridge")), Occur.SHOULD));
+    query.add(new BooleanClause(new TermQuery(new Term("field", "fox")), Occur.SHOULD));
+
+    checkIntervals(query, searcher, new int[][]{
+        { 0, 1, 1, 4, 4, 7, 7, 32, 32, 35, 35 },
+        { 1, 1, 1, 4, 4, 7, 7, 32, 32, 35, 35},
+        { 2, 3, 3, 8, 8}
+    });
+  }
+
+  // or(porridge, pease, hot!)
+  public void testDisjunctionFullMatchQuery() throws IOException {
+
+    BooleanQuery query = new BooleanQuery();
+    query.add(new BooleanClause(new TermQuery(new Term("field", "porridge")), Occur.SHOULD));
+    query.add(new BooleanClause(new TermQuery(new Term("field", "pease")), Occur.SHOULD));
+    query.add(new BooleanClause(new TermQuery(new Term("field", "hot!")), Occur.SHOULD));
+
+    checkIntervals(query, searcher, new int[][]{
+        { 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 6, 6, 7, 7,
+            31, 31, 32, 32, 33, 33, 34, 34, 35, 35 },
+        { 1, 0, 0, 1, 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
+            31, 31, 32, 32, 34, 34, 35, 35, 36, 36},
+        { 2, 8, 8 }
+    });
+
+  }
+
+}
+
+
diff --git a/lucene/core/src/test/org/apache/lucene/search/intervals/TestExoticQueries.java b/lucene/core/src/test/org/apache/lucene/search/intervals/TestExoticQueries.java
new file mode 100644
index 0000000..c58a6cd
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/intervals/TestExoticQueries.java
@@ -0,0 +1,88 @@
+package org.apache.lucene.search.intervals;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.junit.Ignore;
+
+import java.io.IOException;
+
+/**
+ * Copyright (c) 2012 Lemur Consulting Ltd.
+ *

    + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestExoticQueries extends IntervalTestBase {
+
+  // Single unstored document containing the rhyme, in the "field" field.
+  @Override
+  protected void addDocs(RandomIndexWriter writer) throws IOException {
+    String rhyme =
+        "Pease porridge hot! Pease porridge cold! Pease porridge in the pot nine days old! Some like it hot, some"
+            + " like it cold, Some like it in the pot nine days old! Pease porridge hot! Pease porridge cold!";
+    Document document = new Document();
+    document.add(newField("field", rhyme, TextField.TYPE_NOT_STORED));
+    writer.addDocument(document);
+  }
+
+  // phrase(pease porridge hot!)
+  public void testExactPhraseQuery() throws IOException {
+    PhraseQuery phrase = new PhraseQuery();
+    phrase.add(new Term("field", "pease"));
+    phrase.add(new Term("field", "porridge"));
+    phrase.add(new Term("field", "hot!"));
+    int[][] expected = {
+        { 0, 0, 2, 0, 0, 1, 1, 2, 2, 31, 33, 31, 31, 32, 32, 33, 33 }
+    };
+    checkIntervals(phrase, searcher, expected);
+  }
+
+  // phrase(pease hot!) with slop 1; currently ignored
+  @Ignore
+  public void testSloppyPhraseQuery() throws IOException {
+    PhraseQuery phrase = new PhraseQuery();
+    phrase.add(new Term("field", "pease"));
+    phrase.add(new Term("field", "hot!"));
+    phrase.setSlop(1);
+    int[][] expected = {
+        { 0, 0, 2, 0, 0, 2, 2, 31, 33, 31, 31, 33, 33 }
+    };
+    checkIntervals(phrase, searcher, expected);
+  }
+
+  // phrase(pease porridge pot) with slop 2; currently ignored
+  @Ignore
+  public void testManyTermSloppyPhraseQuery() throws IOException {
+    PhraseQuery phrase = new PhraseQuery();
+    phrase.add(new Term("field", "pease"));
+    phrase.add(new Term("field", "porridge"));
+    phrase.add(new Term("field", "pot"));
+    phrase.setSlop(2);
+    int[][] expected = {
+        { 0, 6, 10, 6, 6, 7, 7, 10, 10 }
+    };
+    checkIntervals(phrase, searcher, expected);
+  }
+
+  // multiphrase(pease porridge (hot! | cold!))
+  public void testMultiTermPhraseQuery() throws IOException {
+    Term[] lastPosition = new Term[] {new Term("field", "hot!"), new Term("field", "cold!")};
+    MultiPhraseQuery phrase = new MultiPhraseQuery();
+    phrase.add(new Term("field", "pease"));
+    phrase.add(new Term("field", "porridge"));
+    phrase.add(lastPosition);
+    int[][] expected = {
+        { 0, 0, 2, 0, 0, 1, 1, 2, 2,
+            3, 5, 3, 3, 4, 4, 5, 5,
+            31, 33, 31, 31, 32, 32, 33, 33,
+            34, 36, 34, 34, 35, 35, 36, 36 }
+    };
+    checkIntervals(phrase, searcher, expected);
+  }
+}
+
+
diff --git a/lucene/core/src/test/org/apache/lucene/search/intervals/TestFieldedIntervals.java b/lucene/core/src/test/org/apache/lucene/search/intervals/TestFieldedIntervals.java
new file mode 100644
index 0000000..5071dcb
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/intervals/TestFieldedIntervals.java
@@ -0,0 +1,141 @@
+package org.apache.lucene.search.intervals;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.TermQuery;
+import org.junit.Test;
+
+import java.io.IOException;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +public class TestFieldedIntervals extends IntervalTestBase { + + @Override + protected void addDocs(RandomIndexWriter writer) throws IOException { + FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED); + fieldType.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + Document doc = new Document(); + doc.add(newField("field1", "Pease porridge hot! Pease porridge cold!", fieldType)); + doc.add(newField("field2", "Some like it hot! Some like it cold", fieldType)); + writer.addDocument(doc); + doc = new Document(); + doc.add(newField("field1", "Pease porridge warm! Pease porridge tepid!", fieldType)); + doc.add(newField("field2", "Some like it warm! Some like it tepid", fieldType)); + doc.add(newField("field3", "An extra field warm!", fieldType)); + writer.addDocument(doc); + } + + // field1:and(pease, or(porridge, cold)) + @Test + public void testNestedBooleanOnOneField() throws Exception { + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("field1", "porridge")), BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(new Term("field1", "cold!")), BooleanClause.Occur.SHOULD); + BooleanQuery pbq = new BooleanQuery(); + pbq.add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.MUST); + pbq.add(bq, BooleanClause.Occur.MUST); + checkFieldIntervals(pbq, searcher, new Object[][]{ + { 0, "field1", 0, 0, "field1", 1, 1, "field1", 3, 3, "field1", 4, 4, "field1", 5, 5 }, + { 1, "field1", 0, 0, "field1", 1, 1, "field1", 3, 3, "field1", 4, 4 } + }); + } + + @Test + public void testSimpleBooleanOnTwoFields() throws IOException { + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("field1", "warm!")), BooleanClause.Occur.MUST); + bq.add(new TermQuery(new Term("field2", "warm!")), BooleanClause.Occur.MUST); + checkFieldIntervals(bq, searcher, new Object[][]{ + { 1, "field1", 2, 2, "field2", 3, 3 } + }); + } + + @Test + public void testSimpleBooleanOnDisjointFields() throws 
IOException { + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("field1", "hot!")), BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(new Term("field2", "warm!")), BooleanClause.Occur.SHOULD); + checkFieldIntervals(bq, searcher, new Object[][]{ + { 0, "field1", 2, 2 }, + { 1, "field2", 3, 3 } + }); + } + + @Test + public void testEquivalentPositionsOnSeparateFieldsDisjunction() throws IOException { + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(new Term("field2", "some")), BooleanClause.Occur.SHOULD); + checkFieldIntervals(bq, searcher, new Object[][]{ + { 0, "field1", 0, 0, "field1", 3, 3, "field2", 0, 0, "field2", 4, 4 }, + { 1, "field1", 0, 0, "field1", 3, 3, "field2", 0, 0, "field2", 4, 4 }, + }); + } + + @Test + public void testEquivalentPositionsOnSeparateFieldsConjunction() throws IOException { + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.MUST); + bq.add(new TermQuery(new Term("field2", "some")), BooleanClause.Occur.MUST); + checkFieldIntervals(bq, searcher, new Object[][]{ + { 0, "field1", 0, 0, "field1", 3, 3, "field2", 0, 0, "field2", 4, 4 }, + { 1, "field1", 0, 0, "field1", 3, 3, "field2", 0, 0, "field2", 4, 4 }, + }); + } + + @Test + public void testEquivalentPositionsOnSeparateFieldsConjunctionOfDisjunction() throws IOException { + + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(new Term("field2", "some")), BooleanClause.Occur.SHOULD); + + BooleanQuery superq = new BooleanQuery(); + superq.add(bq, BooleanClause.Occur.MUST); + superq.add(new TermQuery(new Term("field2", "like")), BooleanClause.Occur.MUST); + + checkFieldIntervals(superq, searcher, new Object[][]{ + {0, "field1", 0, 0, "field1", 3, 3, "field2", 0, 0, "field2", 1, 1, "field2", 4, 4, "field2", 5, 5}, + {1, 
"field1", 0, 0, "field1", 3, 3, "field2", 0, 0, "field2", 1, 1, "field2", 4, 4, "field2", 5, 5}, + }); + } + + @Test + public void testThirdField() throws IOException { + + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("field1", "pease")), BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(new Term("field2", "some")), BooleanClause.Occur.SHOULD); + + BooleanQuery superbq = new BooleanQuery(); + superbq.add(bq, BooleanClause.Occur.MUST); + superbq.add(new TermQuery(new Term("field3", "an")), BooleanClause.Occur.MUST); + + checkFieldIntervals(superbq, searcher, new Object[][]{ + { 1, "field1", 0, 0, "field1", 3, 3, "field2", 0, 0, "field2", 4, 4, "field3", 0, 0 } + }); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/intervals/TestFreqFilterQueries.java b/lucene/core/src/test/org/apache/lucene/search/intervals/TestFreqFilterQueries.java new file mode 100644 index 0000000..911a840 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/intervals/TestFreqFilterQueries.java @@ -0,0 +1,122 @@ +package org.apache.lucene.search.intervals; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.junit.Test; + +import java.io.IOException; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class TestFreqFilterQueries extends IntervalTestBase { + + @Override + protected void addDocs(RandomIndexWriter writer) throws IOException { + for (String content : DOCS) { + Document doc = new Document(); + doc.add(newField(FIELD, content, TextField.TYPE_NOT_STORED)); + doc.add(newField("field2", "plum", TextField.TYPE_NOT_STORED)); + writer.addDocument(doc); + } + } + + public static final String[] DOCS = new String[] { + "banana plum apple", + "apple apple apple apple apple", + "apple apple apple apple banana apple strawberry banana apple", + "banana plum apple", + "plum apple apple apple apple apple", + "strawberry strawhat strawman" + }; + + @Test + public void testExactFrequencyFilterQuery() throws IOException { + IntervalFilterQuery query = new IntervalFilterQuery(makeTermQuery("apple"), new RangeFrequencyFilter(5, 5)); + checkIntervals(query, searcher, new int[][]{ + { 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 }, + { 4, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5 } + }); + } + + @Test + public void testMinimumFrequencyFilterQuery() throws IOException { + IntervalFilterQuery query = new IntervalFilterQuery(makeTermQuery("apple"), new MinFrequencyFilter(5)); + checkIntervals(query, searcher, new int[][]{ + { 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 }, + { 2, 0, 0, 1, 1, 2, 2, 3, 3, 5, 5, 8, 8 }, + { 4, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5 } + }); + } + + @Test + public void testMaximumFrequencyFilterQuery() throws IOException { + IntervalFilterQuery query = new IntervalFilterQuery(makeTermQuery("apple"), new RangeFrequencyFilter(1, 5)); + checkIntervals(query, searcher, 
new int[][]{ + { 0, 2, 2 }, + { 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 }, + { 3, 2, 2 }, + { 4, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5 }, + }); + } + + @Test + public void testMinFreqOverDisjunction() throws IOException { + Query q = makeOrQuery(makeTermQuery("banana"), makeTermQuery("plum")); + checkIntervals(new IntervalFilterQuery(q, new MinFrequencyFilter(2)), searcher, new int[][]{ + { 0, 0, 0, 1, 1 }, + { 2, 4, 4, 7, 7 }, + { 3, 0, 0, 1, 1 } + }); + } + + @Test + public void testMinFreqOverWildcard() throws IOException { + PrefixQuery fq = new PrefixQuery(new Term(FIELD, "straw")); + fq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); + + checkIntervals(fq, searcher, new int[][]{ + {2, 6, 6}, + {5, 0, 0, 1, 1, 2, 2} + }); + checkIntervals(new IntervalFilterQuery(fq, new MinFrequencyFilter(2)), searcher, new int[][]{ + { 5, 0, 0, 1, 1, 2, 2 } + }); + } + + @Test + public void testMinFreqOverMultipleFields() throws IOException { + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term(FIELD, "plum")), BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(new Term("field2", "plum")), BooleanClause.Occur.SHOULD); + + checkFieldIntervals(new IntervalFilterQuery(bq, new MinFrequencyFilter(2)), searcher, new Object[][]{ + { 0, "field", 1, 1, "field2", 0, 0 }, + { 3, "field", 1, 1, "field2", 0, 0 }, + { 4, "field", 0, 0, "field2", 0, 0 } + }); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/intervals/TestIntervalScoring.java b/lucene/core/src/test/org/apache/lucene/search/intervals/TestIntervalScoring.java new file mode 100644 index 0000000..73fac61 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/intervals/TestIntervalScoring.java @@ -0,0 +1,85 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.junit.Assert; + +import java.io.IOException; + +import static org.hamcrest.core.Is.is; + +public class TestIntervalScoring extends IntervalTestBase { + + @Override + protected void addDocs(RandomIndexWriter writer) throws IOException { + for (String content : docFields) { + Document doc = new Document(); + doc.add(newField("field", content, TextField.TYPE_NOT_STORED)); + writer.addDocument(doc); + } + } + + private String[] docFields = { + "Should we, could we, would we?", + "It should - would it?", + "It shouldn't", + "Should we, should we, should we" + }; + + public void testOrderedNearQueryScoring() throws IOException { + OrderedNearQuery q = new OrderedNearQuery(10, makeTermQuery("should"), + makeTermQuery("would")); + checkScores(q, searcher, 1, 0); + } + + public void testEmptyMultiTermQueryScoring() throws IOException { + OrderedNearQuery q = new OrderedNearQuery(10, new RegexpQuery(new Term("field", "bar.*")), + new 
RegexpQuery(new Term("field", "foo.*"))); + TopDocs docs = searcher.search(q, 10); + Assert.assertEquals(docs.totalHits, 0); + } + + public void testRewrittenEmptyMultiTermPreservesField() throws IOException { + OrderedNearQuery q = new OrderedNearQuery(10, new RegexpQuery(new Term("field", "bar.*")), + new RegexpQuery(new Term("field", "foo.*"))); + Query rewritten = q.rewrite(searcher.getIndexReader()); + assertThat(rewritten.getField(), is("field")); + } + + public void testRewrittenEmptyBooleans() throws IOException { + OrderedNearQuery oq = new OrderedNearQuery(10, new RegexpQuery(new Term("field", "bar.*")), + new RegexpQuery(new Term("field", "foo.*"))); + TermQuery tq = new TermQuery(new Term("field", "should")); + BooleanQuery bq = new BooleanQuery(); + bq.add(oq, BooleanClause.Occur.SHOULD); + bq.add(tq, BooleanClause.Occur.SHOULD); + + checkScores(bq.rewrite(searcher.getIndexReader()), searcher, 3, 1, 0); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/intervals/TestNestedIntervalFilterQueries.java b/lucene/core/src/test/org/apache/lucene/search/intervals/TestNestedIntervalFilterQueries.java new file mode 100644 index 0000000..b11633d --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/intervals/TestNestedIntervalFilterQueries.java @@ -0,0 +1,122 @@ +package org.apache.lucene.search.intervals; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; + +import java.io.IOException; + +public class TestNestedIntervalFilterQueries extends IntervalTestBase { + + @Override + protected void addDocs(RandomIndexWriter writer) throws IOException { + for (int i = 0; i < docFields.length; i++) { + Document doc = new Document(); + doc.add(newField("field", docFields[i], TextField.TYPE_STORED)); + writer.addDocument(doc); + } + } + + private String[] docFields = { + "w1 w2 w3 w4 w5 w6 w7 w8 w9 w10 w11 w12", //0 + "w1 w3 w4 w5 w6 w7 w8", //1 + "w1 w3 w10 w4 w5 w6 w7 w8", //2 + "w1 w3 w2 w4 w5 w6 w7 w8", //3 + }; + + public void testOrderedDisjunctionQueries() throws IOException { + // Two phrases whose subparts appear in a document, but that do not fulfil the slop + // requirements of the parent IntervalFilterQuery + Query sentence1 = new OrderedNearQuery(0, makeTermQuery("w1"), makeTermQuery("w8"), makeTermQuery("w4")); + Query sentence2 = new OrderedNearQuery(0, makeTermQuery("w3"), makeTermQuery("w7"), makeTermQuery("w6")); + BooleanQuery bq = new BooleanQuery(); + bq.add(sentence1, BooleanClause.Occur.SHOULD); + bq.add(sentence2, BooleanClause.Occur.SHOULD); + checkIntervals(bq, searcher, new int[][]{}); + } + + public void testFilterDisjunctionQuery() throws IOException { + Query near1 = makeTermQuery("w4"); + Query near2 = new 
OrderedNearQuery(3, false, makeTermQuery("w1"), makeTermQuery("w10")); + BooleanQuery bq = new BooleanQuery(); + bq.add(near1, BooleanClause.Occur.SHOULD); + bq.add(near2, BooleanClause.Occur.SHOULD); + checkIntervals(bq, searcher, new int[][]{ + { 0, 3, 3 }, + { 1, 2, 2 }, + { 2, 0, 2, 3, 3 }, + { 3, 3, 3 } + }); + } + + // or(w1 pre/2 w2, w1 pre/3 w10) + public void testOrNearNearQuery() throws IOException { + Query near1 = new OrderedNearQuery(2, false, makeTermQuery("w1"), makeTermQuery("w2")); + Query near2 = new OrderedNearQuery(3, false, makeTermQuery("w1"), makeTermQuery("w10")); + BooleanQuery bq = new BooleanQuery(); + bq.add(near1, BooleanClause.Occur.SHOULD); + bq.add(near2, BooleanClause.Occur.SHOULD); + checkIntervals(bq, searcher, new int[][]{ + { 0, 0, 1 }, + { 2, 0, 2 }, + { 3, 0, 2 } + }); + } + + // or(w2 within/2 w1, w10 within/3 w1) + public void testUnorderedNearNearQuery() throws IOException { + Query near1 = new UnorderedNearQuery(2, false, makeTermQuery("w2"), makeTermQuery("w1")); + Query near2 = new UnorderedNearQuery(3, false, makeTermQuery("w10"), makeTermQuery("w1")); + BooleanQuery bq = new BooleanQuery(); + bq.add(near1, BooleanClause.Occur.SHOULD); + bq.add(near2, BooleanClause.Occur.SHOULD); + checkIntervals(bq, searcher, new int[][]{ + {0, 0, 1}, + {2, 0, 2}, + {3, 0, 2} + }); + } + + // (a pre/2 b) pre/6 (c pre/2 d) + public void testNearNearNearQuery() throws IOException { + Query near1 = new OrderedNearQuery(2, false, makeTermQuery("w1"), makeTermQuery("w4")); + Query near2 = new OrderedNearQuery(2, false, makeTermQuery("w10"), makeTermQuery("w12")); + Query near3 = new OrderedNearQuery(6, near1, near2); + checkIntervals(near3, searcher, new int[][]{ + { 0, 0, 11, 0, 3, 9, 11 } + }); + } + + public void testOrNearNearNonExistentQuery() throws IOException { + Query near1 = new OrderedNearQuery(2, false, makeTermQuery("w1"), makeTermQuery("w12")); + Query near2 = new OrderedNearQuery(2, false, makeTermQuery("w3"), 
makeTermQuery("w8")); + BooleanQuery bq = new BooleanQuery(); + bq.add(near1, BooleanClause.Occur.SHOULD); + bq.add(near2, BooleanClause.Occur.SHOULD); + BooleanQuery wrapper = new BooleanQuery(); + wrapper.add(bq, BooleanClause.Occur.MUST); + wrapper.add(makeTermQuery("foo"), BooleanClause.Occur.MUST_NOT); + checkIntervals(wrapper, searcher, new int[][]{}); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/intervals/TestPositionsAndOffsets.java b/lucene/core/src/test/org/apache/lucene/search/intervals/TestPositionsAndOffsets.java new file mode 100644 index 0000000..1a66e28 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/intervals/TestPositionsAndOffsets.java @@ -0,0 +1,68 @@ +package org.apache.lucene.search.intervals; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; + +import java.io.IOException; + +// We need to store offsets here, so don't use the following Codecs, which don't +// support them. +@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"}) +public class TestPositionsAndOffsets extends IntervalTestBase { + + protected void addDocs(RandomIndexWriter writer) throws IOException { + FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED); + fieldType.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + Document doc = new Document(); + doc.add(newField( + "field", + "Pease porridge hot! Pease porridge cold! Pease porridge in the pot nine days old! Some like it hot, some" + + " like it cold, Some like it in the pot nine days old! Pease porridge hot! 
Pease porridge cold!", + fieldType)); + writer.addDocument(doc); + } + + public void testTermQueryOffsets() throws IOException { + Query query = new TermQuery(new Term("field", "porridge")); + checkIntervalOffsets(query, searcher, new int[][]{ + { 0, 6, 14, 26, 34, 47, 55, 164, 172, 184, 192 } + }); + } + + public void testBooleanQueryOffsets() throws IOException { + BooleanQuery query = new BooleanQuery(); + query.add(new BooleanClause(new TermQuery(new Term("field", "porridge")), + BooleanClause.Occur.MUST)); + query.add(new BooleanClause(new TermQuery(new Term("field", "nine")), + BooleanClause.Occur.MUST)); + checkIntervalOffsets(query, searcher, new int[][]{ + { 0, 6, 14, 26, 34, 47, 55, 67, 71, 143, 147, 164, 172, 184, 192 } + }); + } + +} \ No newline at end of file diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index e9c1413..097eff3 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -30,6 +30,7 @@ import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Weight; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight.PostingFeatures; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -167,7 +168,7 @@ public class TestNearSpansOrdered extends LuceneTestCase { Weight w = searcher.createNormalizedWeight(q); IndexReaderContext topReaderContext = searcher.getTopReaderContext(); AtomicReaderContext leave = topReaderContext.leaves().get(0); - Scorer s = w.scorer(leave, leave.reader().getLiveDocs()); + Scorer s = w.scorer(leave, PostingFeatures.POSITIONS, leave.reader().getLiveDocs()); assertEquals(1, s.advance(1)); } diff --git 
a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java index a45a81f..215eb1e 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -17,9 +17,6 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; -import java.util.List; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -38,11 +35,15 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.Weight.PostingFeatures; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.List; + public class TestSpans extends LuceneTestCase { private IndexSearcher searcher; private IndexReader reader; @@ -429,7 +430,7 @@ public class TestSpans extends LuceneTestCase { slop, ordered); - spanScorer = searcher.createNormalizedWeight(snq).scorer(ctx, ctx.reader().getLiveDocs()); + spanScorer = searcher.createNormalizedWeight(snq).scorer(ctx, PostingFeatures.POSITIONS, ctx.reader().getLiveDocs()); } finally { searcher.setSimilarity(oldSim); } diff --git a/lucene/expressions/src/java/org/apache/lucene/expressions/ExpressionRescorer.java b/lucene/expressions/src/java/org/apache/lucene/expressions/ExpressionRescorer.java index 4314c27..5d49972 100644 --- a/lucene/expressions/src/java/org/apache/lucene/expressions/ExpressionRescorer.java +++ b/lucene/expressions/src/java/org/apache/lucene/expressions/ExpressionRescorer.java @@ -17,12 +17,6 @@ package org.apache.lucene.expressions; 
* limitations under the License. */ -import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.queries.function.ValueSource; @@ -33,6 +27,13 @@ import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortRescorer; import org.apache.lucene.search.Weight; +import org.apache.lucene.search.intervals.IntervalIterator; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * A {@link Rescorer} that uses an expression to re-score @@ -86,7 +87,12 @@ class ExpressionRescorer extends SortRescorer { public int nextDoc() { throw new UnsupportedOperationException("FakeScorer doesn't support nextDoc()"); } - + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(); + } + @Override public float score() { return score; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java index 972df94..5f1bceb 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java @@ -16,11 +16,9 @@ package org.apache.lucene.facet; * See the License for the specific language governing permissions and * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Arrays; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Collector; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; @@ -29,10 +27,12 @@ import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.Arrays; + /** Only purpose is to punch through and return a * DrillSidewaysScorer */ @@ -118,17 +118,17 @@ class DrillSidewaysQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { // We can only run as a top scorer: throw new UnsupportedOperationException(); } @Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, PostingFeatures flags, Bits acceptDocs) throws IOException { // TODO: it could be better if we take acceptDocs // into account instead of baseScorer? 
- Scorer baseScorer = baseWeight.scorer(context, acceptDocs); + Scorer baseScorer = baseWeight.scorer(context, flags, acceptDocs); DrillSidewaysScorer.DocsAndCost[] dims = new DrillSidewaysScorer.DocsAndCost[drillDowns.length]; int nullCount = 0; @@ -173,7 +173,7 @@ class DrillSidewaysQuery extends Query { dims[dim].disi = disi; } } else { - DocIdSetIterator disi = ((Weight) drillDowns[dim]).scorer(context, null); + DocIdSetIterator disi = ((Weight) drillDowns[dim]).scorer(context, flags, null); if (disi == null) { nullCount++; continue; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java index 273b6b1..40b5ff3 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java @@ -17,21 +17,22 @@ package org.apache.lucene.facet; * limitations under the License. */ -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.search.LeafCollector; +import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Collector; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Weight; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; + class DrillSidewaysScorer extends BulkScorer { //private static boolean DEBUG = false; @@ -628,7 +629,7 @@ class DrillSidewaysScorer extends BulkScorer { public FakeScorer() { super(null); } - + @Override public int advance(int target) { throw new 
UnsupportedOperationException("FakeScorer doesn't support advance(int)"); @@ -648,7 +649,12 @@ class DrillSidewaysScorer extends BulkScorer { public int nextDoc() { throw new UnsupportedOperationException("FakeScorer doesn't support nextDoc()"); } - + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException("FakeScorer doesn't support intervals()"); + } + @Override public float score() { return collectScore; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetSumValueSource.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetSumValueSource.java index 6514a3c..af874f8 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetSumValueSource.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetSumValueSource.java @@ -17,14 +17,8 @@ package org.apache.lucene.facet.taxonomy; * limitations under the License. */ -import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.lucene.facet.FacetsCollector.MatchingDocs; import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsCollector.MatchingDocs; import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.queries.function.FunctionValues; @@ -33,8 +27,15 @@ import org.apache.lucene.queries.function.docvalues.DoubleDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.IntsRef; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + /** Aggregates sum of values from {@link * FunctionValues#doubleVal}, for each facet 
label. * @@ -66,6 +67,12 @@ public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets { float score; int docID; FakeScorer() { super(null); } + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(); + } + @Override public float score() throws IOException { return score; } @Override public int freq() throws IOException { throw new UnsupportedOperationException(); } @Override public int docID() { return docID; } diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java index f40c2a7..7523a85 100644 --- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java @@ -18,15 +18,27 @@ package org.apache.lucene.search.grouping; */ -import java.io.IOException; -import java.util.Collection; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.search.*; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopDocsCollector; +import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.PriorityQueue; +import java.io.IOException; +import java.util.Collection; + // TODO: this sentence is too long for the class summary. 
/** BlockGroupingCollector performs grouping with a * single pass collector, as long as you are grouping by a @@ -120,6 +132,11 @@ public class BlockGroupingCollector extends SimpleCollector { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public long cost() { return 1; } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/ArrayIntervalIterator.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/ArrayIntervalIterator.java new file mode 100644 index 0000000..61b435e --- /dev/null +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/ArrayIntervalIterator.java @@ -0,0 +1,69 @@ +package org.apache.lucene.search.highlight.positions; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.search.intervals.Interval; +import org.apache.lucene.search.intervals.IntervalCollector; +import org.apache.lucene.search.intervals.IntervalIterator; + +/** + * Present an array of PositionIntervals as an Iterator.
+ * @lucene.experimental + */ +public class ArrayIntervalIterator extends IntervalIterator { + + private int next = 0; + private int count; + private Interval[] positions; + + public ArrayIntervalIterator (Interval[] positions, int count) { + super(null, false); + this.positions = positions; + this.count = count; + } + + @Override + public Interval next() { + if (next >= count) + return null; + return positions[next++]; + } + + @Override + public IntervalIterator[] subs(boolean inOrder) { + return EMPTY; + } + + @Override + public void collect(IntervalCollector collector) { + assert collectIntervals; + } + + @Override + public int scorerAdvanced(int docId) throws IOException { + return 0; + } + + @Override + public int matchDistance() { + return 0; + } + +} \ No newline at end of file diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/DocAndPositions.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/DocAndPositions.java new file mode 100644 index 0000000..ff848ea --- /dev/null +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/DocAndPositions.java @@ -0,0 +1,66 @@ +package org.apache.lucene.search.highlight.positions; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.intervals.Interval; +import org.apache.lucene.util.ArrayUtil; + +import java.util.Comparator; + +/** Used to accumulate position intervals while scoring + * @lucene.experimental + */ +public final class DocAndPositions extends ScoreDoc { + + public int posCount = 0; + public Interval[] positions; + + public DocAndPositions(int doc) { + super(doc, 0); + positions = new Interval[32]; + } + + public void storePosition (Interval pos) { + ensureStorage(); + positions[posCount++] = (Interval) pos.clone(); + } + + private void ensureStorage () { + if (posCount >= positions.length) { + Interval temp[] = new Interval[positions.length * 2]; + System.arraycopy(positions, 0, temp, 0, positions.length); + positions = temp; + } + } + + public Interval[] sortedPositions() { // sorts the first posCount entries in place by begin, then end, and returns the backing array (entries past posCount are unused) + ArrayUtil.timSort(positions, 0, posCount, new Comparator<Interval>() { + public int compare(Interval o1, Interval o2) { + return + o1.begin < o2.begin ? -1 : + (o1.begin > o2.begin ? 1 : + (o1.end < o2.end ? -1 : + (o1.end > o2.end ? 1 : + 0))); + } + + }); + return positions; + } +} diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/HighlightingIntervalCollector.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/HighlightingIntervalCollector.java new file mode 100644 index 0000000..9a50f5f --- /dev/null +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/HighlightingIntervalCollector.java @@ -0,0 +1,109 @@ +package org.apache.lucene.search.highlight.positions; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.search.Weight.PostingFeatures; +import org.apache.lucene.search.intervals.Interval; +import org.apache.lucene.search.intervals.IntervalCollector; +import org.apache.lucene.search.intervals.IntervalIterator; + +import java.io.IOException; + +/** + * Collects the first maxDocs docs and their positions matching the query + * + * @lucene.experimental + */ + +public class HighlightingIntervalCollector extends SimpleCollector implements IntervalCollector { + + int count; + DocAndPositions docs[]; + + public HighlightingIntervalCollector (int maxDocs) { + docs = new DocAndPositions[maxDocs]; + } + + protected Scorer scorer; + private IntervalIterator positions; + + @Override + public void collect(int doc) throws IOException { + if (count >= docs.length) + return; + addDoc (doc); + // consume any remaining positions the scorer didn't report + docs[count-1].score=scorer.score(); + positions.scorerAdvanced(doc); + while(positions.next() != null) { + positions.collect(this); + } + } + + private boolean addDoc (int doc) { + if (count <= 0 || docs[count-1].doc != doc) { + DocAndPositions spdoc = new DocAndPositions (doc); + docs[count++] = spdoc; + return true; + } + return false; + } + + public boolean acceptsDocsOutOfOrder() { 
+ return false; + } + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + positions = scorer.intervals(true); + // If we want to visit the other scorers, we can, here... + } + + public Scorer getScorer () { + return scorer; + } + + public DocAndPositions[] getDocs () { + DocAndPositions ret[] = new DocAndPositions[count]; + System.arraycopy(docs, 0, ret, 0, count); + return ret; + } + + public void setNextReader(AtomicReaderContext context) throws IOException { + } + + @Override + public PostingFeatures postingFeatures() { + return PostingFeatures.OFFSETS; + } + + @Override + public void collectLeafPosition(Scorer scorer, Interval interval, + int docID) { + addDoc(docID); + docs[count - 1].storePosition(interval); + } + + @Override + public void collectComposite(Scorer scorer, Interval interval, + int docID) { + } + +} diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/IntervalTokenStream.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/IntervalTokenStream.java new file mode 100644 index 0000000..c5ee2ff --- /dev/null +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/positions/IntervalTokenStream.java @@ -0,0 +1,74 @@ +package org.apache.lucene.search.highlight.positions; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.search.intervals.Interval; +import org.apache.lucene.search.intervals.IntervalIterator; + +/** + * A TokenStream constructed from a stream of positions and their offsets. + * The document is segmented into tokens at the start and end offset of each interval. The intervals + * are assumed to be non-overlapping. + * + * TODO: abstract the dependency on the current PositionOffsetMapper impl; + * allow for implementations of position->offset maps that don't rely on term vectors. + * + * @lucene.experimental + */ +public class IntervalTokenStream extends TokenStream { + + //this tokenizer generates four attributes: + // term, offset, positionIncrement? and type? 
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + //private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final String text; + private final IntervalIterator positions; + + // the interval backing the current token; null before the first call and once positions is exhausted + private Interval pos = null; + + public IntervalTokenStream (String text, IntervalIterator positions) { + this.text = text; + this.positions = positions; + } + + @Override + public final boolean incrementToken() throws IOException { + pos = positions.next(); + if (pos == null){ + return false; + } + clearAttributes(); // reset per-token state; without this termAtt.append keeps the text of every earlier token + final int b = pos.offsetBegin; + final int e = pos.offsetEnd; + assert b >=0; + termAtt.append(text, b, e); + offsetAtt.setOffset(b, e); + posIncrAtt.setPositionIncrement(1); + return true; + } + +} diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/positions/IntervalHighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/positions/IntervalHighlighterTest.java new file mode 100644 index 0000000..b0bf997 --- /dev/null +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/positions/IntervalHighlighterTest.java @@ -0,0 +1,529 @@ +package org.apache.lucene.search.highlight.positions; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenFilter; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.apache.lucene.search.highlight.SimpleFragmenter; +import org.apache.lucene.search.highlight.TextFragment; +import org.apache.lucene.search.intervals.BlockIntervalIterator; +import 
org.apache.lucene.search.intervals.IntervalFilter; +import org.apache.lucene.search.intervals.IntervalFilterQuery; +import org.apache.lucene.search.intervals.IntervalIterator; +import org.apache.lucene.search.intervals.NonOverlappingQuery; +import org.apache.lucene.search.intervals.OrderedNearQuery; +import org.apache.lucene.search.intervals.UnorderedNearQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.junit.Ignore; + +import java.io.IOException; +import java.io.StringReader; + +import static org.hamcrest.core.Is.is; + +/** + * TODO: FIX THIS TEST Phrase and Span Queries positions callback API + */ +@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"}) +public class IntervalHighlighterTest extends LuceneTestCase { + + protected final static String F = "f"; + protected Analyzer analyzer; + protected Directory dir; + protected IndexSearcher searcher; + private IndexWriterConfig iwc; + + private static final String PORRIDGE_VERSE = "Pease porridge hot! Pease porridge cold! Pease porridge in the pot nine days old! Some like it hot, some" + + " like it cold, Some like it in the pot nine days old! Pease porridge hot! Pease porridge cold!"; + + public void setUp() throws Exception { + super.setUp(); + iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.CREATE); + analyzer = iwc.getAnalyzer(); + dir = newDirectory(); + } + + public void close() throws IOException { + if (searcher != null) { + searcher.getIndexReader().close(); + searcher = null; + } + dir.close(); + } + + // make several docs + protected void insertDocs(Analyzer analyzer, String... 
values) + throws Exception { + IndexWriter writer = new IndexWriter(dir, iwc.clone()); + FieldType type = new FieldType(); + type.setIndexed(true); + type.setTokenized(true); + type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + type.setStored(true); + for (String value : values) { + Document doc = new Document(); + Field f = newField(F, value, type); + doc.add(f); + writer.addDocument(doc); + } + writer.shutdown(); + searcher = new IndexSearcher(DirectoryReader.open(dir)); + } + + protected static TermQuery termQuery(String term) { + return new TermQuery(new Term(F, term)); + } + + private String[] doSearch(Query q) throws IOException, + InvalidTokenOffsetsException { + return doSearch(q, 100); + } + + private class ConstantScorer implements + org.apache.lucene.search.highlight.Scorer { + + @Override + public TokenStream init(TokenStream tokenStream) throws IOException { + return tokenStream; + } + + @Override + public void startFragment(TextFragment newFragment) {} + + @Override + public float getTokenScore() { + return 1; + } + + @Override + public float getFragmentScore() { + return 1; + } + } + + private String getHighlight(Query q) throws IOException, InvalidTokenOffsetsException { + return doSearch(q, Integer.MAX_VALUE)[0]; + } + + private String[] doSearch(Query q, int maxFragSize) throws IOException, + InvalidTokenOffsetsException { + return doSearch(q, maxFragSize, 0); + } + private String[] doSearch(Query q, int maxFragSize, int docIndex) throws IOException, InvalidTokenOffsetsException { + return doSearch(q, maxFragSize, docIndex, false); + } + private String[] doSearch(Query q, int maxFragSize, int docIndex, boolean analyze) + throws IOException, InvalidTokenOffsetsException { + // ConstantScorer is a fragment Scorer, not a search result (document) + // Scorer + Highlighter highlighter = new Highlighter(new ConstantScorer()); + highlighter.setTextFragmenter(new SimpleFragmenter(maxFragSize)); + HighlightingIntervalCollector 
collector = new HighlightingIntervalCollector(10); + if (q instanceof MultiTermQuery) { + ((MultiTermQuery) q) + .setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); + } + searcher.search(q, collector); + DocAndPositions doc = collector.docs[docIndex]; + if (doc == null) return null; + String text = searcher.getIndexReader().document(doc.doc).get(F); + // FIXME: test error cases: for non-stored fields, and fields w/no term + // vectors + // searcher.getIndexReader().getTermFreqVector(doc.doc, F, pom); + final TokenStream stream; + if (analyze) { + stream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, + MockTokenFilter.EMPTY_STOPSET).tokenStream(F, + new StringReader(text)); + } else { + stream = new IntervalTokenStream(text, new ArrayIntervalIterator( + doc.sortedPositions(), doc.posCount)); + } + // + TextFragment[] fragTexts = highlighter.getBestTextFragments( + stream , text, false, 10); + String[] frags = new String[fragTexts.length]; + for (int i = 0; i < frags.length; i++) + frags[i] = fragTexts[i].toString(); + return frags; + } + + public void testTerm() throws Exception { + insertDocs(analyzer, "This is a test test"); + String frags[] = doSearch(termQuery("test")); + assertEquals("This is a test test", frags[0]); + close(); + } + + public void testSeveralSnippets() throws Exception { + String input = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " + + "Let us see what happens to long in this case."; + String gold = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. 
" + + "Let us see what happens to long in this case."; + insertDocs(analyzer, input); + String frags[] = doSearch(termQuery("long"), input.length()); + assertEquals(gold, frags[0]); + close(); + } + + public void testBooleanAnd() throws Exception { + insertDocs(analyzer, "This is a test"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(termQuery("This"), Occur.MUST)); + bq.add(new BooleanClause(termQuery("test"), Occur.MUST)); + String frags[] = doSearch(bq); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testConstantScore() throws Exception { + insertDocs(analyzer, "This is a test"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(termQuery("This"), Occur.MUST)); + bq.add(new BooleanClause(termQuery("test"), Occur.MUST)); + String frags[] = doSearch(new ConstantScoreQuery(bq)); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testBooleanAndOtherOrder() throws Exception { + insertDocs(analyzer, "This is a test"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "test")), Occur.MUST)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "This")), Occur.MUST)); + String frags[] = doSearch(bq); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testBooleanOr() throws Exception { + insertDocs(analyzer, "This is a test"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "test")), Occur.SHOULD)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "This")), Occur.SHOULD)); + String frags[] = doSearch(bq); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testSingleMatchScorer() throws Exception { + insertDocs(analyzer, "This is a test"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "test")), Occur.SHOULD)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "notoccurringterm")), + 
Occur.SHOULD)); + String frags[] = doSearch(bq); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testBooleanNrShouldMatch() throws Exception { + insertDocs(analyzer, "a b c d e f g h i"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "a")), Occur.SHOULD)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "b")), Occur.SHOULD)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "no")), Occur.SHOULD)); + + // This generates a ConjunctionSumScorer + bq.setMinimumNumberShouldMatch(2); + String frags[] = doSearch(bq); + assertEquals("a b c d e f g h i", frags[0]); + + // This generates no scorer + bq.setMinimumNumberShouldMatch(3); + frags = doSearch(bq); + assertNull(frags); + + // This generates a DisjunctionSumScorer + bq.setMinimumNumberShouldMatch(2); + bq.add(new BooleanClause(new TermQuery(new Term(F, "c")), Occur.SHOULD)); + frags = doSearch(bq); + assertEquals("a b c d e f g h i", frags[0]); + close(); + } + + public void testPhrase() throws Exception { + insertDocs(analyzer, "is it that this is a test, is it"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term(F, "is")), Occur.MUST); + bq.add(new TermQuery(new Term(F, "a")), Occur.MUST); + IntervalFilterQuery pfq = new IntervalFilterQuery(bq, + new BlockPositionIteratorFilter()); + String frags[] = doSearch(pfq); + // make sure we highlight the phrase, and not the terms outside the phrase + assertEquals("is it that this is a test, is it", frags[0]); + close(); + } + + /* + * Failing ... PhraseQuery scorer needs positions()? 
+ */ + //@Ignore + public void testPhraseOriginal() throws Exception { + insertDocs(analyzer, "This is a test"); + PhraseQuery pq = new PhraseQuery(); + pq.add(new Term(F, "a")); + pq.add(new Term(F, "test")); + String frags[] = doSearch(pq); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testNestedBoolean() throws Exception { + insertDocs(analyzer, "This is a test"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "test")), Occur.SHOULD)); + BooleanQuery bq2 = new BooleanQuery(); + bq2.add(new BooleanClause(new TermQuery(new Term(F, "This")), Occur.SHOULD)); + bq2.add(new BooleanClause(new TermQuery(new Term(F, "is")), Occur.SHOULD)); + bq.add(new BooleanClause(bq2, Occur.SHOULD)); + String frags[] = doSearch(bq); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testWildcard() throws Exception { + insertDocs(analyzer, "This is a test"); + String frags[] = doSearch(new WildcardQuery(new Term(F, "t*t"))); + assertEquals("This is a test", frags[0]); + close(); + } + + public void testMixedBooleanNot() throws Exception { + insertDocs(analyzer, "this is a test", "that is an elephant"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "test")), Occur.MUST)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "that")), Occur.MUST_NOT)); + String frags[] = doSearch(bq); + assertEquals("this is a test", frags[0]); + close(); + } + + public void testMixedBooleanShould() throws Exception { + insertDocs(analyzer, "this is a test", "that is an elephant", "the other was a rhinoceros"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "is")), Occur.MUST)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "test")), Occur.SHOULD)); + String frags[] = doSearch(bq, 50, 0); + assertEquals("this is a test", frags[0]); + frags = doSearch(bq, 50, 1); + assertEquals("that is an elephant", 
frags[0]); + + bq.add(new BooleanClause(new TermQuery(new Term(F, "rhinoceros")), Occur.SHOULD)); + frags = doSearch(bq, 50, 0); + assertEquals("this is a test", frags[0]); + frags = doSearch(bq, 50, 1); + assertEquals("that is an elephant", frags[0]); + close(); + } + + public void testMultipleDocumentsAnd() throws Exception { + insertDocs(analyzer, "This document has no matches", PORRIDGE_VERSE, + "This document has some Pease porridge in it"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "Pease")), Occur.MUST)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "porridge")), Occur.MUST)); + String frags[] = doSearch(bq, 50, 0); + assertEquals( + "Pease porridge hot! Pease porridge cold! Pease", + frags[0]); + frags = doSearch(bq, 50, 1); + assertEquals("This document has some Pease porridge in it", + frags[0]); + close(); + } + + + public void testMultipleDocumentsOr() throws Exception { + insertDocs(analyzer, "This document has no matches", PORRIDGE_VERSE, + "This document has some Pease porridge in it"); + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(new Term(F, "Pease")), Occur.SHOULD)); + bq.add(new BooleanClause(new TermQuery(new Term(F, "porridge")), + Occur.SHOULD)); + String frags[] = doSearch(bq, 50, 0); + assertEquals( + "Pease porridge hot! Pease porridge cold! 
Pease", + frags[0]); + frags = doSearch(bq, 50, 1); + assertEquals("This document has some Pease porridge in it", + frags[0]); + close(); + } + + public void testBrouwerianQuery() throws Exception { + + insertDocs(analyzer, "the quick brown duck jumps over the lazy dog with the quick brown fox"); + + BooleanQuery query = new BooleanQuery(); + query.add(new TermQuery(new Term(F, "the")), Occur.MUST); + query.add(new TermQuery(new Term(F, "quick")), Occur.MUST); + query.add(new TermQuery(new Term(F, "jumps")), Occur.MUST); + + assertThat(getHighlight(query), + is("the quick brown duck jumps over the lazy dog with the quick brown fox")); + + UnorderedNearQuery unq = new UnorderedNearQuery(20, true, query); + NonOverlappingQuery bq = new NonOverlappingQuery(unq, new TermQuery(new Term(F, "duck"))); + + assertThat(getHighlight(bq), + is("the quick brown duck jumps over the lazy dog with the quick brown fox")); + + close(); + } + + @Ignore("not implemented yet - unsupported") + public void testMultiPhraseQuery() throws Exception { + MultiPhraseQuery query = new MultiPhraseQuery(); + insertDocs(analyzer, "pease porridge hot but not too hot or otherwise pease porridge cold"); + + query.add(terms(F, "pease"), 0); + query.add(terms(F, "porridge"), 1); + query.add(terms(F, "hot", "cold"), 2); + query.setSlop(1); + + String[] frags = doSearch(query, Integer.MAX_VALUE); + assertEquals("pease porridge hot but not too hot or otherwise pease porridge cold", frags[0]); + + close(); + } + + @Ignore("not implemented yet - unsupported") + public void testMultiPhraseQueryCollisions() throws Exception { + MultiPhraseQuery query = new MultiPhraseQuery(); + insertDocs(analyzer, "pease porridge hot not too hot or otherwise pease porridge porridge"); + + query.add(terms(F, "pease"), 0); + query.add(terms(F, "porridge"), 1); + query.add(terms(F, "coldasice", "porridge" ), 2); + query.setSlop(1); + + String[] frags = doSearch(query, Integer.MAX_VALUE); + assertEquals("pease porridge hot but 
not too hot or otherwise pease porridge porridge", frags[0]); + + close(); + } + + public void testNearPhraseQuery() throws Exception { + + insertDocs(analyzer, "pease porridge rather hot and pease porridge fairly cold"); + + Query firstQ = new OrderedNearQuery(4, termQuery("pease"), termQuery("porridge"), termQuery("hot")); + { + String frags[] = doSearch(firstQ, Integer.MAX_VALUE); + assertEquals("pease porridge rather hot and pease porridge fairly cold", frags[0]); + } + + // near.3(near.4(pease, porridge, hot), near.4(pease, porridge, cold)) + Query q = new OrderedNearQuery(3, + firstQ, + new OrderedNearQuery(4, termQuery("pease"), termQuery("porridge"), termQuery("cold"))); + + String frags[] = doSearch(q, Integer.MAX_VALUE); + assertEquals("pease porridge rather hot and pease porridge fairly cold", + frags[0]); + + close(); + } + + private Term[] terms(String field, String...tokens) { + Term[] terms = new Term[tokens.length]; + for (int i = 0; i < tokens.length; i++) { + terms[i] = new Term(field, tokens[i]); + } + return terms; + } + + @Ignore + public void testSloppyPhraseQuery() throws Exception { + assertSloppyPhrase( "a b c d a b c d e f", "a b c d a b c d e f", 2, "c", "a"); + assertSloppyPhrase( "a c e b d e f a b","a c e b d e f a b", 2, "a", "b"); + assertSloppyPhrase( "Y A X B A", "Y A X B A", 2, "X", "A", "A"); + + //assertSloppyPhrase( "X A X B A","X A X B A", 2, "X", "A", "A"); // non overlapping minmal!! 
+ assertSloppyPhrase( "A A A X",null, 2, "X", "A", "A"); + assertSloppyPhrase( "A A X A", "A A X A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A X A Y B A", "A A X A Y B A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A X", null, 2, "X", "A", "A"); + assertSloppyPhrase( "A X A", null, 1, "X", "A", "A"); + + assertSloppyPhrase( "A X B A", "A X B A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A X A X B A X B B A A X B A A", "A A X A X B A X B B A A X B A A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A X A X B A X B B A A X B A A", "A A X A X B A X B B A A X B A A", 2, "X", "A", "A"); + + assertSloppyPhrase( "A A X A X B A", "A A X A X B A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A Y A X B A", "A A Y A X B A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A Y A X B A A", "A A Y A X B A A", 2, "X", "A", "A"); + assertSloppyPhrase( "A A X A Y B A", null , 1, "X", "A", "A"); + close(); + } + + + private void assertSloppyPhrase(String doc, String expected, int slop, String...query) throws Exception { + insertDocs(analyzer, doc); + PhraseQuery pq = new PhraseQuery(); + for (String string : query) { + pq.add(new Term(F, string)); + } + + pq.setSlop(slop); +// System.out.println(doc); + String[] frags = doSearch(pq, 50); + if (expected == null) { + assertNull(frags != null ? 
frags[0] : "", frags); + } else { + assertEquals(expected, frags[0]); + } + } + + public static class BlockPositionIteratorFilter implements IntervalFilter { + + @Override + public IntervalIterator filter(boolean collectIntervals, IntervalIterator iter) { + return new BlockIntervalIterator(collectIntervals, iter); + } + + } + +} diff --git a/lucene/join/src/java/org/apache/lucene/search/join/FakeScorer.java b/lucene/join/src/java/org/apache/lucene/search/join/FakeScorer.java index cbd1ff8..6f711ed 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/FakeScorer.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/FakeScorer.java @@ -17,11 +17,13 @@ package org.apache.lucene.search.join; * limitations under the License. */ -import java.util.Collection; - import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; +import org.apache.lucene.search.intervals.IntervalIterator; + +import java.io.IOException; +import java.util.Collection; /** Passed to {@link LeafCollector#setScorer} during join collection. 
*/ final class FakeScorer extends Scorer { @@ -52,7 +54,12 @@ final class FakeScorer extends Scorer { public int nextDoc() { throw new UnsupportedOperationException("FakeScorer doesn't support nextDoc()"); } - + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException("FakeScorer doesn't support intervals()"); + } + @Override public float score() { return score; diff --git a/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java index 220d0e1..ecb18f7 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java @@ -17,31 +17,31 @@ package org.apache.lucene.search.join; * limitations under the License. */ -import java.io.IOException; -import java.util.Locale; -import java.util.Set; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Collector; +import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.ComplexExplanation; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Weight; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; import 
org.apache.lucene.util.FixedBitSet; +import java.io.IOException; +import java.util.Locale; +import java.util.Set; + class TermsIncludingScoreQuery extends Query { final String field; @@ -133,7 +133,7 @@ class TermsIncludingScoreQuery extends Query { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - SVInnerScorer scorer = (SVInnerScorer) bulkScorer(context, false, null); + SVInnerScorer scorer = (SVInnerScorer) bulkScorer(context, true, PostingFeatures.DOCS_AND_FREQS, null); if (scorer != null) { return scorer.explain(doc); } @@ -162,8 +162,7 @@ class TermsIncludingScoreQuery extends Query { originalWeight.normalize(norm, topLevelBoost * TermsIncludingScoreQuery.this.getBoost()); } - @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { Terms terms = context.reader().terms(field); if (terms == null) { return null; @@ -181,10 +180,10 @@ class TermsIncludingScoreQuery extends Query { } @Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, PostingFeatures flags, Bits acceptDocs) throws IOException { if (scoreDocsInOrder) { - return super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + return super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); } else { Terms terms = context.reader().terms(field); if (terms == null) { @@ -288,6 +287,7 @@ class TermsIncludingScoreQuery extends Query { return new ComplexExplanation(true, scores[ords[scoreUpto]], "Score based on join value " + termsEnum.term().utf8ToString()); } + } // This impl that tracks whether a docid has already been emitted. 
This check makes sure that docs aren't emitted @@ -376,6 +376,11 @@ class TermsIncludingScoreQuery extends Query { } @Override + public IntervalIterator intervals(boolean collectIntervals) + throws IOException { + return null; + } + public long cost() { return cost; } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java index c555c4a..c15ce83 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java @@ -17,11 +17,6 @@ package org.apache.lucene.search.join; * limitations under the License. */ -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.Set; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -32,9 +27,15 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.Set; + /** * Just like {@link ToParentBlockJoinQuery}, except this * query joins in reverse: you provide a Query matching @@ -124,9 +125,9 @@ public class ToChildBlockJoinQuery extends Query { // NOTE: acceptDocs applies (and is checked) only in the // child document space @Override - public Scorer scorer(AtomicReaderContext readerContext, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext readerContext, PostingFeatures flags, Bits acceptDocs) throws IOException { - final Scorer parentScorer = parentWeight.scorer(readerContext, null); + final Scorer 
parentScorer = parentWeight.scorer(readerContext, flags, null); if (parentScorer == null) { // No matches @@ -323,6 +324,10 @@ public class ToChildBlockJoinQuery extends Query { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + return parentScorer.intervals(collectIntervals); + } + public long cost() { return parentScorer.cost(); } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java index 65767fc..a57825e 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java @@ -23,6 +23,7 @@ import org.apache.lucene.search.*; import org.apache.lucene.search.Scorer.ChildScorer; import org.apache.lucene.search.grouping.GroupDocs; import org.apache.lucene.search.grouping.TopGroups; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.ArrayUtil; import java.io.IOException; diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java index f6985e2..39d859c 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java @@ -17,12 +17,6 @@ package org.apache.lucene.search.join; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.Locale; -import java.util.Set; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; @@ -37,10 +31,17 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; import org.apache.lucene.search.grouping.TopGroups; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.Locale; +import java.util.Set; + /** * This query requires that you index * children and parent docs as a single block, using the @@ -158,9 +159,9 @@ public class ToParentBlockJoinQuery extends Query { // NOTE: acceptDocs applies (and is checked) only in the // parent document space @Override - public Scorer scorer(AtomicReaderContext readerContext, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext readerContext, PostingFeatures flags, Bits acceptDocs) throws IOException { - final Scorer childScorer = childWeight.scorer(readerContext, readerContext.reader().getLiveDocs()); + final Scorer childScorer = childWeight.scorer(readerContext, flags, readerContext.reader().getLiveDocs()); if (childScorer == null) { // No matches return null; @@ -192,7 +193,7 @@ public class ToParentBlockJoinQuery extends Query { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, context.reader().getLiveDocs()); + BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()); if (scorer != null && scorer.advance(doc) == doc) { return 
scorer.explain(context.docBase); } @@ -426,6 +427,10 @@ public class ToParentBlockJoinQuery extends Query { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(); + } + public long cost() { return childScorer.cost(); } diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java index 48c33fe..cafeaba 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java @@ -29,6 +29,7 @@ import org.apache.lucene.document.*; import org.apache.lucene.index.*; import org.apache.lucene.search.*; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.Weight.PostingFeatures; import org.apache.lucene.search.grouping.GroupDocs; import org.apache.lucene.search.grouping.TopGroups; import org.apache.lucene.store.Directory; @@ -1151,7 +1152,7 @@ public class TestBlockJoin extends LuceneTestCase { ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg); Weight weight = s.createNormalizedWeight(q); - DocIdSetIterator disi = weight.scorer(s.getIndexReader().leaves().get(0), null); + DocIdSetIterator disi = weight.scorer(s.getIndexReader().leaves().get(0), PostingFeatures.DOCS_AND_FREQS, null); assertEquals(1, disi.advance(1)); r.close(); dir.close(); @@ -1185,7 +1186,7 @@ public class TestBlockJoin extends LuceneTestCase { ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg); Weight weight = s.createNormalizedWeight(q); - DocIdSetIterator disi = weight.scorer(s.getIndexReader().leaves().get(0), null); + DocIdSetIterator disi = weight.scorer(s.getIndexReader().leaves().get(0), PostingFeatures.DOCS_AND_FREQS, null); assertEquals(2, disi.advance(0)); r.close(); dir.close(); diff --git 
a/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java b/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java index 608b072..015d33b 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java +++ b/lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java @@ -17,18 +17,19 @@ package org.apache.lucene.index.sorter; * limitations under the License. */ -import java.io.IOException; -import java.util.Comparator; - import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.TimSorter; import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; +import java.io.IOException; +import java.util.Comparator; + /** * Sorts documents of a given index by returning a permutation on the document * IDs. @@ -261,7 +262,12 @@ final class Sorter { } static final Scorer FAKESCORER = new Scorer(null) { - + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(); + } + @Override public float score() throws IOException { throw new UnsupportedOperationException(); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java index 9861617..4ec5710 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java @@ -17,12 +17,6 @@ package org.apache.lucene.queries; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.Set; -import java.util.Arrays; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -30,13 +24,20 @@ import org.apache.lucene.queries.function.FunctionQuery; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.ComplexExplanation; import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Weight; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Set; + /** * Query that sets document score as a programmatic function of several (sub) scores: *

      @@ -234,14 +235,14 @@ public class CustomScoreQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { - Scorer subQueryScorer = subQueryWeight.scorer(context, acceptDocs); + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { + Scorer subQueryScorer = subQueryWeight.scorer(context, flags, acceptDocs); if (subQueryScorer == null) { return null; } Scorer[] valSrcScorers = new Scorer[valSrcWeights.length]; for(int i = 0; i < valSrcScorers.length; i++) { - valSrcScorers[i] = valSrcWeights[i].scorer(context, acceptDocs); + valSrcScorers[i] = valSrcWeights[i].scorer(context, flags, acceptDocs); } return new CustomScorer(CustomScoreQuery.this.getCustomScoreProvider(context), this, queryWeight, subQueryScorer, valSrcScorers); } @@ -349,6 +350,10 @@ public class CustomScoreQuery extends Query { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + return subQueryScorer.intervals(collectIntervals); + } + public long cost() { return subQueryScorer.cost(); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java index 7ba5f4f..51c3d6c 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.queries.function; */ import org.apache.lucene.search.*; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -97,8 +98,8 @@ public class BoostedQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { - Scorer subQueryScorer = 
qWeight.scorer(context, acceptDocs); + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { + Scorer subQueryScorer = qWeight.scorer(context, flags, acceptDocs); if (subQueryScorer == null) { return null; } @@ -188,6 +189,10 @@ public class BoostedQuery extends Query { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + return scorer.intervals(collectIntervals); + } + public long cost() { return scorer.cost(); } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java index 726b97e..0c2992d 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java @@ -20,13 +20,18 @@ package org.apache.lucene.queries.function; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.search.*; -import org.apache.lucene.index.MultiFields; +import org.apache.lucene.search.ComplexExplanation; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.Bits; import java.io.IOException; -import java.util.Set; import java.util.Map; +import java.util.Set; /** @@ -90,13 +95,13 @@ public class FunctionQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { return new AllScorer(context, acceptDocs, this, queryWeight); } @Override public 
Explanation explain(AtomicReaderContext context, int doc) throws IOException { - return ((AllScorer)scorer(context, context.reader().getLiveDocs())).explain(doc); + return ((AllScorer)scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs())).explain(doc); } } @@ -178,6 +183,11 @@ public class FunctionQuery extends Query { result.addDetail(new Explanation(weight.queryNorm,"queryNorm")); return result; } + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException("AllScorer doesn't support interval iterators."); + } } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java index ec8aced..32af534 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java @@ -20,6 +20,7 @@ package org.apache.lucene.queries.function; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.Bits; import java.io.IOException; @@ -88,6 +89,11 @@ public class ValueSourceScorer extends Scorer { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException("ValueSourceScorer doesn't support interval iterators."); + } + + @Override public int freq() throws IOException { return 1; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/QueryValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/QueryValueSource.java index 10a5f0d..a9d04ec 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/QueryValueSource.java +++ 
b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/QueryValueSource.java @@ -23,6 +23,7 @@ import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.FloatDocValues; import org.apache.lucene.search.*; +import org.apache.lucene.search.Weight.PostingFeatures; import org.apache.lucene.util.Bits; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueFloat; @@ -123,7 +124,7 @@ class QueryDocValues extends FloatDocValues { try { if (doc < lastDocRequested) { if (noMatches) return defVal; - scorer = weight.scorer(readerContext, acceptDocs); + scorer = weight.scorer(readerContext, PostingFeatures.DOCS_AND_FREQS, acceptDocs); if (scorer==null) { noMatches = true; return defVal; @@ -154,7 +155,7 @@ class QueryDocValues extends FloatDocValues { try { if (doc < lastDocRequested) { if (noMatches) return false; - scorer = weight.scorer(readerContext, acceptDocs); + scorer = weight.scorer(readerContext, PostingFeatures.DOCS_AND_FREQS, acceptDocs); scorerDoc = -1; if (scorer==null) { noMatches = true; @@ -212,7 +213,7 @@ class QueryDocValues extends FloatDocValues { mval.exists = false; return; } - scorer = weight.scorer(readerContext, acceptDocs); + scorer = weight.scorer(readerContext, PostingFeatures.DOCS_AND_FREQS, acceptDocs); scorerDoc = -1; if (scorer==null) { noMatches = true; diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java index 7eae294..389ac41 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java @@ -315,6 +315,9 @@ public class AssertingAtomicReader extends FilterAtomicReader { assert state != DocsEnumState.START : "freq() called before 
nextDoc()/advance()"; assert state != DocsEnumState.FINISHED : "freq() called after NO_MORE_DOCS"; int freq = super.freq(); + if (freq == 0) { + System.out.println(); + } assert freq > 0; return freq; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java index 47725db..5ace84a 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java @@ -17,16 +17,17 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.List; -import java.util.Random; -import java.util.concurrent.ExecutorService; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.TestUtil; +import java.io.IOException; +import java.util.List; +import java.util.Random; +import java.util.concurrent.ExecutorService; + /** * Helper class that adds some extra checks to ensure correct * usage of {@code IndexSearcher} and {@code Weight}. diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java index 705a8a0..51a8738 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java @@ -17,6 +17,9 @@ package org.apache.lucene.search; * limitations under the License. 
*/ +import org.apache.lucene.index.AssertingAtomicReader; +import org.apache.lucene.search.intervals.IntervalIterator; + import java.io.IOException; import java.lang.ref.WeakReference; import java.util.Collection; @@ -25,8 +28,6 @@ import java.util.Map; import java.util.Random; import java.util.WeakHashMap; -import org.apache.lucene.index.AssertingAtomicReader; - /** Wraps a Scorer with additional checks */ public class AssertingScorer extends Scorer { @@ -96,6 +97,11 @@ public class AssertingScorer extends Scorer { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + return in.intervals(collectIntervals); + } + + @Override public Collection getChildren() { // We cannot hide that we hold a single child, else // collectors (e.g. ToParentBlockJoinCollector) that diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java index b075247..03bbb06 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java @@ -17,12 +17,12 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.Random; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.util.Bits; +import java.io.IOException; +import java.util.Random; + class AssertingWeight extends Weight { static Weight wrap(Random random, Weight other) { @@ -60,18 +60,18 @@ class AssertingWeight extends Weight { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { // if the caller asks for in-order scoring or if the weight does not support // out-of order scoring then collection will have to happen in-order. 
- final Scorer inScorer = in.scorer(context, acceptDocs); + final Scorer inScorer = in.scorer(context, flags, acceptDocs); return AssertingScorer.wrap(new Random(random.nextLong()), inScorer); } @Override - public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, PostingFeatures flags, Bits acceptDocs) throws IOException { // if the caller asks for in-order scoring or if the weight does not support // out-of order scoring then collection will have to happen in-order. - BulkScorer inScorer = in.bulkScorer(context, scoreDocsInOrder, acceptDocs); + BulkScorer inScorer = in.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); if (inScorer == null) { return null; } @@ -83,7 +83,7 @@ class AssertingWeight extends Weight { } else if (random.nextBoolean()) { // Let super wrap this.scorer instead, so we use // AssertingScorer: - inScorer = super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + inScorer = super.bulkScorer(context, scoreDocsInOrder, flags, acceptDocs); } if (scoreDocsInOrder == false && random.nextBoolean()) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java index eb1fa1c..515fa2b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java @@ -17,29 +17,29 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.List; -import java.util.Random; - import junit.framework.Assert; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.AllDeletedFilterReader; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.search.Weight.PostingFeatures; import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.List; +import java.util.Random; + import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT; /** @@ -267,7 +267,7 @@ public class QueryUtils { if (scorer == null) { Weight w = s.createNormalizedWeight(q); AtomicReaderContext context = readerContextArray.get(leafPtr); - scorer = w.scorer(context, context.reader().getLiveDocs()); + scorer = w.scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()); } int op = order[(opidx[0]++) % order.length]; @@ -314,7 +314,7 @@ public class QueryUtils { indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q); AtomicReaderContext ctx = (AtomicReaderContext)indexSearcher.getTopReaderContext(); - Scorer scorer = w.scorer(ctx, ctx.reader().getLiveDocs()); + Scorer scorer = w.scorer(ctx, PostingFeatures.DOCS_AND_FREQS, ctx.reader().getLiveDocs()); if (scorer != null) { boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; 
Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); @@ -341,7 +341,7 @@ public class QueryUtils { indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q); AtomicReaderContext ctx = previousReader.getContext(); - Scorer scorer = w.scorer(ctx, ctx.reader().getLiveDocs()); + Scorer scorer = w.scorer(ctx, PostingFeatures.DOCS_AND_FREQS, ctx.reader().getLiveDocs()); if (scorer != null) { boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); @@ -372,7 +372,7 @@ public class QueryUtils { long startMS = System.currentTimeMillis(); for (int i=lastDoc[0]+1; i<=doc; i++) { Weight w = s.createNormalizedWeight(q); - Scorer scorer = w.scorer(context.get(leafPtr), liveDocs); + Scorer scorer = w.scorer(context.get(leafPtr), PostingFeatures.DOCS_AND_FREQS, liveDocs); Assert.assertTrue("query collected "+doc+" but skipTo("+i+") says no more docs!",scorer.advance(i) != DocIdSetIterator.NO_MORE_DOCS); Assert.assertEquals("query collected "+doc+" but skipTo("+i+") got to "+scorer.docID(),doc,scorer.docID()); float skipToScore = scorer.score(); @@ -400,7 +400,7 @@ public class QueryUtils { IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader); indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q); - Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), previousReader.getLiveDocs()); + Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), PostingFeatures.DOCS_AND_FREQS, previousReader.getLiveDocs()); if (scorer != null) { boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to 
"+scorer.docID(),more); @@ -425,7 +425,7 @@ public class QueryUtils { IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader); indexSearcher.setSimilarity(s.getSimilarity()); Weight w = indexSearcher.createNormalizedWeight(q); - Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), previousReader.getLiveDocs()); + Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), PostingFeatures.DOCS_AND_FREQS, previousReader.getLiveDocs()); if (scorer != null) { boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); diff --git a/maven-build/lucene/analysis/common/pom.xml b/maven-build/lucene/analysis/common/pom.xml new file mode 100644 index 0000000..aaef21b --- /dev/null +++ b/maven-build/lucene/analysis/common/pom.xml @@ -0,0 +1,78 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + jar + Lucene Common Analyzers + Additional Analyzers + + lucene/analysis/common + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/analysis/icu/pom.xml b/maven-build/lucene/analysis/icu/pom.xml new file mode 100644 index 0000000..ba6569c --- /dev/null +++ b/maven-build/lucene/analysis/icu/pom.xml @@ -0,0 +1,88 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-icu + jar + Lucene ICU Analysis Components + + Provides integration with ICU (International Components for Unicode) for + stronger Unicode and internationalization support. + + + lucene/analysis/icu + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + ${project.groupId} + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + com.ibm.icu + icu4j + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/analysis/kuromoji/pom.xml b/maven-build/lucene/analysis/kuromoji/pom.xml new file mode 100644 index 0000000..1132b13 --- /dev/null +++ b/maven-build/lucene/analysis/kuromoji/pom.xml @@ -0,0 +1,84 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + jar + Lucene Kuromoji Japanese Morphological Analyzer + + Lucene Kuromoji Japanese Morphological Analyzer + + + lucene/analysis/kuromoji + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/analysis/morfologik/pom.xml b/maven-build/lucene/analysis/morfologik/pom.xml new file mode 100644 index 0000000..22f4326 --- /dev/null +++ b/maven-build/lucene/analysis/morfologik/pom.xml @@ -0,0 +1,95 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-morfologik + jar + Lucene Morfologik Polish Lemmatizer + + A dictionary-driven lemmatizer for Polish (includes morphosyntactic annotations) + + + lucene/analysis/morfologik + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + org.carrot2 + morfologik-fsa + + + org.carrot2 + morfologik-polish + + + org.carrot2 + morfologik-stemming + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/analysis/phonetic/pom.xml b/maven-build/lucene/analysis/phonetic/pom.xml new file mode 100644 index 0000000..9a39e3f --- /dev/null +++ b/maven-build/lucene/analysis/phonetic/pom.xml @@ -0,0 +1,87 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + jar + Lucene Phonetic Filters + + Provides phonetic encoding via Commons Codec. + + + lucene/analysis/phonetic + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + commons-codec + commons-codec + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/analysis/pom.xml b/maven-build/lucene/analysis/pom.xml new file mode 100644 index 0000000..8d71f65 --- /dev/null +++ b/maven-build/lucene/analysis/pom.xml @@ -0,0 +1,52 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-analysis-modules-aggregator + Lucene Analysis Modules aggregator POM + pom + + common + icu + kuromoji + morfologik + phonetic + smartcn + stempel + uima + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/maven-build/lucene/analysis/smartcn/pom.xml b/maven-build/lucene/analysis/smartcn/pom.xml new file mode 100644 index 0000000..86496c3 --- /dev/null +++ b/maven-build/lucene/analysis/smartcn/pom.xml @@ -0,0 +1,82 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-smartcn + jar + Lucene Smart Chinese Analyzer + Smart Chinese Analyzer + + lucene/analysis/smartcn + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/analysis/stempel/pom.xml b/maven-build/lucene/analysis/stempel/pom.xml new file mode 100644 index 0000000..a156f23 --- /dev/null +++ b/maven-build/lucene/analysis/stempel/pom.xml @@ -0,0 +1,127 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-stempel + jar + Lucene Stempel Analyzer + Stempel Analyzer + + lucene/analysis/stempel + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + none + + + check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + + + + check + + + + check-system-out + + + jdk-system-out + + + + org/egothor/stemmer/Compile.class + org/egothor/stemmer/DiffIt.class + + + + check + + + + + + + diff --git a/maven-build/lucene/analysis/uima/pom.xml b/maven-build/lucene/analysis/uima/pom.xml new file mode 100644 index 0000000..d1a905f --- /dev/null +++ b/maven-build/lucene/analysis/uima/pom.xml @@ -0,0 +1,94 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-uima + jar + Lucene UIMA Analysis Components + + Lucene Integration with UIMA for extracting metadata from arbitrary (text) + fields and enrich document with features extracted from UIMA types + (language, sentences, concepts, named entities, etc.) + + + lucene/analysis/uima + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + org.apache.uima + Tagger + + + org.apache.uima + WhitespaceTokenizer + + + org.apache.uima + uimaj-core + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + + ${module-path}/src/test-files + + + + diff --git a/maven-build/lucene/benchmark/pom.xml b/maven-build/lucene/benchmark/pom.xml new file mode 100644 index 0000000..b3699ed --- /dev/null +++ b/maven-build/lucene/benchmark/pom.xml @@ -0,0 +1,157 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-benchmark + jar + Lucene Benchmark + Lucene Benchmarking Module + + lucene/benchmark + ../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-facet + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + net.sourceforge.nekohtml + nekohtml + + + org.apache.commons + commons-compress + + + xerces + xercesImpl + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + ${module-path} + + conf/**/* + + + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + none + + + check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + + + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + + + + check + + + + + + + diff --git a/maven-build/lucene/classification/pom.xml b/maven-build/lucene/classification/pom.xml new file mode 100644 index 0000000..50fbf5a --- /dev/null +++ b/maven-build/lucene/classification/pom.xml @@ -0,0 +1,82 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-classification + jar + Lucene Classification + Lucene Classification + + lucene/classification + ../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + test + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/codecs/pom.xml b/maven-build/lucene/codecs/pom.xml new file mode 100644 index 0000000..fdfc09a --- /dev/null +++ b/maven-build/lucene/codecs/pom.xml @@ -0,0 +1,46 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-codecs-aggregator + pom + Lucene codecs aggregator POM + + src/java + src/test + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/maven-build/lucene/codecs/src/java/pom.xml b/maven-build/lucene/codecs/src/java/pom.xml new file mode 100644 index 0000000..f07b730 --- /dev/null +++ b/maven-build/lucene/codecs/src/java/pom.xml @@ -0,0 +1,86 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-codecs + jar + Lucene codecs + + Codecs and postings formats for Apache Lucene. + + + lucene/codecs + ../../../../.. 
+ ${relative-top-level}/${module-directory}/src/java + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + + ${module-path} + + + ${module-path}/../resources + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + org.apache.maven.plugins + maven-compiler-plugin + + true + + + + de.thetaphi + forbiddenapis + + + lucene-shared-test-check-forbidden-apis + none + + + + + + diff --git a/maven-build/lucene/codecs/src/test/pom.xml b/maven-build/lucene/codecs/src/test/pom.xml new file mode 100644 index 0000000..b70fa32 --- /dev/null +++ b/maven-build/lucene/codecs/src/test/pom.xml @@ -0,0 +1,86 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-codecs-tests + Lucene codecs tests + jar + + lucene/codecs + ../../../../.. 
+ ${relative-top-level}/${module-directory}/src/test + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + test + + + + + + ${module-path} + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + none + + + + + + diff --git a/maven-build/lucene/core/pom.xml b/maven-build/lucene/core/pom.xml new file mode 100644 index 0000000..2ab23cd --- /dev/null +++ b/maven-build/lucene/core/pom.xml @@ -0,0 +1,46 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-core-aggregator + pom + Lucene Core aggregator POM + + src/java + src/test + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/maven-build/lucene/core/src/java/pom.xml b/maven-build/lucene/core/src/java/pom.xml new file mode 100644 index 0000000..7ebafee --- /dev/null +++ b/maven-build/lucene/core/src/java/pom.xml @@ -0,0 +1,134 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-core + jar + Lucene Core + Apache Lucene Java Core + + lucene/core + ../../../../.. 
+ ${relative-top-level}/${module-directory}/src/java + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + ${module-path} + + + ${module-path}/../resources + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + org.apache.maven.plugins + maven-compiler-plugin + + true + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + none + + + lucene-shared-test-check-forbidden-apis + none + + + check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + + + + check + + + + check-system-out + + + jdk-system-out + + + + org/apache/lucene/index/CheckIndex.class + org/apache/lucene/index/IndexUpgrader.class + org/apache/lucene/store/LockVerifyServer.class + org/apache/lucene/store/LockStressTest.class + + org/apache/lucene/util/PrintStreamInfoStream.class + + + + check + + + + check-rue + + + ${top-level}/lucene/tools/forbiddenApis/rue.txt + + + org/apache/lucene/search/FieldCache$CacheEntry.class + org/apache/lucene/util/RamUsageEstimator.class + org/apache/lucene/search/CachingWrapperFilter.class + + + + check + + + + + + + diff --git a/maven-build/lucene/core/src/test/pom.xml b/maven-build/lucene/core/src/test/pom.xml new file mode 100644 index 0000000..cddded6 --- /dev/null +++ b/maven-build/lucene/core/src/test/pom.xml @@ -0,0 +1,95 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-core-tests + Lucene Core tests + jar + + lucene/core + ../../../../.. 
+ ${relative-top-level}/${module-directory}/src/test + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + test + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + + ${module-path} + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ${project.version} + + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + none + + + + + + diff --git a/maven-build/lucene/demo/pom.xml b/maven-build/lucene/demo/pom.xml new file mode 100644 index 0000000..a94832a --- /dev/null +++ b/maven-build/lucene/demo/pom.xml @@ -0,0 +1,144 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-demo + jar + Lucene Demo + This is the demo for Apache Lucene Java + + lucene/demo + ../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-facet + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + javax.servlet + servlet-api + + + org.antlr + antlr-runtime + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + none + + + check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + + + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + + + + check + + + + + + + diff --git a/maven-build/lucene/expressions/pom.xml b/maven-build/lucene/expressions/pom.xml new file mode 100644 index 0000000..ca5149f --- /dev/null +++ b/maven-build/lucene/expressions/pom.xml @@ -0,0 +1,82 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-expressions + jar + Lucene Expressions + + Dynamically computed values to sort/facet/search on based on a pluggable grammar. + + + lucene/expressions + ../../.. 
+ ${relative-top-level}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + org.antlr + antlr-runtime + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + diff --git a/maven-build/lucene/facet/pom.xml b/maven-build/lucene/facet/pom.xml new file mode 100644 index 0000000..840dab2 --- /dev/null +++ b/maven-build/lucene/facet/pom.xml @@ -0,0 +1,128 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-facet + jar + Lucene Facets + + Package for Faceted Indexing and Search + + + lucene/facet + ../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + none + + + check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + + + + check + + + + check-system-out + + + jdk-system-out + + + + org/apache/lucene/facet/taxonomy/PrintTaxonomyStats.class + + + + check + + + + + + + diff --git a/maven-build/lucene/grouping/pom.xml b/maven-build/lucene/grouping/pom.xml new file 
mode 100644 index 0000000..642fdc9 --- /dev/null +++ b/maven-build/lucene/grouping/pom.xml @@ -0,0 +1,77 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-grouping + jar + Lucene Grouping + Lucene Grouping Module + + lucene/grouping + ../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/highlighter/pom.xml b/maven-build/lucene/highlighter/pom.xml new file mode 100644 index 0000000..f538eef --- /dev/null +++ b/maven-build/lucene/highlighter/pom.xml @@ -0,0 +1,83 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + jar + Lucene Highlighter + + This is the highlighter for apache lucene java + + + lucene/highlighter + ../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/join/pom.xml b/maven-build/lucene/join/pom.xml new file mode 100644 index 0000000..23074b4 --- /dev/null +++ b/maven-build/lucene/join/pom.xml @@ -0,0 +1,77 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-join + jar + Lucene Join + Lucene Join Module + + lucene/join + ../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/memory/pom.xml b/maven-build/lucene/memory/pom.xml new file mode 100644 index 0000000..4792ca6 --- /dev/null +++ b/maven-build/lucene/memory/pom.xml @@ -0,0 +1,124 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-memory + jar + Lucene Memory + + High-performance single-document index to compare against Query + + + lucene/memory + ../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + none + + + check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + jdk-system-out + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + + + + check + + + + check-rue + + + ${top-level}/lucene/tools/forbiddenApis/rue.txt + + + org/apache/lucene/index/memory/MemoryIndex.class + + + + check + + + + + + + diff --git a/maven-build/lucene/misc/pom.xml b/maven-build/lucene/misc/pom.xml new file mode 100644 index 0000000..23ae79e --- /dev/null +++ b/maven-build/lucene/misc/pom.xml @@ -0,0 +1,122 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-misc + jar + Lucene Miscellaneous + Miscellaneous Lucene extensions + + lucene/misc + ../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + none + + + check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + + + + check + + + + check-system-out + + + jdk-system-out + + + + org/apache/lucene/index/CompoundFileExtractor.class + org/apache/lucene/index/IndexSplitter.class + org/apache/lucene/index/MultiPassIndexSplitter.class + org/apache/lucene/misc/GetTermInfo.class + org/apache/lucene/misc/HighFreqTerms.class + org/apache/lucene/misc/IndexMergeTool.class + + + + check + + + + + + + diff --git a/maven-build/lucene/pom.xml b/maven-build/lucene/pom.xml new file mode 100644 index 0000000..7e3ea95 --- /dev/null +++ b/maven-build/lucene/pom.xml @@ -0,0 +1,109 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-solr-grandparent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-parent + pom + Lucene parent POM + Lucene parent POM + + lucene + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + core + codecs + test-framework + analysis + benchmark + classification + demo + expressions + facet + grouping + highlighter + join + memory + misc + queries + queryparser + replicator + sandbox + spatial + suggest + + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + + + true + + jdk-unsafe + 
jdk-deprecated + jdk-system-out + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + ${top-level}/lucene/tools/forbiddenApis/rue.txt + + + + check + + + + lucene-shared-test-check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + + + ${top-level}/lucene/tools/forbiddenApis/tests.txt + ${top-level}/lucene/tools/forbiddenApis/base.txt + + + + testCheck + + + + + + + diff --git a/maven-build/lucene/queries/pom.xml b/maven-build/lucene/queries/pom.xml new file mode 100644 index 0000000..47882e1 --- /dev/null +++ b/maven-build/lucene/queries/pom.xml @@ -0,0 +1,73 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-queries + jar + Lucene Queries + Lucene Queries Module + + lucene/queries + ../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/queryparser/pom.xml b/maven-build/lucene/queryparser/pom.xml new file mode 100644 index 0000000..b26f213 --- /dev/null +++ b/maven-build/lucene/queryparser/pom.xml @@ -0,0 +1,132 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + jar + Lucene QueryParsers + Lucene QueryParsers module + + lucene/queryparser + ../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-sandbox + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + + + ${module-path}/src/resources + + + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + none + + + check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + + + + check + + + + check-system-out + + + jdk-system-out + + + + org/apache/lucene/queryparser/classic/QueryParserTokenManager.class + org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.class + org/apache/lucene/queryparser/surround/parser/QueryParserTokenManager.class + + + + check + + + + + + + diff --git a/maven-build/lucene/replicator/pom.xml b/maven-build/lucene/replicator/pom.xml new file mode 100644 index 0000000..6986ef4 --- /dev/null +++ b/maven-build/lucene/replicator/pom.xml @@ -0,0 +1,127 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-replicator + jar + Lucene Replicator + Lucene Replicator Module + + lucene/replicator + ../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-facet + + + commons-logging + commons-logging + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty.orbit + javax.servlet + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ${tests.jettyConnector} + + + + + + diff --git a/maven-build/lucene/sandbox/pom.xml b/maven-build/lucene/sandbox/pom.xml new file mode 100644 index 0000000..8b1806b --- /dev/null +++ b/maven-build/lucene/sandbox/pom.xml @@ -0,0 +1,76 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-sandbox + jar + Lucene Sandbox + Lucene Sandbox + + lucene/sandbox + ../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + jakarta-regexp + jakarta-regexp + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/spatial/pom.xml b/maven-build/lucene/spatial/pom.xml new file mode 100644 index 0000000..42bff99 --- /dev/null +++ b/maven-build/lucene/spatial/pom.xml @@ -0,0 +1,74 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-spatial + jar + Lucene Spatial + + Spatial Strategies for Apache Lucene + + + lucene/spatial + ../../.. + ${relative-top-level}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + com.spatial4j + spatial4j + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + + diff --git a/maven-build/lucene/suggest/pom.xml b/maven-build/lucene/suggest/pom.xml new file mode 100644 index 0000000..68c112e --- /dev/null +++ b/maven-build/lucene/suggest/pom.xml @@ -0,0 +1,85 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-suggest + jar + Lucene Suggest + Lucene Suggest Module + + lucene/suggest + ../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/maven-build/lucene/test-framework/pom.xml b/maven-build/lucene/test-framework/pom.xml new file mode 100644 index 0000000..8423a41 --- /dev/null +++ b/maven-build/lucene/test-framework/pom.xml @@ -0,0 +1,138 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + jar + Lucene Test Framework + Apache Lucene Java Test Framework + + lucene/test-framework + ../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + com.carrotsearch.randomizedtesting + junit4-ant + + + com.carrotsearch.randomizedtesting + randomizedtesting-runner + + + junit + junit + + + org.apache.ant + ant + + + + + + ${module-path}/src/java + + + ${module-path}/src/resources + + + ${project.build.sourceDirectory} + + **/*.java + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + de.thetaphi + forbiddenapis + + + lucene-shared-check-forbidden-apis + none + + + lucene-shared-test-check-forbidden-apis + none + + + test-check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + ${top-level}/lucene/tools/forbiddenApis/tests.txt + + + + check + + + + + + + + + sonatype.releases + Sonatype Releases Repository + http://oss.sonatype.org/content/repositories/releases + + true + + + never + + + + diff --git a/maven-build/pom.xml b/maven-build/pom.xml new file mode 100644 index 0000000..5e64e70 --- /dev/null +++ b/maven-build/pom.xml @@ -0,0 +1,8196 @@ + + + 4.0.0 + + org.apache + apache + 13 + + + uk.co.flax.lucene-solr-intervals + lucene-solr-grandparent + r1581360-intervals-1.3.1-SNAPSHOT + pom + Grandparent POM for Apache Lucene Core and Apache Solr + Grandparent POM for Apache Lucene Core and Apache Solr + http://lucene.apache.org + + lucene + solr + + + http://svn.apache.org/repos/asf/lucene/dev/trunk + https://svn.apache.org/repos/asf/lucene/dev/trunk + http://svn.apache.org/viewvc/lucene/dev/trunk + 5.0.0 + yyyy-MM-dd HH:mm:ss + 1.7 + 8.1.10.v20130312 + + + 1 + + + + + + + + random + random + random + 5.0 + 1 + false + random + random + false + ${tests.verbose} + + + JIRA + https://issues.apache.org/jira/browse/LUCENE 
+ + + Jenkins + https://builds.apache.org/computer/lucene/ + + + + General List + general-subscribe@lucene.apache.org + general-unsubscribe@lucene.apache.org + + http://mail-archives.apache.org/mod_mbox/lucene-general/ + + + + Java User List + java-user-subscribe@lucene.apache.org + java-user-unsubscribe@lucene.apache.org + + http://mail-archives.apache.org/mod_mbox/lucene-java-user/ + + + + Java Developer List + dev-subscribe@lucene.apache.org + dev-unsubscribe@lucene.apache.org + http://mail-archives.apache.org/mod_mbox/lucene-dev/ + + + Java Commits List + commits-subscribe@lucene.apache.org + commits-unsubscribe@lucene.apache.org + + http://mail-archives.apache.org/mod_mbox/lucene-java-commits/ + + + + 2000 + + scm:svn:${vc-anonymous-base-url} + scm:svn:${vc-dev-base-url} + ${vc-browse-base-url} + + + + Apache 2 + http://www.apache.org/licenses/LICENSE-2.0.txt + + + + + apache.snapshots + Apache Snapshot Repository + http://repository.apache.org/snapshots + + false + + + + false + never + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-icu + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + com.ibm.icu + icu4j + + + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-morfologik + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + org.carrot2 + morfologik-fsa + + + org.carrot2 + morfologik-polish + + + org.carrot2 + morfologik-stemming + + + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + 
${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + commons-codec + commons-codec + + + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-smartcn + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-stempel + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-uima + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + org.apache.uima + Tagger + + + org.apache.uima + WhitespaceTokenizer + + + org.apache.uima + uimaj-core + + + + + uk.co.flax.lucene-solr-intervals + lucene-benchmark + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-facet + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + net.sourceforge.nekohtml + nekohtml + + + org.apache.commons + commons-compress + + + xerces + xercesImpl + + + + + uk.co.flax.lucene-solr-intervals + lucene-classification + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + + uk.co.flax.lucene-solr-intervals + 
lucene-core + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-demo + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-facet + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + javax.servlet + servlet-api + + + org.antlr + antlr-runtime + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + org.antlr + antlr-runtime + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + + + uk.co.flax.lucene-solr-intervals + lucene-facet + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + + + uk.co.flax.lucene-solr-intervals + lucene-join + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + + + uk.co.flax.lucene-solr-intervals + lucene-memory + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + + uk.co.flax.lucene-solr-intervals + lucene-misc + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + + uk.co.flax.lucene-solr-intervals + lucene-queries 
+ ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-sandbox + + + + + uk.co.flax.lucene-solr-intervals + lucene-replicator + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-facet + + + commons-logging + commons-logging + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty.orbit + javax.servlet + + + + + uk.co.flax.lucene-solr-intervals + lucene-sandbox + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + jakarta-regexp + jakarta-regexp + + + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + com.spatial4j + spatial4j + + + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + com.carrotsearch.randomizedtesting + junit4-ant + + + com.carrotsearch.randomizedtesting + randomizedtesting-runner + + + junit + junit + + + org.apache.ant + ant + + + + + 
uk.co.flax.lucene-solr-intervals + solr-analysis-extras + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-icu + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-morfologik + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-smartcn + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-stempel + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + 
hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.zookeeper + zookeeper + + + org.carrot2 + morfologik-fsa + + + org.carrot2 + morfologik-polish + + + org.carrot2 + morfologik-stemming + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + + + uk.co.flax.lucene-solr-intervals + solr-cell + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + 
solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + asm + asm + + + com.adobe.xmp + xmpcore + + + com.carrotsearch + hppc + + + com.drewnoakes + metadata-extractor + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.googlecode.juniversalchardet + juniversalchardet + + + com.googlecode.mp4parser + isoparser + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + de.l3s.boilerpipe + boilerpipe + + + dom4j + dom4j + + + edu.ucar + netcdf + + + jdom + jdom + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.commons + commons-compress + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-ooxml-schemas + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + org.apache.tika + tika-xmp + + + org.apache.xmlbeans + xmlbeans + + + org.apache.zookeeper + zookeeper + + + org.aspectj + aspectjrt + + + org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + 
jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.tukaani + xz + + + rome + rome + + + xerces + xercesImpl + + + + + uk.co.flax.lucene-solr-intervals + solr-clustering + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.spatial4j + spatial4j + + + commons-cli + 
commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.mahout + mahout-collections + + + org.apache.mahout + mahout-math + + + org.apache.zookeeper + zookeeper + + + org.carrot2 + carrot2-mini + + + org.carrot2.attributes + attributes-binder + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.simpleframework + simple-xml + + + + + uk.co.flax.lucene-solr-intervals + solr-core + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + 
+ + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.zookeeper + zookeeper + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + 
+ org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + + + uk.co.flax.lucene-solr-intervals + solr-dataimporthandler + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + javax.activation + activation + + + javax.mail + mail + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + 
org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.zookeeper + zookeeper + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + + + uk.co.flax.lucene-solr-intervals + solr-dataimporthandler-extras + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-dataimporthandler + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + asm + asm + + + 
com.adobe.xmp + xmpcore + + + com.carrotsearch + hppc + + + com.drewnoakes + metadata-extractor + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.googlecode.juniversalchardet + juniversalchardet + + + com.googlecode.mp4parser + isoparser + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + de.l3s.boilerpipe + boilerpipe + + + dom4j + dom4j + + + edu.ucar + netcdf + + + javax.activation + activation + + + javax.mail + mail + + + jdom + jdom + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.commons + commons-compress + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-ooxml-schemas + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + org.apache.tika + tika-xmp + + + org.apache.xmlbeans + xmlbeans + + + org.apache.zookeeper + zookeeper + + + org.aspectj + aspectjrt + + + org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + 
org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.tukaani + xz + + + rome + rome + + + xerces + xercesImpl + + + + + uk.co.flax.lucene-solr-intervals + solr-langid + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + asm + asm + + + com.adobe.xmp + xmpcore + + + com.carrotsearch + hppc + + + com.cybozu.labs + langdetect + + + com.drewnoakes + metadata-extractor + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + 
com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.googlecode.juniversalchardet + juniversalchardet + + + com.googlecode.mp4parser + isoparser + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + de.l3s.boilerpipe + boilerpipe + + + dom4j + dom4j + + + edu.ucar + netcdf + + + jdom + jdom + + + joda-time + joda-time + + + log4j + log4j + + + net.arnx + jsonic + + + org.antlr + antlr-runtime + + + org.apache.commons + commons-compress + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-ooxml-schemas + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + org.apache.tika + tika-xmp + + + org.apache.xmlbeans + xmlbeans + + + org.apache.zookeeper + zookeeper + + + org.aspectj + aspectjrt + + + org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + 
jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.tukaani + xz + + + rome + rome + + + xerces + xercesImpl + + + + + uk.co.flax.lucene-solr-intervals + solr-map-reduce + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-cell + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-morphlines-cell + + + uk.co.flax.lucene-solr-intervals + solr-morphlines-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + asm + asm + + + com.adobe.xmp + xmpcore + + + com.carrotsearch + hppc + + + com.codahale.metrics + metrics-core + + + com.codahale.metrics + metrics-healthchecks + + + com.drewnoakes + metadata-extractor + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java 
+ + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.googlecode.juniversalchardet + juniversalchardet + + + com.googlecode.mp4parser + isoparser + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + com.typesafe + config + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + de.l3s.boilerpipe + boilerpipe + + + dom4j + dom4j + + + edu.ucar + netcdf + + + jdom + jdom + + + joda-time + joda-time + + + log4j + log4j + + + net.sf.saxon + Saxon-HE + + + net.sourceforge.argparse4j + argparse4j + + + org.antlr + antlr-runtime + + + org.apache.commons + commons-compress + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.hadoop + hadoop-mapreduce-client-core + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-ooxml-schemas + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + org.apache.tika + tika-xmp + + + org.apache.xmlbeans + xmlbeans + + + org.apache.zookeeper + zookeeper + + + org.aspectj + aspectjrt + + + org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + 
org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.kitesdk + kite-morphlines-avro + + + org.kitesdk + kite-morphlines-core + + + org.kitesdk + kite-morphlines-hadoop-sequencefile + + + org.kitesdk + kite-morphlines-json + + + org.kitesdk + kite-morphlines-saxon + + + org.kitesdk + kite-morphlines-tika-core + + + org.kitesdk + kite-morphlines-tika-decompress + + + org.kitesdk + kite-morphlines-twitter + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.tukaani + xz + + + rome + rome + + + xerces + xercesImpl + + + + + uk.co.flax.lucene-solr-intervals + solr-morphlines-cell + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-cell + + + 
uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-morphlines-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + asm + asm + + + com.adobe.xmp + xmpcore + + + com.carrotsearch + hppc + + + com.codahale.metrics + metrics-core + + + com.codahale.metrics + metrics-healthchecks + + + com.drewnoakes + metadata-extractor + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.googlecode.juniversalchardet + juniversalchardet + + + com.googlecode.mp4parser + isoparser + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + com.typesafe + config + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + de.l3s.boilerpipe + boilerpipe + + + dom4j + dom4j + + + edu.ucar + netcdf + + + jdom + jdom + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.commons + commons-compress + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-ooxml-schemas + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + org.apache.tika + tika-xmp + + + org.apache.xmlbeans + xmlbeans + + + org.apache.zookeeper + zookeeper + + + org.aspectj + aspectjrt + + + 
org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.kitesdk + kite-morphlines-avro + + + org.kitesdk + kite-morphlines-core + + + org.kitesdk + kite-morphlines-json + + + org.kitesdk + kite-morphlines-tika-core + + + org.kitesdk + kite-morphlines-tika-decompress + + + org.kitesdk + kite-morphlines-twitter + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.tukaani + xz + + + rome + rome + + + xerces + xercesImpl + + + + + uk.co.flax.lucene-solr-intervals + solr-morphlines-core + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + 
uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-cell + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + asm + asm + + + com.adobe.xmp + xmpcore + + + com.carrotsearch + hppc + + + com.codahale.metrics + metrics-core + + + com.codahale.metrics + metrics-healthchecks + + + com.drewnoakes + metadata-extractor + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.googlecode.juniversalchardet + juniversalchardet + + + com.googlecode.mp4parser + isoparser + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + com.typesafe + config + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + de.l3s.boilerpipe + boilerpipe + + + dom4j + dom4j + + + edu.ucar + netcdf + + + jdom + jdom + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.commons + commons-compress + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-ooxml-schemas + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + org.apache.tika + 
tika-xmp + + + org.apache.xmlbeans + xmlbeans + + + org.apache.zookeeper + zookeeper + + + org.aspectj + aspectjrt + + + org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.kitesdk + kite-morphlines-avro + + + org.kitesdk + kite-morphlines-core + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.tukaani + xz + + + rome + rome + + + xerces + xercesImpl + + + + + uk.co.flax.lucene-solr-intervals + solr-solrj + ${project.version} + + + commons-io + commons-io + + + log4j + log4j + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.zookeeper + zookeeper + + + org.codehaus.woodstox + wstx-asl + + + org.noggit + noggit + + + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + 
lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.carrotsearch.randomizedtesting + junit4-ant + + + com.carrotsearch.randomizedtesting + randomizedtesting-runner + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + joda-time + joda-time + + + junit + junit + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.ant + ant + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.zookeeper + zookeeper + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + 
org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + + + uk.co.flax.lucene-solr-intervals + solr-uima + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-uima + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-digester + commons-digester + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + joda-time + 
joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.uima + AlchemyAPIAnnotator + + + org.apache.uima + OpenCalaisAnnotator + + + org.apache.uima + Tagger + + + org.apache.uima + WhitespaceTokenizer + + + org.apache.uima + uimaj-core + + + org.apache.zookeeper + zookeeper + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + + + uk.co.flax.lucene-solr-intervals + solr-velocity + ${project.version} + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + 
uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.spatial4j + spatial4j + + + commons-beanutils + commons-beanutils + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-collections + commons-collections + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.velocity + velocity + + + org.apache.velocity + velocity-tools + + + org.apache.zookeeper + zookeeper + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + 
+ org.restlet.jee + org.restlet.ext.servlet + + + + + aopalliance + aopalliance + 1.0 + + + asm + asm + 3.1 + + + cglib + cglib-nodep + 2.2 + + + com.adobe.xmp + xmpcore + 5.1.2 + + + com.carrotsearch.randomizedtesting + junit4-ant + 2.1.1 + + + com.carrotsearch.randomizedtesting + randomizedtesting-runner + 2.1.1 + + + junit + junit + + + + + com.carrotsearch + hppc + 0.5.2 + + + com.codahale.metrics + metrics-core + 3.0.1 + + + org.slf4j + slf4j-api + + + + + com.codahale.metrics + metrics-healthchecks + 3.0.1 + + + org.slf4j + slf4j-api + + + + + com.cybozu.labs + langdetect + 1.1-20120112 + + + net.arnx + jsonic + + + + + com.drewnoakes + metadata-extractor + 2.6.2 + + + com.adobe.xmp + xmpcore + + + xerces + xercesImpl + + + + + com.fasterxml.jackson.core + jackson-annotations + 2.3.1 + + + com.fasterxml.jackson.core + jackson-core + 2.3.1 + + + com.fasterxml.jackson.core + jackson-databind + 2.3.1 + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + + + com.google.guava + guava + 14.0.1 + + + com.google.inject.extensions + guice-servlet + 3.0 + + + com.google.inject + guice + + + + + com.google.inject + guice + 3.0 + + + aopalliance + aopalliance + + + javax.inject + javax.inject + + + org.sonatype.sisu.inject + cglib + + + + + com.google.protobuf + protobuf-java + 2.5.0 + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + 1.2 + + + com.googlecode.juniversalchardet + juniversalchardet + 1.0.3 + + + com.googlecode.mp4parser + isoparser + 1.0-RC-1 + + + org.aspectj + aspectjrt + + + + + com.ibm.icu + icu4j + 52.1 + + + com.spatial4j + spatial4j + 0.4.1 + + + com.sun.jersey.contribs + jersey-guice + 1.8 + + + com.google.inject.extensions + guice-servlet + + + com.google.inject + guice + + + javax.inject + javax.inject + + + + + com.sun.jersey + jersey-bundle + 1.8 + + + javax.ws.rs + jsr311-api + + + + + com.sun.jersey + jersey-core + 1.8 + + + com.sun.jersey + jersey-json + 1.8 + + 
+ com.sun.xml.bind + jaxb-impl + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-jaxrs + + + org.codehaus.jackson + jackson-mapper-asl + + + org.codehaus.jackson + jackson-xc + + + org.codehaus.jettison + jettison + + + + + com.sun.jersey + jersey-server + 1.8 + + + asm + asm + + + + + com.sun.xml.bind + jaxb-impl + 2.2.2 + + + javax.xml.bind + jaxb-api + + + + + com.thoughtworks.paranamer + paranamer + 2.3 + + + com.typesafe + config + 1.0.2 + + + commons-beanutils + commons-beanutils + 1.7.0 + + + commons-logging + commons-logging + + + + + commons-cli + commons-cli + 1.2 + + + commons-codec + commons-codec + 1.9 + + + commons-collections + commons-collections + 3.2.1 + + + commons-configuration + commons-configuration + 1.6 + + + commons-beanutils + commons-beanutils-core + + + commons-collections + commons-collections + + + commons-digester + commons-digester + + + commons-lang + commons-lang + + + commons-logging + commons-logging + + + + + commons-digester + commons-digester + 2.0 + + + commons-beanutils + commons-beanutils + + + commons-logging + commons-logging + + + + + commons-fileupload + commons-fileupload + 1.2.1 + + + commons-io + commons-io + 2.1 + + + commons-lang + commons-lang + 2.6 + + + commons-logging + commons-logging + 1.1.1 + + + de.l3s.boilerpipe + boilerpipe + 1.1.0 + + + dom4j + dom4j + 1.6.1 + + + xml-apis + xml-apis + + + + + edu.ucar + netcdf + 4.2-min + + + org.slf4j + slf4j-api + + + + + hsqldb + hsqldb + 1.8.0.10 + + + io.netty + netty + 3.6.2.Final + + + jakarta-regexp + jakarta-regexp + 1.4 + + + javax.activation + activation + 1.1 + + + javax.inject + javax.inject + 1 + + + javax.mail + mail + 1.4.1 + + + javax.activation + activation + + + + + javax.servlet + javax.servlet-api + 3.0.1 + + + javax.servlet + servlet-api + 2.4 + + + jdom + jdom + 1.0 + + + joda-time + joda-time + 2.2 + + + junit + junit + 4.10 + + + org.hamcrest + hamcrest-core + + + + + log4j + log4j + 1.2.16 + + + net.arnx + 
jsonic + 1.2.7 + + + net.sf.saxon + Saxon-HE + 9.5.1-4 + + + net.sourceforge.argparse4j + argparse4j + 0.4.3 + + + net.sourceforge.nekohtml + nekohtml + 1.9.17 + + + xerces + xercesImpl + + + + + org.antlr + antlr-runtime + 3.5 + + + org.antlr + stringtemplate + + + + + org.apache.ant + ant + 1.8.2 + + + org.apache.ant + ant-launcher + + + + + org.apache.avro + avro + 1.7.4 + + + com.thoughtworks.paranamer + paranamer + + + org.apache.commons + commons-compress + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.slf4j + slf4j-api + + + org.xerial.snappy + snappy-java + + + + + org.apache.commons + commons-compress + 1.7 + + + org.tukaani + xz + + + + + org.apache.derby + derby + 10.9.1.0 + + + org.apache.hadoop + hadoop-annotations + 2.2.0 + + + org.apache.hadoop + hadoop-auth + 2.2.0 + + + commons-codec + commons-codec + + + log4j + log4j + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-common + 2.2.0 + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.jcraft + jsch + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-el + commons-el + + + commons-httpclient + commons-httpclient + + + commons-io + commons-io + + + commons-lang + commons-lang + + + commons-logging + commons-logging + + + commons-net + commons-net + + + javax.servlet.jsp + jsp-api + + + javax.servlet + servlet-api + + + log4j + log4j + + + net.java.dev.jets3t + jets3t + + + org.apache.avro + avro + + + org.apache.commons + commons-compress + + + org.apache.commons + commons-math + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.mortbay.jetty + jetty + 
+ + org.mortbay.jetty + jetty-util + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + tomcat + jasper-compiler + + + tomcat + jasper-runtime + + + xmlenc + xmlenc + + + + + org.apache.hadoop + hadoop-common + 2.2.0 + tests + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.jcraft + jsch + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-el + commons-el + + + commons-httpclient + commons-httpclient + + + commons-io + commons-io + + + commons-lang + commons-lang + + + commons-logging + commons-logging + + + commons-net + commons-net + + + javax.servlet.jsp + jsp-api + + + javax.servlet + servlet-api + + + log4j + log4j + + + net.java.dev.jets3t + jets3t + + + org.apache.avro + avro + + + org.apache.commons + commons-compress + + + org.apache.commons + commons-math + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + tomcat + jasper-compiler + + + tomcat + jasper-runtime + + + xmlenc + xmlenc + + + + + org.apache.hadoop + hadoop-hdfs + 2.2.0 + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-server + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-daemon + commons-daemon + + + commons-io + commons-io + + + commons-lang + commons-lang + + + commons-logging + commons-logging + + + javax.servlet.jsp + jsp-api + + + javax.servlet + servlet-api + + + log4j + log4j + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + 
org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + tomcat + jasper-runtime + + + xmlenc + xmlenc + + + + + org.apache.hadoop + hadoop-hdfs + 2.2.0 + tests + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-server + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-daemon + commons-daemon + + + commons-io + commons-io + + + commons-lang + commons-lang + + + commons-logging + commons-logging + + + javax.servlet.jsp + jsp-api + + + javax.servlet + servlet-api + + + log4j + log4j + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + tomcat + jasper-runtime + + + xmlenc + xmlenc + + + + + org.apache.hadoop + hadoop-mapreduce-client-app + 2.2.0 + + + com.google.inject.extensions + guice-servlet + + + com.google.protobuf + protobuf-java + + + io.netty + netty + + + org.apache.avro + avro + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-mapreduce-client-common + + + org.apache.hadoop + hadoop-mapreduce-client-shuffle + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-mapreduce-client-common + 2.2.0 + + + com.google.inject.extensions + guice-servlet + + + com.google.protobuf + protobuf-java + + + io.netty + netty + + + org.apache.avro + avro + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-mapreduce-client-core + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-server-common + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-mapreduce-client-core + 2.2.0 + + + com.google.inject.extensions + guice-servlet + + + com.google.protobuf + protobuf-java + + + 
io.netty + netty + + + org.apache.avro + avro + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-yarn-common + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-mapreduce-client-hs + 2.2.0 + + + com.google.inject.extensions + guice-servlet + + + com.google.protobuf + protobuf-java + + + io.netty + netty + + + org.apache.avro + avro + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-mapreduce-client-app + + + org.apache.hadoop + hadoop-mapreduce-client-common + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-mapreduce-client-jobclient + 2.2.0 + + + com.google.inject.extensions + guice-servlet + + + com.google.protobuf + protobuf-java + + + io.netty + netty + + + org.apache.avro + avro + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-mapreduce-client-common + + + org.apache.hadoop + hadoop-mapreduce-client-shuffle + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-mapreduce-client-jobclient + 2.2.0 + tests + + + com.google.inject.extensions + guice-servlet + + + com.google.protobuf + protobuf-java + + + io.netty + netty + + + org.apache.avro + avro + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-mapreduce-client-common + + + org.apache.hadoop + hadoop-mapreduce-client-shuffle + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-mapreduce-client-shuffle + 2.2.0 + + + com.google.inject.extensions + guice-servlet + + + com.google.protobuf + protobuf-java + + + io.netty + netty + + + org.apache.avro + avro + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-mapreduce-client-core + + + org.apache.hadoop + hadoop-yarn-server-nodemanager + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-yarn-api + 2.2.0 + + + 
com.google.inject.extensions + guice-servlet + + + com.google.inject + guice + + + com.google.protobuf + protobuf-java + + + com.sun.jersey.contribs + jersey-guice + + + com.sun.jersey.jersey-test-framework + jersey-test-framework-grizzly2 + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + commons-io + commons-io + + + io.netty + netty + + + org.apache.hadoop + hadoop-annotations + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-yarn-client + 2.2.0 + + + com.google.inject.extensions + guice-servlet + + + com.google.inject + guice + + + com.google.protobuf + protobuf-java + + + com.sun.jersey.contribs + jersey-guice + + + com.sun.jersey.jersey-test-framework + jersey-test-framework-grizzly2 + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + commons-io + commons-io + + + io.netty + netty + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-yarn-common + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-yarn-common + 2.2.0 + + + com.google.inject.extensions + guice-servlet + + + com.google.inject + guice + + + com.google.protobuf + protobuf-java + + + com.sun.jersey.contribs + jersey-guice + + + com.sun.jersey.jersey-test-framework + jersey-test-framework-grizzly2 + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + commons-io + commons-io + + + io.netty + netty + + + log4j + log4j + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-yarn-api + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-yarn-server-common + 2.2.0 + + + com.google.inject.extensions + guice-servlet + + + com.google.inject + guice + + + com.google.protobuf + protobuf-java + + + com.sun.jersey.contribs + jersey-guice + + + com.sun.jersey.jersey-test-framework + jersey-test-framework-grizzly2 + + 
+ com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + commons-io + commons-io + + + io.netty + netty + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-yarn-common + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-yarn-server-nodemanager + 2.2.0 + + + com.google.inject.extensions + guice-servlet + + + com.google.inject + guice + + + com.google.protobuf + protobuf-java + + + com.sun.jersey.contribs + jersey-guice + + + com.sun.jersey.jersey-test-framework + jersey-test-framework-grizzly2 + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + commons-io + commons-io + + + io.netty + netty + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-yarn-server-common + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + 2.2.0 + + + com.google.inject.extensions + guice-servlet + + + com.google.inject + guice + + + com.google.protobuf + protobuf-java + + + com.sun.jersey.contribs + jersey-guice + + + com.sun.jersey.jersey-test-framework + jersey-test-framework-grizzly2 + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + commons-io + commons-io + + + io.netty + netty + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-yarn-server-common + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-yarn-server-tests + 2.2.0 + tests + + + com.google.inject.extensions + guice-servlet + + + com.google.inject + guice + + + com.google.protobuf + protobuf-java + + + com.sun.jersey.contribs + jersey-guice + + + com.sun.jersey.jersey-test-framework + jersey-test-framework-grizzly2 + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + commons-io + commons-io + + + io.netty + netty + + + org.apache.hadoop + 
hadoop-annotations + + + org.apache.hadoop + hadoop-yarn-server-common + + + org.apache.hadoop + hadoop-yarn-server-nodemanager + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + 2.2.0 + + + com.google.inject.extensions + guice-servlet + + + com.google.inject + guice + + + com.google.protobuf + protobuf-java + + + com.sun.jersey.contribs + jersey-guice + + + com.sun.jersey.jersey-test-framework + jersey-test-framework-grizzly2 + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + commons-io + commons-io + + + io.netty + netty + + + javax.servlet + servlet-api + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-server-common + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.httpcomponents + httpclient + 4.3.1 + + + commons-codec + commons-codec + + + commons-logging + commons-logging + + + org.apache.httpcomponents + httpcore + + + + + org.apache.httpcomponents + httpcore + 4.3 + + + org.apache.httpcomponents + httpmime + 4.3.1 + + + org.apache.httpcomponents + httpclient + + + + + org.apache.james + apache-mime4j-core + 0.7.2 + + + org.apache.james + apache-mime4j-dom + 0.7.2 + + + org.apache.james + apache-mime4j-core + + + + + org.apache.mahout + mahout-collections + 1.0 + + + org.apache.mahout + mahout-math + 0.6 + + + com.google.guava + guava + + + org.apache.commons + commons-math + + + org.apache.mahout + mahout-collections + + + org.slf4j + slf4j-api + + + org.uncommons.maths + uncommons-maths + + + + + org.apache.mrunit + mrunit + 1.0.0 + hadoop2 + + + commons-logging + commons-logging + + + junit + junit + + + org.mockito + mockito-all + + + + + org.apache.pdfbox + fontbox + 1.8.4 + + + commons-logging + commons-logging + + + + + org.apache.pdfbox + jempbox + 1.8.4 + + + org.apache.pdfbox + pdfbox + 
1.8.4 + + + commons-logging + commons-logging + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + + + org.apache.poi + poi + 3.10-beta2 + + + commons-codec + commons-codec + + + + + org.apache.poi + poi-ooxml + 3.10-beta2 + + + dom4j + dom4j + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml-schemas + + + + + org.apache.poi + poi-ooxml-schemas + 3.10-beta2 + + + org.apache.xmlbeans + xmlbeans + + + + + org.apache.poi + poi-scratchpad + 3.10-beta2 + + + org.apache.poi + poi + + + + + org.apache.tika + tika-core + 1.5 + + + org.apache.tika + tika-parsers + 1.5 + + + com.drewnoakes + metadata-extractor + + + com.googlecode.juniversalchardet + juniversalchardet + + + com.googlecode.mp4parser + isoparser + + + com.uwyn + jhighlight + + + commons-codec + commons-codec + + + de.l3s.boilerpipe + boilerpipe + + + edu.ucar + netcdf + + + org.apache.commons + commons-compress + + + org.apache.geronimo.specs + geronimo-stax-api_1.0_spec + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.ow2.asm + asm-debug-all + + + rome + rome + + + + + org.apache.tika + tika-xmp + 1.5 + + + com.adobe.xmp + xmpcore + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + + + org.apache.uima + AlchemyAPIAnnotator + 2.3.1 + + + commons-digester + commons-digester + + + commons-lang + commons-lang + + + + + org.apache.uima + OpenCalaisAnnotator + 2.3.1 + + + commons-io + commons-io + + + + + org.apache.uima + Tagger + 2.3.1 + + + org.apache.uima + WhitespaceTokenizer + + + + + org.apache.uima + WhitespaceTokenizer + 2.3.1 + + + org.apache.uima + 
uimaj-core + 2.3.1 + + + org.apache.velocity + velocity + 1.7 + + + commons-collections + commons-collections + + + commons-lang + commons-lang + + + + + org.apache.velocity + velocity-tools + 2.0 + + + commons-beanutils + commons-beanutils + + + commons-chain + commons-chain + + + commons-collections + commons-collections + + + commons-digester + commons-digester + + + commons-logging + commons-logging + + + commons-validator + commons-validator + + + dom4j + dom4j + + + org.apache.struts + struts-core + + + org.apache.struts + struts-taglib + + + org.apache.struts + struts-tiles + + + org.apache.velocity + velocity + + + oro + oro + + + sslext + sslext + + + + + org.apache.xmlbeans + xmlbeans + 2.3.0 + + + stax + stax-api + + + + + org.apache.zookeeper + zookeeper + 3.4.6 + + + io.netty + netty + + + jline + jline + + + log4j + log4j + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + + org.aspectj + aspectjrt + 1.6.11 + + + org.bouncycastle + bcmail-jdk15 + 1.45 + + + org.bouncycastle + bcprov-jdk15 + + + + + org.bouncycastle + bcprov-jdk15 + 1.45 + + + org.carrot2.attributes + attributes-binder + 1.2.1 + + + com.google.guava + guava + + + org.simpleframework + simple-xml + + + + + org.carrot2 + carrot2-mini + 3.9.0 + + + com.carrotsearch + hppc + + + com.google.guava + guava + + + commons-lang + commons-lang + + + org.apache.mahout + mahout-collections + + + org.apache.mahout + mahout-math + + + org.carrot2.attributes + attributes-binder + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.slf4j + slf4j-api + + + + + org.carrot2 + morfologik-fsa + 1.7.1 + + + org.carrot2 + morfologik-polish + 1.7.1 + + + org.carrot2 + morfologik-stemming + + + + + org.carrot2 + morfologik-stemming + 1.7.1 + + + org.carrot2 + morfologik-fsa + + + + + org.ccil.cowan.tagsoup + tagsoup + 1.2.1 + + + org.codehaus.jackson + jackson-core-asl + 1.9.13 + + + org.codehaus.jackson + jackson-jaxrs + 1.9.13 + + + 
org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + + + org.codehaus.jackson + jackson-mapper-asl + 1.9.13 + + + org.codehaus.jackson + jackson-core-asl + + + + + org.codehaus.woodstox + wstx-asl + 3.2.7 + + + stax + stax-api + + + + + org.easymock + easymock + 3.0 + + + cglib + cglib-nodep + + + org.objenesis + objenesis + + + + + org.eclipse.jetty.orbit + javax.servlet + 3.0.0.v201112011016 + + + org.eclipse.jetty + jetty-continuation + 8.1.10.v20130312 + + + org.eclipse.jetty + jetty-deploy + 8.1.10.v20130312 + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + + + org.eclipse.jetty + jetty-http + 8.1.10.v20130312 + + + org.eclipse.jetty + jetty-io + + + + + org.eclipse.jetty + jetty-io + 8.1.10.v20130312 + + + org.eclipse.jetty + jetty-util + + + + + org.eclipse.jetty + jetty-jmx + 8.1.10.v20130312 + + + org.eclipse.jetty + jetty-util + + + + + org.eclipse.jetty + jetty-security + 8.1.10.v20130312 + + + org.eclipse.jetty + jetty-server + + + + + org.eclipse.jetty + jetty-server + 8.1.10.v20130312 + + + org.eclipse.jetty.orbit + javax.servlet + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-http + + + + + org.eclipse.jetty + jetty-servlet + 8.1.10.v20130312 + + + org.eclipse.jetty + jetty-security + + + + + org.eclipse.jetty + jetty-util + 8.1.10.v20130312 + + + org.eclipse.jetty + jetty-webapp + 8.1.10.v20130312 + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-xml + + + + + org.eclipse.jetty + jetty-xml + 8.1.10.v20130312 + + + org.eclipse.jetty + jetty-util + + + + + org.gagravarr + vorbis-java-core + 0.1 + + + org.gagravarr + vorbis-java-tika + 0.1 + + + org.apache.tika + tika-core + + + org.gagravarr + vorbis-java-core + + + + + org.kitesdk + kite-morphlines-avro + 0.12.1 + + + org.apache.avro + avro + + + org.kitesdk + kite-morphlines-core + + + + + org.kitesdk + kite-morphlines-core + 0.12.1 + + + com.codahale.metrics + metrics-core + + 
+ com.codahale.metrics + metrics-healthchecks + + + com.google.guava + guava + + + com.typesafe + config + + + org.slf4j + slf4j-api + + + + + org.kitesdk + kite-morphlines-core + 0.12.1 + tests + + + com.codahale.metrics + metrics-core + + + com.codahale.metrics + metrics-healthchecks + + + com.google.guava + guava + + + com.typesafe + config + + + org.slf4j + slf4j-api + + + + + org.kitesdk + kite-morphlines-hadoop-sequencefile + 0.12.1 + + + org.kitesdk + kite-morphlines-core + + + + + org.kitesdk + kite-morphlines-json + 0.12.1 + + + com.fasterxml.jackson.core + jackson-databind + + + org.kitesdk + kite-morphlines-core + + + + + org.kitesdk + kite-morphlines-saxon + 0.12.1 + + + net.sf.saxon + Saxon-HE + + + org.ccil.cowan.tagsoup + tagsoup + + + org.kitesdk + kite-morphlines-core + + + + + org.kitesdk + kite-morphlines-tika-core + 0.12.1 + + + org.apache.tika + tika-core + + + org.kitesdk + kite-morphlines-core + + + + + org.kitesdk + kite-morphlines-tika-decompress + 0.12.1 + + + org.apache.commons + commons-compress + + + org.apache.tika + tika-core + + + org.kitesdk + kite-morphlines-core + + + + + org.kitesdk + kite-morphlines-twitter + 0.12.1 + + + com.fasterxml.jackson.core + jackson-databind + + + org.kitesdk + kite-morphlines-core + + + + + org.mockito + mockito-core + 1.9.5 + + + org.hamcrest + hamcrest-core + + + org.objenesis + objenesis + + + + + org.mortbay.jetty + jetty + 6.1.26 + + + org.mortbay.jetty + jetty-util + + + org.mortbay.jetty + servlet-api + + + + + org.mortbay.jetty + jetty-util + 6.1.26 + + + org.noggit + noggit + 0.5 + + + org.objenesis + objenesis + 1.2 + + + org.ow2.asm + asm + 4.1 + + + org.ow2.asm + asm-commons + 4.1 + + + org.ow2.asm + asm-tree + + + + + org.restlet.jee + org.restlet + 2.1.1 + + + org.restlet.jee + org.restlet.ext.servlet + 2.1.1 + + + org.restlet.jee + org.restlet + + + + + org.simpleframework + simple-xml + 2.7 + + + stax + stax + + + stax + stax-api + + + xpp3 + xpp3 + + + + + org.slf4j + jcl-over-slf4j + 
1.7.6 + + + org.slf4j + slf4j-api + + + + + org.slf4j + jul-to-slf4j + 1.7.6 + + + org.slf4j + slf4j-api + + + + + org.slf4j + slf4j-api + 1.7.6 + + + org.slf4j + slf4j-log4j12 + 1.7.6 + + + log4j + log4j + + + org.slf4j + slf4j-api + + + + + org.tukaani + xz + 1.2 + + + org.xerial.snappy + snappy-java + 1.0.4.1 + + + rome + rome + 0.9 + + + jdom + jdom + + + + + xerces + xercesImpl + 2.9.1 + + + xml-apis + xml-apis + + + + + + + 2.2.1 + + + + junit + junit + test + + + com.carrotsearch.randomizedtesting + randomizedtesting-runner + test + + + + + + + de.thetaphi + forbiddenapis + 1.4 + + + false + ${java.compat.version} + + + + org.apache.maven.plugins + maven-antrun-plugin + 1.7 + + + org.apache.maven.plugins + maven-release-plugin + 2.5 + + + org.apache.maven.plugins + maven-clean-plugin + 2.5 + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + ${java.compat.version} + ${java.compat.version} + + + + org.apache.maven.plugins + maven-dependency-plugin + 2.8 + + + org.apache.maven.plugins + maven-deploy-plugin + 2.7 + + + org.apache.maven.plugins + maven-enforcer-plugin + 1.3 + + + org.apache.maven.plugins + maven-install-plugin + 2.4 + + + org.apache.maven.plugins + maven-jar-plugin + 2.4 + + + + false + false + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.9.1 + + + org.apache.maven.plugins + maven-resources-plugin + 2.6 + + + org.apache.maven.plugins + maven-site-plugin + 3.3 + + + org.apache.maven.plugins + maven-surefire-plugin + 2.15 + + random + plain + ${project.build.directory}/test + true + -Xmx512M -XX:MaxPermSize=256M + + . 
+ true + + + + + ${tests.iters} + ${tests.seed} + ${tests.nightly} + ${tests.weekly} + ${tests.awaitsfix} + ${tests.slow} + + + 1 + ${tests.codec} + ${tests.directory} + ${tests.infostream} + ${tests.locale} + ${tests.luceneMatchVersion} + ${tests.multiplier} + ${tests.postingsformat} + ${tests.timezone} + ${tests.verbose} + + + + + org.apache.maven.plugins + maven-war-plugin + 2.3 + + + + + + + + ${project.groupId} + ${project.groupId} + ${project.name} + + ${base.specification.version}.${now.version} + The Apache Software Foundation + + ${project.version} ${svn.revision} - ${user.name} - ${now.timestamp} + The Apache Software Foundation + ${project.groupId} + ${java.compat.version} + ${java.compat.version} + + + + + + org.codehaus.mojo + build-helper-maven-plugin + 1.8 + + + org.codehaus.mojo + buildnumber-maven-plugin + 1.2 + + + org.mortbay.jetty + jetty-maven-plugin + ${jetty.version} + + + org.codehaus.gmaven + gmaven-plugin + 1.5 + + + + + + org.codehaus.gmaven + gmaven-plugin + + + generate-timestamps-and-get-top-level-basedir + validate + + execute + + + + project.properties['now.timestamp'] = "${maven.build.timestamp}" + project.properties['now.version'] = ("${maven.build.timestamp}" =~ /[- :]/).replaceAll(".") + project.properties['now.year'] = "${maven.build.timestamp}".substring(0, 4) + project.properties['top-level'] = (project.basedir.getAbsolutePath() =~ /[\\\\\/]maven-build.*/).replaceAll("") + + + + + + + org.codehaus.mojo + buildnumber-maven-plugin + + + validate + + create + + + + + false + false + true + NO-REVISION-AVAILABLE + svn.revision + ${top-level} + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + enforce-java-compat-version-and-maven-2.2.1 + + enforce + + + + + Java ${java.compat.version}+ is required. + [${java.compat.version},) + + + Maven 2.2.1+ is required. 
+ [2.2.1,) + + + + + + + + + + + maven-jar-plugin + + + ${project.build.outputDirectory}/META-INF/MANIFEST.MF + + + + + org.apache.felix + maven-bundle-plugin + 2.4.0 + + + *;-split-package:=merge-first + + + + + + ${project.groupId} + ${project.groupId} + ${project.name} + + ${base.specification.version}.${now.version} + The Apache Software Foundation + + ${project.version} ${svn.revision} - ${user.name} - ${now.timestamp} + The Apache Software Foundation + ${project.groupId} + ${java.compat.version} + ${java.compat.version} + + + + + bundle-manifest + process-classes + + manifest + + + + + + + + + + bootstrap + + + + org.apache.maven.plugins + maven-install-plugin + + + + + + + + diff --git a/maven-build/solr/contrib/analysis-extras/pom.xml b/maven-build/solr/contrib/analysis-extras/pom.xml new file mode 100644 index 0000000..5d1aefd --- /dev/null +++ b/maven-build/solr/contrib/analysis-extras/pom.xml @@ -0,0 +1,358 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-analysis-extras + jar + Apache Solr Analysis Extras + Apache Solr Analysis Extras + + solr/contrib/analysis-extras + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-icu + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-morfologik + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-smartcn + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-stempel + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io 
+ commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.zookeeper + zookeeper + + + org.carrot2 + morfologik-fsa + + + org.carrot2 + morfologik-polish + + + org.carrot2 + morfologik-stemming + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + diff --git a/maven-build/solr/contrib/clustering/pom.xml b/maven-build/solr/contrib/clustering/pom.xml new file mode 100644 index 0000000..ceb425c --- /dev/null +++ b/maven-build/solr/contrib/clustering/pom.xml @@ -0,0 +1,354 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-clustering + 
jar + Apache Solr Clustering + Apache Solr Clustering + + solr/contrib/clustering + ../../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.hadoop + hadoop-annotations 
+ + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.mahout + mahout-collections + + + org.apache.mahout + mahout-math + + + org.apache.zookeeper + zookeeper + + + org.carrot2 + carrot2-mini + + + org.carrot2.attributes + attributes-binder + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.simpleframework + simple-xml + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + diff --git a/maven-build/solr/contrib/dataimporthandler-extras/pom.xml b/maven-build/solr/contrib/dataimporthandler-extras/pom.xml new file mode 100644 index 0000000..322bb03 --- /dev/null +++ b/maven-build/solr/contrib/dataimporthandler-extras/pom.xml @@ -0,0 +1,473 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + 
uk.co.flax.lucene-solr-intervals + solr-dataimporthandler-extras + jar + Apache Solr DataImportHandler Extras + Apache Solr DataImportHandler Extras + + solr/contrib/dataimporthandler-extras + ../../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-dataimporthandler + ${project.version} + test-jar + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-dataimporthandler + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + asm + asm + + + com.adobe.xmp + xmpcore + + + com.carrotsearch + hppc + + + com.drewnoakes + metadata-extractor + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.googlecode.juniversalchardet + 
juniversalchardet + + + com.googlecode.mp4parser + isoparser + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + de.l3s.boilerpipe + boilerpipe + + + dom4j + dom4j + + + edu.ucar + netcdf + + + javax.activation + activation + + + javax.mail + mail + + + jdom + jdom + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.commons + commons-compress + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-ooxml-schemas + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + org.apache.tika + tika-xmp + + + org.apache.xmlbeans + xmlbeans + + + org.apache.zookeeper + zookeeper + + + org.aspectj + aspectjrt + + + org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + 
jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + org.tukaani + xz + + + rome + rome + + + xerces + xercesImpl + + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + diff --git a/maven-build/solr/contrib/dataimporthandler/pom.xml b/maven-build/solr/contrib/dataimporthandler/pom.xml new file mode 100644 index 0000000..9b24ea4 --- /dev/null +++ b/maven-build/solr/contrib/dataimporthandler/pom.xml @@ -0,0 +1,361 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-dataimporthandler + jar + Apache Solr DataImportHandler + Apache Solr DataImportHandler + + solr/contrib/dataimporthandler + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + javax.activation + activation + + + javax.mail + mail + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + 
hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.zookeeper + zookeeper + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + + hsqldb + hsqldb + test + + + org.apache.derby + derby + test + + + org.easymock + easymock + test + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + + diff --git a/maven-build/solr/contrib/extraction/pom.xml b/maven-build/solr/contrib/extraction/pom.xml new file mode 100644 index 0000000..93137d3 --- /dev/null +++ b/maven-build/solr/contrib/extraction/pom.xml @@ -0,0 +1,476 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-cell + jar + Apache Solr Content Extraction Library + + Apache Solr Content Extraction Library integrates Apache Tika + content extraction framework into Solr + + + 
solr/contrib/extraction + ../../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + asm + asm + + + com.adobe.xmp + xmpcore + + + com.carrotsearch + hppc + + + com.drewnoakes + metadata-extractor + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.googlecode.juniversalchardet + juniversalchardet + + + com.googlecode.mp4parser + isoparser + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + 
commons-lang + + + de.l3s.boilerpipe + boilerpipe + + + dom4j + dom4j + + + edu.ucar + netcdf + + + jdom + jdom + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.commons + commons-compress + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-ooxml-schemas + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + org.apache.tika + tika-xmp + + + org.apache.xmlbeans + xmlbeans + + + org.apache.zookeeper + zookeeper + + + org.aspectj + aspectjrt + + + org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + 
jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + org.tukaani + xz + + + rome + rome + + + xerces + xercesImpl + + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + + diff --git a/maven-build/solr/contrib/langid/pom.xml b/maven-build/solr/contrib/langid/pom.xml new file mode 100644 index 0000000..edac0bf --- /dev/null +++ b/maven-build/solr/contrib/langid/pom.xml @@ -0,0 +1,490 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-langid + jar + Apache Solr Language Identifier + + This module is intended to be used while indexing documents. + It is implemented as an UpdateProcessor to be placed in an UpdateChain. + Its purpose is to identify language from documents and tag the document with language code. + + + solr/contrib/langid + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + asm + asm + + + com.adobe.xmp + xmpcore + + + com.carrotsearch + hppc + + + com.cybozu.labs + langdetect + + + com.drewnoakes + metadata-extractor + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.googlecode.juniversalchardet + juniversalchardet + + + com.googlecode.mp4parser + isoparser + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + 
commons-lang + + + de.l3s.boilerpipe + boilerpipe + + + dom4j + dom4j + + + edu.ucar + netcdf + + + jdom + jdom + + + joda-time + joda-time + + + log4j + log4j + + + net.arnx + jsonic + + + org.antlr + antlr-runtime + + + org.apache.commons + commons-compress + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-ooxml-schemas + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + org.apache.tika + tika-xmp + + + org.apache.xmlbeans + xmlbeans + + + org.apache.zookeeper + zookeeper + + + org.aspectj + aspectjrt + + + org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.slf4j + jcl-over-slf4j + true + 
+ + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + org.tukaani + xz + + + rome + rome + + + xerces + xercesImpl + + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + + diff --git a/maven-build/solr/contrib/map-reduce/pom.xml b/maven-build/solr/contrib/map-reduce/pom.xml new file mode 100644 index 0000000..86bdae1 --- /dev/null +++ b/maven-build/solr/contrib/map-reduce/pom.xml @@ -0,0 +1,773 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-map-reduce + jar + Apache Solr map-reduce index construction + Apache Solr - map-reduce index construction + + solr/contrib/map-reduce + ../../../.. 
+ ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-morphlines-core + ${project.version} + test-jar + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-cell + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-morphlines-cell + + + uk.co.flax.lucene-solr-intervals + solr-morphlines-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + asm + asm + + + com.adobe.xmp + xmpcore + + + com.carrotsearch + hppc + + + com.codahale.metrics + metrics-core + + + com.codahale.metrics + metrics-healthchecks + + + com.drewnoakes + metadata-extractor + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.googlecode.juniversalchardet + 
juniversalchardet + + + com.googlecode.mp4parser + isoparser + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + com.typesafe + config + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + de.l3s.boilerpipe + boilerpipe + + + dom4j + dom4j + + + edu.ucar + netcdf + + + jdom + jdom + + + joda-time + joda-time + + + log4j + log4j + + + net.sf.saxon + Saxon-HE + + + net.sourceforge.argparse4j + argparse4j + + + org.antlr + antlr-runtime + + + org.apache.commons + commons-compress + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.hadoop + hadoop-mapreduce-client-core + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-ooxml-schemas + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + org.apache.tika + tika-xmp + + + org.apache.xmlbeans + xmlbeans + + + org.apache.zookeeper + zookeeper + + + org.aspectj + aspectjrt + + + org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + 
org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.kitesdk + kite-morphlines-avro + + + org.kitesdk + kite-morphlines-core + + + org.kitesdk + kite-morphlines-hadoop-sequencefile + + + org.kitesdk + kite-morphlines-json + + + org.kitesdk + kite-morphlines-saxon + + + org.kitesdk + kite-morphlines-tika-core + + + org.kitesdk + kite-morphlines-tika-decompress + + + org.kitesdk + kite-morphlines-twitter + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + org.tukaani + xz + + + rome + rome + + + xerces + xercesImpl + + + + aopalliance + aopalliance + test + + + com.fasterxml.jackson.core + jackson-annotations + test + + + com.fasterxml.jackson.core + jackson-core + test + + + com.fasterxml.jackson.core + jackson-databind + test + + + com.google.inject + guice + test + + + com.google.inject.extensions + guice-servlet + test + + + com.sun.jersey + jersey-bundle + test + + + com.sun.jersey + jersey-core + test + + + com.sun.jersey + jersey-json + test + + + com.sun.jersey + jersey-server + test + + + com.sun.jersey.contribs + jersey-guice + test + + + com.sun.xml.bind + jaxb-impl + test + + + com.thoughtworks.paranamer + paranamer + test + + + commons-collections + commons-collections + test + + + io.netty + netty + test + + + javax.inject + javax.inject + test + + + org.apache.avro + avro + test + + + org.apache.hadoop + hadoop-common + test + tests + + + org.apache.hadoop + hadoop-hdfs + test + tests + + + org.apache.hadoop + hadoop-mapreduce-client-app + test + + + 
org.apache.hadoop + hadoop-mapreduce-client-common + test + + + org.apache.hadoop + hadoop-mapreduce-client-hs + test + + + org.apache.hadoop + hadoop-mapreduce-client-jobclient + test + + + org.apache.hadoop + hadoop-mapreduce-client-jobclient + test + tests + + + org.apache.hadoop + hadoop-mapreduce-client-shuffle + test + + + org.apache.hadoop + hadoop-yarn-api + test + + + org.apache.hadoop + hadoop-yarn-client + test + + + org.apache.hadoop + hadoop-yarn-common + test + + + org.apache.hadoop + hadoop-yarn-server-common + test + + + org.apache.hadoop + hadoop-yarn-server-nodemanager + test + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + test + + + org.apache.hadoop + hadoop-yarn-server-tests + test + tests + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + test + + + org.apache.mrunit + mrunit + test + hadoop2 + + + org.codehaus.jackson + jackson-core-asl + test + + + org.codehaus.jackson + jackson-jaxrs + test + + + org.codehaus.jackson + jackson-mapper-asl + test + + + org.kitesdk + kite-morphlines-core + test + tests + + + org.mockito + mockito-core + test + + + org.mortbay.jetty + jetty + test + + + org.mortbay.jetty + jetty-util + test + + + org.objenesis + objenesis + test + + + org.xerial.snappy + snappy-java + test + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + + ${module-path}/../morphlines-core/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + + diff --git a/maven-build/solr/contrib/morphlines-cell/pom.xml b/maven-build/solr/contrib/morphlines-cell/pom.xml new file mode 100644 index 0000000..d8b839d --- /dev/null +++ b/maven-build/solr/contrib/morphlines-cell/pom.xml @@ -0,0 +1,754 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + 
r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-morphlines-cell + jar + Apache Solr Cell Morphlines + Apache Solr - Cell Morphlines + + solr/contrib/morphlines-cell + ../../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-morphlines-core + ${project.version} + test-jar + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-cell + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-morphlines-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + asm + asm + + + com.adobe.xmp + xmpcore + + + com.carrotsearch + hppc + + + com.codahale.metrics + metrics-core + + + com.codahale.metrics + metrics-healthchecks + + + com.drewnoakes + metadata-extractor + + + com.google.guava + guava + 
+ + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.googlecode.juniversalchardet + juniversalchardet + + + com.googlecode.mp4parser + isoparser + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + com.typesafe + config + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + de.l3s.boilerpipe + boilerpipe + + + dom4j + dom4j + + + edu.ucar + netcdf + + + jdom + jdom + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.commons + commons-compress + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-ooxml-schemas + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + org.apache.tika + tika-xmp + + + org.apache.xmlbeans + xmlbeans + + + org.apache.zookeeper + zookeeper + + + org.aspectj + aspectjrt + + + org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + 
jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.kitesdk + kite-morphlines-avro + + + org.kitesdk + kite-morphlines-core + + + org.kitesdk + kite-morphlines-json + + + org.kitesdk + kite-morphlines-tika-core + + + org.kitesdk + kite-morphlines-tika-decompress + + + org.kitesdk + kite-morphlines-twitter + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + org.tukaani + xz + + + rome + rome + + + xerces + xercesImpl + + + + aopalliance + aopalliance + test + + + com.fasterxml.jackson.core + jackson-annotations + test + + + com.fasterxml.jackson.core + jackson-core + test + + + com.fasterxml.jackson.core + jackson-databind + test + + + com.google.inject + guice + test + + + com.google.inject.extensions + guice-servlet + test + + + com.sun.jersey + jersey-bundle + test + + + com.sun.jersey + jersey-core + test + + + com.sun.jersey + jersey-json + test + + + com.sun.jersey + jersey-server + test + + + com.sun.jersey.contribs + jersey-guice + test + + + com.sun.xml.bind + jaxb-impl + test + + + com.thoughtworks.paranamer + paranamer + test + + + commons-collections + commons-collections + test + + + io.netty + netty + test + + + javax.inject + javax.inject + test + + + org.apache.avro + avro + test + + + org.apache.hadoop + hadoop-common + test + tests + + + org.apache.hadoop + hadoop-hdfs + test + tests + + + org.apache.hadoop + hadoop-mapreduce-client-app + test + + + org.apache.hadoop + hadoop-mapreduce-client-common + test + + + org.apache.hadoop + 
hadoop-mapreduce-client-core + test + + + org.apache.hadoop + hadoop-mapreduce-client-hs + test + + + org.apache.hadoop + hadoop-mapreduce-client-jobclient + test + + + org.apache.hadoop + hadoop-mapreduce-client-jobclient + test + tests + + + org.apache.hadoop + hadoop-mapreduce-client-shuffle + test + + + org.apache.hadoop + hadoop-yarn-api + test + + + org.apache.hadoop + hadoop-yarn-client + test + + + org.apache.hadoop + hadoop-yarn-common + test + + + org.apache.hadoop + hadoop-yarn-server-common + test + + + org.apache.hadoop + hadoop-yarn-server-nodemanager + test + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + test + + + org.apache.hadoop + hadoop-yarn-server-tests + test + tests + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + test + + + org.apache.mrunit + mrunit + test + hadoop2 + + + org.codehaus.jackson + jackson-core-asl + test + + + org.codehaus.jackson + jackson-jaxrs + test + + + org.codehaus.jackson + jackson-mapper-asl + test + + + org.kitesdk + kite-morphlines-core + test + tests + + + org.mockito + mockito-core + test + + + org.mortbay.jetty + jetty + test + + + org.mortbay.jetty + jetty-util + test + + + org.objenesis + objenesis + test + + + org.xerial.snappy + snappy-java + test + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + + ${module-path}/../morphlines-core/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + + diff --git a/maven-build/solr/contrib/morphlines-core/pom.xml b/maven-build/solr/contrib/morphlines-core/pom.xml new file mode 100644 index 0000000..4b0218a --- /dev/null +++ b/maven-build/solr/contrib/morphlines-core/pom.xml @@ -0,0 +1,734 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + 
../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-morphlines-core + jar + Apache Solr Morphlines Core + Apache Solr - Morphlines Core + + solr/contrib/morphlines-core + ../../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-cell + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + asm + asm + + + com.adobe.xmp + xmpcore + + + com.carrotsearch + hppc + + + com.codahale.metrics + metrics-core + + + com.codahale.metrics + metrics-healthchecks + + + com.drewnoakes + metadata-extractor + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.googlecode.juniversalchardet + juniversalchardet + + + 
com.googlecode.mp4parser + isoparser + + + com.ibm.icu + icu4j + + + com.spatial4j + spatial4j + + + com.typesafe + config + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + de.l3s.boilerpipe + boilerpipe + + + dom4j + dom4j + + + edu.ucar + netcdf + + + jdom + jdom + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.commons + commons-compress + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.james + apache-mime4j-core + + + org.apache.james + apache-mime4j-dom + + + org.apache.pdfbox + fontbox + + + org.apache.pdfbox + jempbox + + + org.apache.pdfbox + pdfbox + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + org.apache.poi + poi-ooxml-schemas + + + org.apache.poi + poi-scratchpad + + + org.apache.tika + tika-core + + + org.apache.tika + tika-parsers + + + org.apache.tika + tika-xmp + + + org.apache.xmlbeans + xmlbeans + + + org.apache.zookeeper + zookeeper + + + org.aspectj + aspectjrt + + + org.bouncycastle + bcmail-jdk15 + + + org.bouncycastle + bcprov-jdk15 + + + org.ccil.cowan.tagsoup + tagsoup + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + 
org.gagravarr + vorbis-java-core + + + org.gagravarr + vorbis-java-tika + + + org.kitesdk + kite-morphlines-avro + + + org.kitesdk + kite-morphlines-core + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + org.tukaani + xz + + + rome + rome + + + xerces + xercesImpl + + + + aopalliance + aopalliance + test + + + com.fasterxml.jackson.core + jackson-annotations + test + + + com.fasterxml.jackson.core + jackson-core + test + + + com.fasterxml.jackson.core + jackson-databind + test + + + com.google.inject + guice + test + + + com.google.inject.extensions + guice-servlet + test + + + com.sun.jersey + jersey-bundle + test + + + com.sun.jersey + jersey-core + test + + + com.sun.jersey + jersey-json + test + + + com.sun.jersey + jersey-server + test + + + com.sun.jersey.contribs + jersey-guice + test + + + com.sun.xml.bind + jaxb-impl + test + + + com.thoughtworks.paranamer + paranamer + test + + + commons-collections + commons-collections + test + + + io.netty + netty + test + + + javax.inject + javax.inject + test + + + org.apache.avro + avro + test + + + org.apache.hadoop + hadoop-common + test + + + org.apache.hadoop + hadoop-common + test + tests + + + org.apache.hadoop + hadoop-hdfs + test + tests + + + org.apache.hadoop + hadoop-mapreduce-client-app + test + + + org.apache.hadoop + hadoop-mapreduce-client-common + test + + + org.apache.hadoop + hadoop-mapreduce-client-core + test + + + org.apache.hadoop + hadoop-mapreduce-client-hs + test + + + org.apache.hadoop + hadoop-mapreduce-client-jobclient + test + + + org.apache.hadoop + hadoop-mapreduce-client-jobclient + test + tests + + + org.apache.hadoop + hadoop-mapreduce-client-shuffle + test + + + org.apache.hadoop + hadoop-yarn-api + test + + + 
org.apache.hadoop + hadoop-yarn-client + test + + + org.apache.hadoop + hadoop-yarn-common + test + + + org.apache.hadoop + hadoop-yarn-server-common + test + + + org.apache.hadoop + hadoop-yarn-server-nodemanager + test + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + test + + + org.apache.hadoop + hadoop-yarn-server-tests + test + tests + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + test + + + org.apache.mrunit + mrunit + test + hadoop2 + + + org.codehaus.jackson + jackson-core-asl + test + + + org.codehaus.jackson + jackson-jaxrs + test + + + org.codehaus.jackson + jackson-mapper-asl + test + + + org.kitesdk + kite-morphlines-core + test + tests + + + org.mockito + mockito-core + test + + + org.mortbay.jetty + jetty + test + + + org.mortbay.jetty + jetty-util + test + + + org.objenesis + objenesis + test + + + org.xerial.snappy + snappy-java + test + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + + diff --git a/maven-build/solr/contrib/pom.xml b/maven-build/solr/contrib/pom.xml new file mode 100644 index 0000000..a6857e3 --- /dev/null +++ b/maven-build/solr/contrib/pom.xml @@ -0,0 +1,55 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-contrib-aggregator + Apache Solr Contrib aggregator POM + pom + + analysis-extras + clustering + dataimporthandler + dataimporthandler-extras + extraction + langid + morphlines-cell + morphlines-core + map-reduce + uima + velocity + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git 
a/maven-build/solr/contrib/uima/pom.xml b/maven-build/solr/contrib/uima/pom.xml new file mode 100644 index 0000000..81e2507 --- /dev/null +++ b/maven-build/solr/contrib/uima/pom.xml @@ -0,0 +1,378 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-uima + jar + Apache Solr UIMA integration + Apache Solr - UIMA integration + + solr/contrib/uima + ../../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-uima + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap 
+ concurrentlinkedhashmap-lru + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-digester + commons-digester + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.uima + AlchemyAPIAnnotator + + + org.apache.uima + OpenCalaisAnnotator + + + org.apache.uima + Tagger + + + org.apache.uima + WhitespaceTokenizer + + + org.apache.uima + uimaj-core + + + org.apache.zookeeper + zookeeper + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/resources + + + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties 
+ + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + + diff --git a/maven-build/solr/contrib/velocity/pom.xml b/maven-build/solr/contrib/velocity/pom.xml new file mode 100644 index 0000000..78dc08e --- /dev/null +++ b/maven-build/solr/contrib/velocity/pom.xml @@ -0,0 +1,361 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-velocity + jar + Apache Solr Velocity + Apache Solr Velocity + + solr/contrib/velocity + ../../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + 
hppc + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.spatial4j + spatial4j + + + commons-beanutils + commons-beanutils + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-collections + commons-collections + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.velocity + velocity + + + org.apache.velocity + velocity-tools + + + org.apache.zookeeper + zookeeper + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + + + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path}/src/test-files + + + ${top-level}/dev-tools/maven/solr + + 
maven.testlogging.properties + + + + + + de.thetaphi + forbiddenapis + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + + + + diff --git a/maven-build/solr/core/pom.xml b/maven-build/solr/core/pom.xml new file mode 100644 index 0000000..b54c04d --- /dev/null +++ b/maven-build/solr/core/pom.xml @@ -0,0 +1,46 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-core-aggregator + pom + Apache Solr Core aggregator POM + + src/java + src/test + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/maven-build/solr/core/src/java/pom.xml b/maven-build/solr/core/src/java/pom.xml new file mode 100644 index 0000000..a510cf0 --- /dev/null +++ b/maven-build/solr/core/src/java/pom.xml @@ -0,0 +1,347 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-core + jar + Apache Solr Core + Apache Solr Core + + solr/core + ../../../../.. 
+ ${relative-top-level}/${module-directory}/src/java + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + joda-time + joda-time + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.zookeeper + 
zookeeper + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + + ${module-path} + + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + org.apache.maven.plugins + maven-compiler-plugin + + true + + + + de.thetaphi + forbiddenapis + + + solr-shared-check-forbidden-apis + none + + + solr-shared-test-check-forbidden-apis + none + + + check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + commons-io-unsafe-2.1 + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + ${top-level}/lucene/tools/forbiddenApis/rue.txt + + + + check + + + + + + + diff --git a/maven-build/solr/core/src/test/pom.xml b/maven-build/solr/core/src/test/pom.xml new file mode 100644 index 0000000..0275492 --- /dev/null +++ b/maven-build/solr/core/src/test/pom.xml @@ -0,0 +1,525 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-core-tests + Apache Solr Core tests + jar + + solr/core + ../../../../.. 
+ ${relative-top-level}/${module-directory}/src/test + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-core + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + test + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + test + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + test + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + test + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + test + + + uk.co.flax.lucene-solr-intervals + lucene-join + test + + + uk.co.flax.lucene-solr-intervals + lucene-memory + test + + + uk.co.flax.lucene-solr-intervals + lucene-misc + test + + + uk.co.flax.lucene-solr-intervals + lucene-queries + test + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + test + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + test + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + test + + + uk.co.flax.lucene-solr-intervals + solr-solrj + test + + + cglib + cglib-nodep + test + + + com.carrotsearch + hppc + test + + + com.google.guava + guava + test + + + com.google.protobuf + protobuf-java + test + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + test + + + com.spatial4j + spatial4j + test + + + com.sun.jersey + jersey-core + test + + + commons-cli + commons-cli + test + + + commons-codec + commons-codec + test + + + commons-collections + commons-collections + test + + + commons-configuration + commons-configuration + test + + + commons-fileupload + commons-fileupload + test + + + commons-io + commons-io + test + + + commons-lang + commons-lang + test + + + dom4j + dom4j + test + + + javax.servlet + 
javax.servlet-api + test + + + joda-time + joda-time + test + + + log4j + log4j + test + + + org.antlr + antlr-runtime + test + + + org.apache.hadoop + hadoop-annotations + test + + + org.apache.hadoop + hadoop-auth + test + + + org.apache.hadoop + hadoop-common + test + + + org.apache.hadoop + hadoop-common + test + tests + + + org.apache.hadoop + hadoop-hdfs + test + + + org.apache.hadoop + hadoop-hdfs + test + tests + + + org.apache.httpcomponents + httpclient + test + + + org.apache.httpcomponents + httpcore + test + + + org.apache.httpcomponents + httpmime + test + + + org.apache.zookeeper + zookeeper + test + + + org.codehaus.woodstox + wstx-asl + test + + + org.easymock + easymock + test + + + org.eclipse.jetty + jetty-continuation + test + + + org.eclipse.jetty + jetty-deploy + test + + + org.eclipse.jetty + jetty-http + test + + + org.eclipse.jetty + jetty-io + test + + + org.eclipse.jetty + jetty-jmx + test + + + org.eclipse.jetty + jetty-security + test + + + org.eclipse.jetty + jetty-server + test + + + org.eclipse.jetty + jetty-servlet + test + + + org.eclipse.jetty + jetty-util + test + + + org.eclipse.jetty + jetty-webapp + test + + + org.eclipse.jetty + jetty-xml + test + + + org.eclipse.jetty.orbit + javax.servlet + test + + + org.mortbay.jetty + jetty + test + + + org.mortbay.jetty + jetty-util + test + + + org.noggit + noggit + test + + + org.objenesis + objenesis + test + + + org.ow2.asm + asm + test + + + org.ow2.asm + asm-commons + test + + + org.restlet.jee + org.restlet + test + + + org.restlet.jee + org.restlet.ext.servlet + test + + + org.slf4j + jcl-over-slf4j + test + true + + + org.slf4j + jul-to-slf4j + test + true + + + org.slf4j + slf4j-api + test + true + + + org.slf4j + slf4j-log4j12 + test + true + + + + + + ../java/target/classes + ${module-path} + + + ${module-path}/../test-files + + + ${project.build.testSourceDirectory} + + **/*.java + + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + 
org.apache.maven.plugins + maven-deploy-plugin + + true + + + + org.apache.maven.plugins + maven-jar-plugin + + + default-jar + + none + + + + + org.apache.maven.plugins + maven-install-plugin + + true + + + + org.apache.felix + maven-bundle-plugin + 2.3.7 + + true + + + + de.thetaphi + forbiddenapis + + + solr-shared-check-forbidden-apis + none + + + solr-shared-test-check-forbidden-apis + none + + + test-check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + commons-io-unsafe-2.1 + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + testCheck + + + + test-check-forbidden-test-apis + + + ${top-level}/lucene/tools/forbiddenApis/tests.txt + + + + org/apache/solr/search/DocSetPerf.class + + org/apache/solr/internal/**/*.class + + + + testCheck + + + + + + + diff --git a/maven-build/solr/pom.xml b/maven-build/solr/pom.xml new file mode 100644 index 0000000..574e2f7 --- /dev/null +++ b/maven-build/solr/pom.xml @@ -0,0 +1,179 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + lucene-solr-grandparent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-parent + pom + Apache Solr parent POM + Apache Solr parent POM + + core + solrj + test-framework + contrib + + + LUCENE_CURRENT + solr + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + JIRA + https://issues.apache.org/jira/browse/SOLR + + + + Solr User List + solr-user-subscribe@lucene.apache.org + solr-user-unsubscribe@lucene.apache.org + + http://mail-archives.apache.org/mod_mbox/solr-user/ + + + + Java Developer List + dev-subscribe@lucene.apache.org + dev-unsubscribe@lucene.apache.org + http://mail-archives.apache.org/mod_mbox/lucene-dev/ + + + Java Commits List + commits-subscribe@lucene.apache.org + commits-unsubscribe@lucene.apache.org + + 
http://mail-archives.apache.org/mod_mbox/lucene-java-commits/ + + + + 2006 + + + maven-restlet + Public online Restlet repository + http://maven.restlet.org + + + releases.cloudera.com + Cloudera Releases + https://repository.cloudera.com/artifactory/libs-release + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + ${project.name} ${project.version} API (${now.version}) + ${project.name} ${project.version} API (${now.version}) + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ${tests.jettyConnector} + ${tests.disableHdfs} + + + + + + + + de.thetaphi + forbiddenapis + + + solr-shared-check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + ${top-level}/lucene/tools/forbiddenApis/rue.txt + + + + check + + + + solr-shared-test-check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + + + + + ${top-level}/lucene/tools/forbiddenApis/tests.txt + ${top-level}/lucene/tools/forbiddenApis/base.txt + + + + testCheck + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ${top-level}/solr/testlogging.properties + + + + + + + + windows-tests-disableHdfs + + windows + + + true + + + + diff --git a/maven-build/solr/solrj/pom.xml b/maven-build/solr/solrj/pom.xml new file mode 100644 index 0000000..2ab2259 --- /dev/null +++ b/maven-build/solr/solrj/pom.xml @@ -0,0 +1,46 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-solrj-aggregator + pom + Apache Solr Solrj aggregator POM + + src/java + src/test + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/maven-build/solr/solrj/src/java/pom.xml b/maven-build/solr/solrj/src/java/pom.xml new file mode 100644 index 0000000..24190b4 --- /dev/null +++ b/maven-build/solr/solrj/src/java/pom.xml @@ -0,0 +1,151 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + 
r1581360-intervals-1.3.1-SNAPSHOT + ../../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-solrj + jar + Apache Solr Solrj + Apache Solr Solrj + + solr/solrj + ../../../../.. + ${relative-top-level}/${module-directory}/src/java + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + commons-io + commons-io + + + log4j + log4j + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.zookeeper + zookeeper + + + org.codehaus.woodstox + wstx-asl + + + org.noggit + noggit + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + + ${module-path} + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + org.apache.maven.plugins + maven-compiler-plugin + + true + + + + de.thetaphi + forbiddenapis + + + solr-shared-check-forbidden-apis + none + + + solr-shared-test-check-forbidden-apis + none + + + check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + commons-io-unsafe-2.1 + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + + + + + + check + + + + + + + diff --git a/maven-build/solr/solrj/src/test/pom.xml b/maven-build/solr/solrj/src/test/pom.xml new file mode 100644 index 0000000..4d13f21 --- /dev/null +++ b/maven-build/solr/solrj/src/test/pom.xml @@ -0,0 +1,465 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../../../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-solrj-tests + Apache Solr Solrj tests + jar + + solr/solrj + ../../../../.. 
+ ${relative-top-level}/${module-directory}/src/test + + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-test-framework + test + + + uk.co.flax.lucene-solr-intervals + solr-solrj + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + test + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + test + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + test + + + uk.co.flax.lucene-solr-intervals + lucene-core + test + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + test + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + test + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + test + + + uk.co.flax.lucene-solr-intervals + lucene-join + test + + + uk.co.flax.lucene-solr-intervals + lucene-memory + test + + + uk.co.flax.lucene-solr-intervals + lucene-misc + test + + + uk.co.flax.lucene-solr-intervals + lucene-queries + test + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + test + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + test + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + test + + + uk.co.flax.lucene-solr-intervals + solr-core + test + + + com.carrotsearch + hppc + test + + + com.google.guava + guava + test + + + com.google.protobuf + protobuf-java + test + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + test + + + com.spatial4j + spatial4j + test + + + commons-cli + commons-cli + test + + + commons-codec + commons-codec + test + + + commons-configuration + commons-configuration + test + + + commons-fileupload + commons-fileupload + test + + + commons-io + commons-io + test + + + commons-lang + commons-lang + test + + + dom4j + dom4j + test + + + joda-time + joda-time + test + + + log4j + log4j + test + + + org.antlr + antlr-runtime + test + + + org.apache.hadoop + hadoop-annotations + test + + + 
org.apache.hadoop + hadoop-auth + test + + + org.apache.hadoop + hadoop-common + test + + + org.apache.hadoop + hadoop-hdfs + test + + + org.apache.httpcomponents + httpclient + test + + + org.apache.httpcomponents + httpcore + test + + + org.apache.httpcomponents + httpmime + test + + + org.apache.zookeeper + zookeeper + test + + + org.codehaus.woodstox + wstx-asl + test + + + org.eclipse.jetty + jetty-continuation + test + + + org.eclipse.jetty + jetty-deploy + test + + + org.eclipse.jetty + jetty-http + test + + + org.eclipse.jetty + jetty-io + test + + + org.eclipse.jetty + jetty-jmx + test + + + org.eclipse.jetty + jetty-security + test + + + org.eclipse.jetty + jetty-server + test + + + org.eclipse.jetty + jetty-servlet + test + + + org.eclipse.jetty + jetty-util + test + + + org.eclipse.jetty + jetty-webapp + test + + + org.eclipse.jetty + jetty-xml + test + + + org.eclipse.jetty.orbit + javax.servlet + test + + + org.noggit + noggit + test + + + org.ow2.asm + asm + test + + + org.ow2.asm + asm-commons + test + + + org.restlet.jee + org.restlet + test + + + org.restlet.jee + org.restlet.ext.servlet + test + + + org.slf4j + jcl-over-slf4j + test + true + + + org.slf4j + jul-to-slf4j + test + true + + + org.slf4j + slf4j-api + test + true + + + org.slf4j + slf4j-log4j12 + test + true + + + + + + ../java/target/classes + ${module-path} + + + ${module-path}/../test-files + + + ${top-level}/dev-tools/maven/solr + + maven.testlogging.properties + + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + org.apache.maven.plugins + maven-jar-plugin + + + default-jar + + none + + + + + org.apache.maven.plugins + maven-install-plugin + + true + + + + org.apache.felix + maven-bundle-plugin + 2.3.7 + + true + + + + de.thetaphi + forbiddenapis + + + solr-shared-check-forbidden-apis + none + + + solr-shared-test-check-forbidden-apis + none + + + test-check-forbidden-servlet-api + + + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + + + + 
org/apache/solr/client/solrj/impl/BasicHttpSolrServerTest$DebugServlet.class + + + + testCheck + + + + test-check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + commons-io-unsafe-2.1 + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + ${top-level}/lucene/tools/forbiddenApis/tests.txt + + + + testCheck + + + + + + + diff --git a/maven-build/solr/test-framework/pom.xml b/maven-build/solr/test-framework/pom.xml new file mode 100644 index 0000000..2f4e059 --- /dev/null +++ b/maven-build/solr/test-framework/pom.xml @@ -0,0 +1,379 @@ + + + 4.0.0 + + uk.co.flax.lucene-solr-intervals + solr-parent + r1581360-intervals-1.3.1-SNAPSHOT + ../pom.xml + + uk.co.flax.lucene-solr-intervals + solr-test-framework + jar + Apache Solr Test Framework + Apache Solr Test Framework + + solr/test-framework + ../../.. + ${relative-top-level}/${module-directory} + + + scm:svn:${vc-anonymous-base-url}/${module-directory} + scm:svn:${vc-dev-base-url}/${module-directory} + ${vc-browse-base-url}/${module-directory} + + + + + + uk.co.flax.lucene-solr-intervals + lucene-test-framework + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-common + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-kuromoji + + + uk.co.flax.lucene-solr-intervals + lucene-analyzers-phonetic + + + uk.co.flax.lucene-solr-intervals + lucene-codecs + + + uk.co.flax.lucene-solr-intervals + lucene-core + + + uk.co.flax.lucene-solr-intervals + lucene-expressions + + + uk.co.flax.lucene-solr-intervals + lucene-grouping + + + uk.co.flax.lucene-solr-intervals + lucene-highlighter + + + uk.co.flax.lucene-solr-intervals + lucene-join + + + uk.co.flax.lucene-solr-intervals + lucene-memory + + + uk.co.flax.lucene-solr-intervals + lucene-misc + + + uk.co.flax.lucene-solr-intervals + lucene-queries + + + uk.co.flax.lucene-solr-intervals + lucene-queryparser + + + uk.co.flax.lucene-solr-intervals + lucene-spatial + + + uk.co.flax.lucene-solr-intervals + lucene-suggest + + + uk.co.flax.lucene-solr-intervals 
+ lucene-test-framework + + + uk.co.flax.lucene-solr-intervals + solr-core + + + uk.co.flax.lucene-solr-intervals + solr-solrj + + + com.carrotsearch + hppc + + + com.carrotsearch.randomizedtesting + junit4-ant + + + com.carrotsearch.randomizedtesting + randomizedtesting-runner + + + com.google.guava + guava + + + com.google.protobuf + protobuf-java + + + com.googlecode.concurrentlinkedhashmap + concurrentlinkedhashmap-lru + + + com.spatial4j + spatial4j + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-configuration + commons-configuration + + + commons-fileupload + commons-fileupload + + + commons-io + commons-io + + + commons-lang + commons-lang + + + dom4j + dom4j + + + joda-time + joda-time + + + junit + junit + + + log4j + log4j + + + org.antlr + antlr-runtime + + + org.apache.ant + ant + + + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.apache.httpcomponents + httpmime + + + org.apache.zookeeper + zookeeper + + + org.codehaus.woodstox + wstx-asl + + + org.eclipse.jetty + jetty-continuation + + + org.eclipse.jetty + jetty-deploy + + + org.eclipse.jetty + jetty-http + + + org.eclipse.jetty + jetty-io + + + org.eclipse.jetty + jetty-jmx + + + org.eclipse.jetty + jetty-security + + + org.eclipse.jetty + jetty-server + + + org.eclipse.jetty + jetty-servlet + + + org.eclipse.jetty + jetty-util + + + org.eclipse.jetty + jetty-webapp + + + org.eclipse.jetty + jetty-xml + + + org.eclipse.jetty.orbit + javax.servlet + + + org.noggit + noggit + + + org.ow2.asm + asm + + + org.ow2.asm + asm-commons + + + org.restlet.jee + org.restlet + + + org.restlet.jee + org.restlet.ext.servlet + + + org.slf4j + jcl-over-slf4j + true + + + org.slf4j + jul-to-slf4j + true + + + org.slf4j + slf4j-api + true + + + org.slf4j + slf4j-log4j12 + true + + + 
+ + + ${module-path}/src/java + ${module-path}/src/test + + + ${module-path} + + **/*.java + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + de.thetaphi + forbiddenapis + + + solr-shared-check-forbidden-apis + none + + + solr-shared-test-check-forbidden-apis + none + + + check-forbidden-apis + + + true + + jdk-unsafe + jdk-deprecated + commons-io-unsafe-2.1 + + + ${top-level}/lucene/tools/forbiddenApis/base.txt + ${top-level}/lucene/tools/forbiddenApis/servlet-api.txt + ${top-level}/lucene/tools/forbiddenApis/tests.txt + + + + check + + + + + + + diff --git a/maven-build/update-groupId b/maven-build/update-groupId new file mode 100755 index 0000000..b9de4b6 --- /dev/null +++ b/maven-build/update-groupId @@ -0,0 +1,2 @@ +find . -name pom.xml | xargs perl -pi -e 's/>org.apache.luceneuk.co.flax.lucene-solr-intervalsorg.apache.solruk.co.flax.lucene-solr-intervals + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + 100000 + + + + + + + + explicit + + + + + + explicit + + + + diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta2.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta2.java new file mode 100644 index 0000000..5f4595a --- /dev/null +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta2.java @@ -0,0 +1,288 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.dataimport; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + *

      + * Test for SqlEntityProcessor which checks variations in primary key names and deleted ids + *

      + * + * + * + * @since solr 1.3 + */ +@Ignore("FIXME: I fail so often it makes me ill!") +public class TestSqlEntityProcessorDelta2 extends AbstractDataImportHandlerTestCase { + private static final String FULLIMPORT_QUERY = "select * from x"; + + private static final String DELTA_QUERY = "select id from x where last_modified > NOW"; + + private static final String DELETED_PK_QUERY = "select id from x where last_modified > NOW AND deleted='true'"; + + private static final String dataConfig_delta2 = + "" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + "\n"; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("dataimport-solrconfig.xml", "dataimport-solr_id-schema.xml"); + } + + @Before @Override + public void setUp() throws Exception { + super.setUp(); + clearIndex(); + assertU(commit()); + } + + @SuppressWarnings("unchecked") + private void add1document() throws Exception { + List parentRow = new ArrayList(); + parentRow.add(createMap("id", "1")); + MockDataSource.setIterator(FULLIMPORT_QUERY, parentRow.iterator()); + + List childRow = new ArrayList(); + childRow.add(createMap("desc", "hello")); + MockDataSource.setIterator("select * from y where y.A='1'", childRow + .iterator()); + + runFullImport(dataConfig_delta2); + + assertQ(req("*:* OR add1document"), "//*[@numFound='1']"); + assertQ(req("solr_id:prefix-1"), "//*[@numFound='1']"); + assertQ(req("desc:hello"), "//*[@numFound='1']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_FullImport() throws Exception { + add1document(); + } + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_DeltaImport_delete() throws Exception { + add1document(); + List deletedRow = new ArrayList(); + deletedRow.add(createMap("id", "1")); + MockDataSource.setIterator(DELETED_PK_QUERY, deletedRow.iterator()); + + MockDataSource.setIterator(DELTA_QUERY, Collections + .EMPTY_LIST.iterator()); + + List 
childRow = new ArrayList(); + childRow.add(createMap("desc", "hello")); + MockDataSource.setIterator("select * from y where y.A='1'", childRow + .iterator()); + + runDeltaImport(dataConfig_delta2); + assertQ(req("*:* OR testCompositePk_DeltaImport_delete"), "//*[@numFound='0']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_DeltaImport_empty() throws Exception { + List deltaRow = new ArrayList(); + deltaRow.add(createMap("id", "1")); + MockDataSource.setIterator(DELTA_QUERY, deltaRow.iterator()); + + MockDataSource.setIterator(DELETED_PK_QUERY, Collections + .EMPTY_LIST.iterator()); + + List parentRow = new ArrayList(); + parentRow.add(createMap("id", "1")); + MockDataSource.setIterator("select * from x where id='1'", parentRow + .iterator()); + + List childRow = new ArrayList(); + childRow.add(createMap("desc", "hello")); + MockDataSource.setIterator("select * from y where y.A='1'", childRow + .iterator()); + + runDeltaImport(dataConfig_delta2); + + assertQ(req("*:* OR testCompositePk_DeltaImport_empty"), "//*[@numFound='1']"); + assertQ(req("solr_id:prefix-1"), "//*[@numFound='1']"); + assertQ(req("desc:hello"), "//*[@numFound='1']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_DeltaImport_replace_delete() throws Exception { + add1document(); + MockDataSource.clearCache(); + + List deltaRow = new ArrayList(); + deltaRow.add(createMap("id", "1")); + MockDataSource.setIterator(DELTA_QUERY, + deltaRow.iterator()); + + List deletedRow = new ArrayList(); + deletedRow.add(createMap("id", "1")); + MockDataSource.setIterator(DELETED_PK_QUERY, + deletedRow.iterator()); + + List parentRow = new ArrayList(); + parentRow.add(createMap("id", "1")); + MockDataSource.setIterator("select * from x where id='1'", parentRow + .iterator()); + + List childRow = new ArrayList(); + childRow.add(createMap("desc", "goodbye")); + MockDataSource.setIterator("select * from y where y.A='1'", childRow + .iterator()); + + 
runDeltaImport(dataConfig_delta2); + + assertQ(req("*:* OR testCompositePk_DeltaImport_replace_delete"), "//*[@numFound='0']"); + } + + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_DeltaImport_replace_nodelete() throws Exception { + add1document(); + MockDataSource.clearCache(); + + List deltaRow = new ArrayList(); + deltaRow.add(createMap("id", "1")); + MockDataSource.setIterator(DELTA_QUERY, + deltaRow.iterator()); + + MockDataSource.setIterator(DELETED_PK_QUERY, Collections + .EMPTY_LIST.iterator()); + + List parentRow = new ArrayList(); + parentRow.add(createMap("id", "1")); + MockDataSource.setIterator("select * from x where id='1'", parentRow + .iterator()); + + List childRow = new ArrayList(); + childRow.add(createMap("desc", "goodbye")); + MockDataSource.setIterator("select * from y where y.A='1'", childRow + .iterator()); + + runDeltaImport(dataConfig_delta2); + + assertQ(req("*:* OR XtestCompositePk_DeltaImport_replace_nodelete"), "//*[@numFound='1']"); + assertQ(req("solr_id:prefix-1"), "//*[@numFound='1']"); + assertQ(req("desc:hello OR XtestCompositePk_DeltaImport_replace_nodelete"), "//*[@numFound='0']"); + assertQ(req("desc:goodbye"), "//*[@numFound='1']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_DeltaImport_add() throws Exception { + add1document(); + MockDataSource.clearCache(); + + List deltaRow = new ArrayList(); + deltaRow.add(createMap("id", "2")); + MockDataSource.setIterator(DELTA_QUERY, + deltaRow.iterator()); + + List parentRow = new ArrayList(); + parentRow.add(createMap("id", "2")); + MockDataSource.setIterator("select * from x where id='2'", parentRow + .iterator()); + + List childRow = new ArrayList(); + childRow.add(createMap("desc", "goodbye")); + MockDataSource.setIterator("select * from y where y.A='2'", childRow + .iterator()); + + runDeltaImport(dataConfig_delta2); + + assertQ(req("*:* OR testCompositePk_DeltaImport_add"), "//*[@numFound='2']"); + 
assertQ(req("solr_id:prefix-1"), "//*[@numFound='1']"); + assertQ(req("solr_id:prefix-2"), "//*[@numFound='1']"); + assertQ(req("desc:hello"), "//*[@numFound='1']"); + assertQ(req("desc:goodbye"), "//*[@numFound='1']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_DeltaImport_nodelta() throws Exception { + add1document(); + MockDataSource.clearCache(); + + MockDataSource.setIterator(DELTA_QUERY, + Collections.EMPTY_LIST.iterator()); + + runDeltaImport(dataConfig_delta2); + + assertQ(req("*:* OR testCompositePk_DeltaImport_nodelta"), "//*[@numFound='1']"); + assertQ(req("solr_id:prefix-1 OR testCompositePk_DeltaImport_nodelta"), "//*[@numFound='1']"); + assertQ(req("desc:hello OR testCompositePk_DeltaImport_nodelta"), "//*[@numFound='1']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_DeltaImport_add_delete() throws Exception { + add1document(); + MockDataSource.clearCache(); + + List deltaRow = new ArrayList(); + deltaRow.add(createMap("id", "2")); + MockDataSource.setIterator(DELTA_QUERY, + deltaRow.iterator()); + + List deletedRow = new ArrayList(); + deletedRow.add(createMap("id", "1")); + MockDataSource.setIterator(DELETED_PK_QUERY, + deletedRow.iterator()); + + List parentRow = new ArrayList(); + parentRow.add(createMap("id", "2")); + MockDataSource.setIterator("select * from x where id='2'", parentRow + .iterator()); + + List childRow = new ArrayList(); + childRow.add(createMap("desc", "goodbye")); + MockDataSource.setIterator("select * from y where y.A='2'", childRow + .iterator()); + + runDeltaImport(dataConfig_delta2); + + assertQ(req("*:* OR XtestCompositePk_DeltaImport_add_delete"), "//*[@numFound='1']"); + assertQ(req("solr_id:prefix-2"), "//*[@numFound='1']"); + assertQ(req("desc:hello"), "//*[@numFound='0']"); + assertQ(req("desc:goodbye"), "//*[@numFound='1']"); + } +} diff --git 
a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta3.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta3.java new file mode 100644 index 0000000..cf5cc28 --- /dev/null +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDelta3.java @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.handler.dataimport; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +@Ignore("FIXME: I fail so often it makes me ill!") +public class TestSqlEntityProcessorDelta3 extends AbstractDataImportHandlerTestCase { + private static final String P_FULLIMPORT_QUERY = "select * from parent"; + private static final String P_DELTA_QUERY = "select parent_id from parent where last_modified > NOW"; + private static final String P_DELTAIMPORT_QUERY = "select * from parent where last_modified > NOW AND parent_id=${dih.delta.parent_id}"; + + private static final String C_FULLIMPORT_QUERY = "select * from child"; + private static final String C_DELETED_PK_QUERY = "select id from child where last_modified > NOW AND deleted='true'"; + private static final String C_DELTA_QUERY = "select id from child where last_modified > NOW"; + private static final String C_PARENTDELTA_QUERY = "select parent_id from child where id=${child.id}"; + private static final String C_DELTAIMPORT_QUERY = "select * from child where last_modified > NOW AND parent_id=${dih.delta.parent_id}"; + + private static final String dataConfig_delta = + "" + + " \n" + + " " + + " " + + " " + + " " + + " " + + " " + + " " + + " " + + "\n"; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("dataimport-solrconfig.xml", "dataimport-schema.xml"); + } + + @Before @Override + public void setUp() throws Exception { + super.setUp(); + clearIndex(); + assertU(commit()); + } + + @SuppressWarnings("unchecked") + private void add1document() throws Exception { + List parentRow = new ArrayList(); + parentRow.add(createMap("parent_id", "1", "desc", "d1")); + MockDataSource.setIterator(P_FULLIMPORT_QUERY, parentRow.iterator()); + + List childRow = new ArrayList(); + childRow.add(createMap("id", "2")); + 
MockDataSource.setIterator(C_FULLIMPORT_QUERY, childRow.iterator()); + + runFullImport(dataConfig_delta); + + assertQ(req("*:* OR add1document"), "//*[@numFound='1']"); + assertQ(req("id:1"), "//*[@numFound='0']"); + assertQ(req("id:2"), "//*[@numFound='1']"); + assertQ(req("desc:d1"), "//*[@numFound='1']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_FullImport() throws Exception { + add1document(); + } + + // WORKS + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_DeltaImport_delete() throws Exception { + add1document(); + List deletedRow = new ArrayList(); + deletedRow.add(createMap("id", "2")); + MockDataSource.setIterator(C_DELETED_PK_QUERY, deletedRow.iterator()); + MockDataSource.setIterator(C_DELTA_QUERY, Collections.EMPTY_LIST.iterator()); + + List deletedParentRow = new ArrayList(); + deletedParentRow.add(createMap("parent_id", "1")); + MockDataSource.setIterator("select parent_id from child where id=2", deletedParentRow.iterator()); + + runDeltaImport(dataConfig_delta); + assertQ(req("*:* OR testCompositePk_DeltaImport_delete"), "//*[@numFound='0']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_DeltaImport_empty() throws Exception { + List childDeltaRow = new ArrayList(); + childDeltaRow.add(createMap("id", "2")); + MockDataSource.setIterator(C_DELTA_QUERY, childDeltaRow.iterator()); + MockDataSource.setIterator(C_DELETED_PK_QUERY, Collections.EMPTY_LIST.iterator()); + + List childParentDeltaRow = new ArrayList(); + childParentDeltaRow.add(createMap("parent_id", "1")); + MockDataSource.setIterator("select parent_id from child where id=2", childParentDeltaRow.iterator()); + + MockDataSource.setIterator(P_DELTA_QUERY, Collections.EMPTY_LIST.iterator()); + + List parentDeltaImportRow = new ArrayList(); + parentDeltaImportRow.add(createMap("parent_id", "1", "desc", "d1")); + MockDataSource.setIterator("select * from parent where last_modified > NOW AND parent_id=1", + 
parentDeltaImportRow.iterator()); + + List childDeltaImportRow = new ArrayList(); + childDeltaImportRow.add(createMap("id", "2")); + MockDataSource.setIterator("select * from child where last_modified > NOW AND parent_id=1", + childDeltaImportRow.iterator()); + + runDeltaImport(dataConfig_delta); + + assertQ(req("*:* OR testCompositePk_DeltaImport_empty"), "//*[@numFound='1']"); + assertQ(req("id:2"), "//*[@numFound='1']"); + assertQ(req("desc:d1"), "//*[@numFound='1']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_DeltaImport_replace_nodelete() throws Exception { + add1document(); + MockDataSource.clearCache(); + + List deltaRow = new ArrayList(); + deltaRow.add(createMap("parent_id", "1")); + MockDataSource.setIterator(P_DELTA_QUERY, + deltaRow.iterator()); + + List parentRow = new ArrayList(); + parentRow.add(createMap("parent_id", "1", "desc", "d2")); + MockDataSource.setIterator("select * from parent where last_modified > NOW AND parent_id=1", + parentRow.iterator()); + + List childRow = new ArrayList(); + childRow.add(createMap("id", "2")); + MockDataSource.setIterator("select * from child where last_modified > NOW AND parent_id=1", + childRow.iterator()); + + MockDataSource.setIterator(C_DELETED_PK_QUERY, Collections + .EMPTY_LIST.iterator()); + + runDeltaImport(dataConfig_delta); + + assertQ(req("*:* OR XtestCompositePk_DeltaImport_replace_nodelete"), "//*[@numFound='1']"); + assertQ(req("id:2"), "//*[@numFound='1']"); + assertQ(req("desc:s1 OR XtestCompositePk_DeltaImport_replace_nodelete"), "//*[@numFound='0']"); + assertQ(req("desc:d2"), "//*[@numFound='1']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_DeltaImport_add() throws Exception { + add1document(); + MockDataSource.clearCache(); + + List parentDeltaRow = new ArrayList(); + parentDeltaRow.add(createMap("parent_id", "1")); + MockDataSource.setIterator(P_DELTA_QUERY, + parentDeltaRow.iterator()); + + List parentRow = new ArrayList(); 
+ parentRow.add(createMap("parent_id", "1", "desc", "d1")); + MockDataSource.setIterator("select * from parent where last_modified > NOW AND parent_id=1", + parentRow.iterator()); + + List childDeltaRow = new ArrayList(); + childDeltaRow.add(createMap("id", "3")); + MockDataSource.setIterator(C_DELTA_QUERY, + childDeltaRow.iterator()); + + List childParentDeltaRow = new ArrayList(); + childParentDeltaRow.add(createMap("parent_id", "1")); + MockDataSource.setIterator("select parent_id from child where id='3'", + childParentDeltaRow.iterator()); + + List childRow = new ArrayList(); + childRow.add(createMap("id", "3")); + MockDataSource.setIterator("select * from child where last_modified > NOW AND parent_id=1", + childRow.iterator()); + + runDeltaImport(dataConfig_delta); + + assertQ(req("*:* OR testCompositePk_DeltaImport_add"), "//*[@numFound='2']"); + assertQ(req("id:2"), "//*[@numFound='1']"); + assertQ(req("id:3"), "//*[@numFound='1']"); + assertQ(req("desc:d1"), "//*[@numFound='2']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testCompositePk_DeltaImport_nodelta() throws Exception { + add1document(); + MockDataSource.clearCache(); + + MockDataSource.setIterator(P_DELTA_QUERY, + Collections.EMPTY_LIST.iterator()); + + MockDataSource.setIterator(C_DELTA_QUERY, + Collections.EMPTY_LIST.iterator()); + + runDeltaImport(dataConfig_delta); + + assertQ(req("*:* OR testCompositePk_DeltaImport_nodelta"), "//*[@numFound='1']"); + assertQ(req("id:2 OR testCompositePk_DeltaImport_nodelta"), "//*[@numFound='1']"); + assertQ(req("desc:d1 OR testCompositePk_DeltaImport_nodelta"), "//*[@numFound='1']"); + } +} diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDeltaPrefixedPk.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDeltaPrefixedPk.java new file mode 100644 index 0000000..0f236bf --- /dev/null +++ 
b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessorDeltaPrefixedPk.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.dataimport; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.logging.*; + +/** + *

      + * Test for SqlEntityProcessorDelta verifying fix for SOLR-1191 + *

      + * + * + * + * @since solr 3.1 + */ +@Ignore("FIXME: I fail so often it makes me ill!") +public class TestSqlEntityProcessorDeltaPrefixedPk extends AbstractDataImportHandlerTestCase { + private static final String FULLIMPORT_QUERY = "select * from x"; + + private static final String DELTA_QUERY = "select id from x where last_modified > NOW"; + + private static final String DELETED_PK_QUERY = "select id from x where last_modified > NOW AND deleted='true'"; + + private static final String dataConfig_delta = + "" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + "\n"; + + private static final List EMPTY_LIST = Collections.EMPTY_LIST; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("dataimport-solrconfig.xml", "dataimport-schema.xml"); + } + + @Before @Override + public void setUp() throws Exception { + super.setUp(); + clearIndex(); + assertU(commit()); + //Logger.getLogger("").setLevel(Level.ALL); + } + + @SuppressWarnings("unchecked") + private void add1document() throws Exception { + List row = new ArrayList(); + row.add(createMap("id", "1", "desc", "bar")); + MockDataSource.setIterator(FULLIMPORT_QUERY, row.iterator()); + + runFullImport(dataConfig_delta); + + assertQ(req("*:* OR add1document"), "//*[@numFound='1']"); + assertQ(req("id:1"), "//*[@numFound='1']"); + assertQ(req("desc:bar"), "//*[@numFound='1']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testDeltaImport_deleteResolvesUnprefixedPk() throws Exception { + add1document(); + MockDataSource.clearCache(); + List deletedRows = new ArrayList(); + deletedRows.add(createMap("id", "1")); + MockDataSource.setIterator(DELETED_PK_QUERY, deletedRows.iterator()); + MockDataSource.setIterator(DELTA_QUERY, EMPTY_LIST.iterator()); + runDeltaImport(dataConfig_delta); + + assertQ(req("*:* OR testDeltaImport_deleteResolvesUnprefixedPk"), "//*[@numFound='0']"); + } + + @Test + @SuppressWarnings("unchecked") + public void 
testDeltaImport_replace_resolvesUnprefixedPk() throws Exception { + add1document(); + MockDataSource.clearCache(); + List deltaRows = new ArrayList(); + deltaRows.add(createMap("id", "1")); + MockDataSource.setIterator(DELTA_QUERY, deltaRows.iterator()); + MockDataSource.setIterator(DELETED_PK_QUERY, EMPTY_LIST.iterator()); + List rows = new ArrayList(); + rows.add(createMap("id", "1", "desc", "baz")); + MockDataSource.setIterator("select * from x where id='1'", rows.iterator()); + + runDeltaImport(dataConfig_delta); + + assertQ(req("*:* OR testDeltaImport_replace_resolvesUnprefixedPk"), "//*[@numFound='1']"); + assertQ(req("id:1"), "//*[@numFound='1']"); + assertQ(req("desc:bar"), "//*[@numFound='0']"); + assertQ(req("desc:baz"), "//*[@numFound='1']"); + } + + @Test + @SuppressWarnings("unchecked") + public void testDeltaImport_addResolvesUnprefixedPk() throws Exception { + add1document(); + MockDataSource.clearCache(); + + List deltaRows = new ArrayList(); + deltaRows.add(createMap("id", "2")); + MockDataSource.setIterator(DELTA_QUERY, deltaRows.iterator()); + + List rows = new ArrayList(); + rows.add(createMap("id", "2", "desc", "xyzzy")); + MockDataSource.setIterator("select * from x where id='2'", rows.iterator()); + + runDeltaImport(dataConfig_delta); + + assertQ(req("*:* OR testDeltaImport_addResolvesUnprefixedPk"), "//*[@numFound='2']"); + assertQ(req("id:1"), "//*[@numFound='1']"); + assertQ(req("id:2"), "//*[@numFound='1']"); + assertQ(req("desc:bar"), "//*[@numFound='1']"); + assertQ(req("desc:xyzzy"), "//*[@numFound='1']"); + } + +} diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java index 3f57aac..82b354f 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java @@ -17,35 +17,7 @@ package org.apache.solr.handler.admin; * limitations under 
the License. */ -import static org.apache.solr.cloud.Overseer.QUEUE_OPERATION; -import static org.apache.solr.cloud.OverseerCollectionProcessor.ASYNC; -import static org.apache.solr.cloud.OverseerCollectionProcessor.COLL_CONF; -import static org.apache.solr.cloud.OverseerCollectionProcessor.CREATESHARD; -import static org.apache.solr.cloud.OverseerCollectionProcessor.CREATE_NODE_SET; -import static org.apache.solr.cloud.OverseerCollectionProcessor.DELETEREPLICA; -import static org.apache.solr.cloud.OverseerCollectionProcessor.MAX_SHARDS_PER_NODE; -import static org.apache.solr.cloud.OverseerCollectionProcessor.NUM_SLICES; -import static org.apache.solr.cloud.OverseerCollectionProcessor.REPLICATION_FACTOR; -import static org.apache.solr.cloud.OverseerCollectionProcessor.REQUESTID; -import static org.apache.solr.cloud.OverseerCollectionProcessor.ROUTER; -import static org.apache.solr.cloud.OverseerCollectionProcessor.SHARDS_PROP; -import static org.apache.solr.common.cloud.ZkNodeProps.makeMap; -import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; -import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP; -import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDROLE; -import static org.apache.solr.common.params.CollectionParams.CollectionAction.CLUSTERPROP; -import static org.apache.solr.common.params.CollectionParams.CollectionAction.OVERSEERSTATUS; -import static org.apache.solr.common.params.CollectionParams.CollectionAction.REMOVEROLE; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Locale; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; - +import com.google.common.collect.ImmutableSet; import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpSolrServer; @@ -78,7 +50,34 @@ import 
org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.ImmutableSet; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import static org.apache.solr.cloud.Overseer.QUEUE_OPERATION; +import static org.apache.solr.cloud.OverseerCollectionProcessor.ASYNC; +import static org.apache.solr.cloud.OverseerCollectionProcessor.COLL_CONF; +import static org.apache.solr.cloud.OverseerCollectionProcessor.CREATESHARD; +import static org.apache.solr.cloud.OverseerCollectionProcessor.CREATE_NODE_SET; +import static org.apache.solr.cloud.OverseerCollectionProcessor.DELETEREPLICA; +import static org.apache.solr.cloud.OverseerCollectionProcessor.MAX_SHARDS_PER_NODE; +import static org.apache.solr.cloud.OverseerCollectionProcessor.NUM_SLICES; +import static org.apache.solr.cloud.OverseerCollectionProcessor.REPLICATION_FACTOR; +import static org.apache.solr.cloud.OverseerCollectionProcessor.REQUESTID; +import static org.apache.solr.cloud.OverseerCollectionProcessor.ROUTER; +import static org.apache.solr.cloud.OverseerCollectionProcessor.SHARDS_PROP; +import static org.apache.solr.common.cloud.ZkNodeProps.makeMap; +import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP; +import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP; +import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDROLE; +import static org.apache.solr.common.params.CollectionParams.CollectionAction.CLUSTERPROP; +import static org.apache.solr.common.params.CollectionParams.CollectionAction.OVERSEERSTATUS; +import static org.apache.solr.common.params.CollectionParams.CollectionAction.REMOVEROLE; public class CollectionsHandler extends RequestHandlerBase { protected static Logger log = 
LoggerFactory.getLogger(CollectionsHandler.class); diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index ac2689b..83405a2 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -17,21 +17,6 @@ package org.apache.solr.handler.component; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; - -import org.apache.commons.lang.StringUtils; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.ReaderUtil; @@ -47,14 +32,20 @@ import org.apache.lucene.search.Weight; import org.apache.lucene.search.grouping.GroupDocs; import org.apache.lucene.search.grouping.SearchGroup; import org.apache.lucene.search.grouping.TopGroups; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.InPlaceMergeSorter; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrException; -import org.apache.solr.common.params.*; +import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CursorMarkParams; +import org.apache.solr.common.params.GroupParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.MoreLikeThisParams; +import org.apache.solr.common.params.ShardParams; +import org.apache.solr.common.params.SolrParams; import 
org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; @@ -102,6 +93,20 @@ import org.apache.solr.util.SolrPluginUtils; import java.util.Collections; import java.util.Comparator; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + /** * TODO! * @@ -1240,6 +1245,11 @@ public class QueryComponent extends SearchComponent } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public float score() throws IOException { return score; } diff --git a/solr/core/src/java/org/apache/solr/schema/LatLonType.java b/solr/core/src/java/org/apache/solr/schema/LatLonType.java index de5bc61..c5d1a6d 100644 --- a/solr/core/src/java/org/apache/solr/schema/LatLonType.java +++ b/solr/core/src/java/org/apache/solr/schema/LatLonType.java @@ -16,14 +16,10 @@ package org.apache.solr.schema; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; - +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.distance.DistanceUtils; import com.spatial4j.core.shape.Point; - +import com.spatial4j.core.shape.Rectangle; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; @@ -31,7 +27,6 @@ import org.apache.lucene.index.StorableField; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.VectorValueSource; -import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ComplexExplanation; @@ -41,6 +36,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.SortField; import org.apache.lucene.search.Weight; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.Bits; import org.apache.solr.common.SolrException; @@ -50,13 +46,14 @@ import org.apache.solr.search.ExtendedQueryBase; import org.apache.solr.search.PostFilter; import org.apache.solr.search.QParser; import org.apache.solr.search.SpatialOptions; - -import com.spatial4j.core.context.SpatialContext; -import com.spatial4j.core.distance.DistanceUtils; -import com.spatial4j.core.shape.Rectangle; - import org.apache.solr.util.SpatialUtils; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; + /** * Represents a Latitude/Longitude as a 2 dimensional point. Latitude is always specified first. 
@@ -342,13 +339,13 @@ class SpatialDistanceQuery extends ExtendedQueryBase implements PostFilter { } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { return new SpatialScorer(context, acceptDocs, this, queryWeight); } @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - return ((SpatialScorer)scorer(context, context.reader().getLiveDocs())).explain(doc); + return ((SpatialScorer)scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs())).explain(doc); } } @@ -507,6 +504,11 @@ class SpatialDistanceQuery extends ExtendedQueryBase implements PostFilter { result.addDetail(new Explanation(weight.queryNorm,"queryNorm")); return result; } + + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(); + } } @Override diff --git a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java index e2ddab4..4d7ccf2 100644 --- a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java @@ -17,14 +17,9 @@ package org.apache.solr.search; -import java.io.IOException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; - +import com.carrotsearch.hppc.FloatArrayList; +import com.carrotsearch.hppc.IntOpenHashSet; +import com.carrotsearch.hppc.cursors.IntCursor; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocValues; @@ -36,14 +31,11 @@ import org.apache.lucene.index.TermsEnum; import 
org.apache.lucene.queries.function.FunctionQuery; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Collector; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.FilterCollector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; @@ -62,9 +54,13 @@ import org.apache.solr.schema.TrieFloatField; import org.apache.solr.schema.TrieIntField; import org.apache.solr.schema.TrieLongField; -import com.carrotsearch.hppc.FloatArrayList; -import com.carrotsearch.hppc.IntOpenHashSet; -import com.carrotsearch.hppc.cursors.IntCursor; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; /** @@ -404,6 +400,11 @@ public class CollapsingQParserPlugin extends QParserPlugin { super(null); } + @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + throw new UnsupportedOperationException(); + } + public float score() { return score; } diff --git a/solr/core/src/java/org/apache/solr/search/DocSetBase.java b/solr/core/src/java/org/apache/solr/search/DocSetBase.java index dfe212a..d9fabd9 100644 --- a/solr/core/src/java/org/apache/solr/search/DocSetBase.java +++ b/solr/core/src/java/org/apache/solr/search/DocSetBase.java @@ -27,6 +27,8 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.apache.solr.common.SolrException; +import java.io.IOException; + /** A base class that may be usefull for implementing DocSets 
*/ abstract class DocSetBase implements DocSet { @@ -159,6 +161,33 @@ abstract class DocSetBase implements DocSet { return this.size() - this.intersectionSize(other); } + public static DocIdSet EMPTY_DOCIDSET = new DocIdSet() { + @Override + public DocIdSetIterator iterator() throws IOException { + return new DocIdSetIterator() { + @Override + public int docID() { + return -1; + } + + @Override + public int nextDoc() throws IOException { + return NO_MORE_DOCS; + } + + @Override + public int advance(int target) throws IOException { + return NO_MORE_DOCS; + } + + @Override + public long cost() { + return 0; + } + }; + } + }; + @Override public Filter getTopFilter() { final FixedBitSet bs = getBits(); @@ -178,6 +207,9 @@ abstract class DocSetBase implements DocSet { final int maxDoc = reader.maxDoc(); final int max = base + maxDoc; // one past the max doc in this segment. + if (base > bs.length()) + return EMPTY_DOCIDSET; + return BitsFilteredDocIdSet.wrap(new DocIdSet() { @Override public DocIdSetIterator iterator() { diff --git a/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java index a2b4958..11ea808 100644 --- a/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java @@ -16,13 +16,6 @@ */ package org.apache.solr.search; -import java.io.Closeable; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Set; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Fields; @@ -39,6 +32,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.search.similarities.Similarity; 
import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -57,6 +51,13 @@ import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.schema.TrieField; import org.apache.solr.util.RefCounted; +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Set; + public class JoinQParserPlugin extends QParserPlugin { public static final String NAME = "join"; @@ -232,7 +233,7 @@ class JoinQuery extends Query { @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { if (filter == null) { boolean debug = rb != null && rb.isDebug(); long start = debug ? System.currentTimeMillis() : 0; @@ -501,7 +502,7 @@ class JoinQuery extends Query { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - Scorer scorer = scorer(context, context.reader().getLiveDocs()); + Scorer scorer = scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()); boolean exists = scorer.advance(doc) == doc; ComplexExplanation result = new ComplexExplanation(); @@ -561,6 +562,13 @@ class JoinQuery extends Query { } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + if (iter instanceof Scorer) { + return ((Scorer) iter).intervals(collectIntervals); + } + throw new UnsupportedOperationException("Positions are only supported for Scorers"); + } + public long cost() { return iter.cost(); } diff --git a/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java b/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java index dfdf7ad..5028c72 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java +++ b/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java @@ -2,6 
+2,7 @@ package org.apache.solr.search; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.*; +import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.util.Bits; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.AtomicReaderContext; @@ -119,7 +120,7 @@ public class SolrConstantScoreQuery extends ConstantScoreQuery implements Extend } @Override - public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { return new ConstantScorer(context, this, queryWeight, acceptDocs); } @@ -197,6 +198,13 @@ public class SolrConstantScoreQuery extends ConstantScoreQuery implements Extend } @Override + public IntervalIterator intervals(boolean collectIntervals) throws IOException { + if (docIdSetIterator instanceof Scorer) { + return ((Scorer) docIdSetIterator).intervals(collectIntervals); + } + throw new UnsupportedOperationException("Positions are only supported for Scorers"); + } + public long cost() { return docIdSetIterator.cost(); } diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index 50e82bf..698cc4f 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -17,22 +17,6 @@ package org.apache.solr.search; -import java.io.Closeable; -import java.io.IOException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import 
java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleField; @@ -60,6 +44,7 @@ import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.Weight.PostingFeatures; import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -114,6 +99,23 @@ import org.apache.solr.update.SolrIndexConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.Closeable; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + /** * SolrIndexSearcher adds schema awareness and caching functionality @@ -2494,7 +2496,7 @@ class FilterImpl extends Filter { iterators.add(iter); } for (Weight w : weights) { - Scorer scorer = w.scorer(context, context.reader().getLiveDocs()); + Scorer scorer = w.scorer(context, PostingFeatures.DOCS_AND_FREQS, context.reader().getLiveDocs()); if (scorer == null) return null; iterators.add(scorer); } diff --git a/solr/core/src/java/org/apache/solr/search/join/IgnoreAcceptDocsQuery.java b/solr/core/src/java/org/apache/solr/search/join/IgnoreAcceptDocsQuery.java index d61b4ce..75fce7d 100644 --- a/solr/core/src/java/org/apache/solr/search/join/IgnoreAcceptDocsQuery.java +++ b/solr/core/src/java/org/apache/solr/search/join/IgnoreAcceptDocsQuery.java @@ -86,8 +86,8 @@ public class IgnoreAcceptDocsQuery extends Query { } @Override - public Scorer scorer(AtomicReaderContext context, 
Bits acceptDocs) throws IOException { - return w.scorer(context, null); + public Scorer scorer(AtomicReaderContext context, PostingFeatures flags, Bits acceptDocs) throws IOException { + return w.scorer(context, flags, null); } } diff --git a/solr/core/test-lib/cglib-nodep-2.2.jar b/solr/core/test-lib/cglib-nodep-2.2.jar new file mode 100644 index 0000000..ed07cb5 Binary files /dev/null and b/solr/core/test-lib/cglib-nodep-2.2.jar differ diff --git a/solr/core/test-lib/commons-collections-3.2.1.jar b/solr/core/test-lib/commons-collections-3.2.1.jar new file mode 100644 index 0000000..c35fa1f Binary files /dev/null and b/solr/core/test-lib/commons-collections-3.2.1.jar differ diff --git a/solr/core/test-lib/dom4j-1.6.1.jar b/solr/core/test-lib/dom4j-1.6.1.jar new file mode 100644 index 0000000..c8c4dbb Binary files /dev/null and b/solr/core/test-lib/dom4j-1.6.1.jar differ diff --git a/solr/core/test-lib/easymock-3.0.jar b/solr/core/test-lib/easymock-3.0.jar new file mode 100644 index 0000000..2728943 Binary files /dev/null and b/solr/core/test-lib/easymock-3.0.jar differ diff --git a/solr/core/test-lib/javax.servlet-api-3.0.1.jar b/solr/core/test-lib/javax.servlet-api-3.0.1.jar new file mode 100644 index 0000000..4e2edcc Binary files /dev/null and b/solr/core/test-lib/javax.servlet-api-3.0.1.jar differ diff --git a/solr/core/test-lib/jetty-6.1.26.jar b/solr/core/test-lib/jetty-6.1.26.jar new file mode 100644 index 0000000..2cbe07a Binary files /dev/null and b/solr/core/test-lib/jetty-6.1.26.jar differ diff --git a/solr/core/test-lib/jetty-util-6.1.26.jar b/solr/core/test-lib/jetty-util-6.1.26.jar new file mode 100644 index 0000000..cd23752 Binary files /dev/null and b/solr/core/test-lib/jetty-util-6.1.26.jar differ diff --git a/solr/core/test-lib/objenesis-1.2.jar b/solr/core/test-lib/objenesis-1.2.jar new file mode 100644 index 0000000..fb04d7f Binary files /dev/null and b/solr/core/test-lib/objenesis-1.2.jar differ diff --git 
a/solr/licenses/jetty-continuation-8.1.2.v20120308.jar.sha1 b/solr/licenses/jetty-continuation-8.1.2.v20120308.jar.sha1 new file mode 100644 index 0000000..1276297 --- /dev/null +++ b/solr/licenses/jetty-continuation-8.1.2.v20120308.jar.sha1 @@ -0,0 +1 @@ +545d335d2f6d5e195939528f6a37f23abad4f58f diff --git a/solr/licenses/jetty-deploy-8.1.2.v20120308.jar.sha1 b/solr/licenses/jetty-deploy-8.1.2.v20120308.jar.sha1 new file mode 100644 index 0000000..3414c0f --- /dev/null +++ b/solr/licenses/jetty-deploy-8.1.2.v20120308.jar.sha1 @@ -0,0 +1 @@ +79b1ef70ba4bb4c05d35516f795ff306a96bb25a diff --git a/solr/licenses/jetty-http-8.1.2.v20120308.jar.sha1 b/solr/licenses/jetty-http-8.1.2.v20120308.jar.sha1 new file mode 100644 index 0000000..aafa446 --- /dev/null +++ b/solr/licenses/jetty-http-8.1.2.v20120308.jar.sha1 @@ -0,0 +1 @@ +650858c9c7344da2455b60069224ee148a80bdc5 diff --git a/solr/licenses/jetty-io-8.1.2.v20120308.jar.sha1 b/solr/licenses/jetty-io-8.1.2.v20120308.jar.sha1 new file mode 100644 index 0000000..13f2aae --- /dev/null +++ b/solr/licenses/jetty-io-8.1.2.v20120308.jar.sha1 @@ -0,0 +1 @@ +2da8e10c38250f713764a31bc4b7dbc58983de0e diff --git a/solr/licenses/jetty-jmx-8.1.2.v20120308.jar.sha1 b/solr/licenses/jetty-jmx-8.1.2.v20120308.jar.sha1 new file mode 100644 index 0000000..6368fa2 --- /dev/null +++ b/solr/licenses/jetty-jmx-8.1.2.v20120308.jar.sha1 @@ -0,0 +1 @@ +049299fdc468aec112070369513f363447c12e76 diff --git a/solr/licenses/jetty-security-8.1.2.v20120308.jar.sha1 b/solr/licenses/jetty-security-8.1.2.v20120308.jar.sha1 new file mode 100644 index 0000000..4ce07f9 --- /dev/null +++ b/solr/licenses/jetty-security-8.1.2.v20120308.jar.sha1 @@ -0,0 +1 @@ +3a559bfb2788e71b4469631497c58c93ba273259 diff --git a/solr/licenses/jetty-server-8.1.2.v20120308.jar.sha1 b/solr/licenses/jetty-server-8.1.2.v20120308.jar.sha1 new file mode 100644 index 0000000..1dc00a9 --- /dev/null +++ b/solr/licenses/jetty-server-8.1.2.v20120308.jar.sha1 @@ -0,0 +1 @@ 
+5d56afa0f80e90aa40c5af42b4f7b82992794f1f diff --git a/solr/licenses/jetty-servlet-8.1.2.v20120308.jar.sha1 b/solr/licenses/jetty-servlet-8.1.2.v20120308.jar.sha1 new file mode 100644 index 0000000..9667c1f --- /dev/null +++ b/solr/licenses/jetty-servlet-8.1.2.v20120308.jar.sha1 @@ -0,0 +1 @@ +d855e7a18f0381b6128ccf4563355e969f826433 diff --git a/solr/licenses/jetty-util-8.1.2.v20120308.jar.sha1 b/solr/licenses/jetty-util-8.1.2.v20120308.jar.sha1 new file mode 100644 index 0000000..f7322cb --- /dev/null +++ b/solr/licenses/jetty-util-8.1.2.v20120308.jar.sha1 @@ -0,0 +1 @@ +d14aef3cae042cd9716fb109d1205bfd84248956 diff --git a/solr/licenses/jetty-webapp-8.1.2.v20120308.jar.sha1 b/solr/licenses/jetty-webapp-8.1.2.v20120308.jar.sha1 new file mode 100644 index 0000000..a1709c7 --- /dev/null +++ b/solr/licenses/jetty-webapp-8.1.2.v20120308.jar.sha1 @@ -0,0 +1 @@ +436ed4d774f26ac348e4a84938af19130b8f9773 diff --git a/solr/licenses/jetty-xml-8.1.2.v20120308.jar.sha1 b/solr/licenses/jetty-xml-8.1.2.v20120308.jar.sha1 new file mode 100644 index 0000000..3f98b4e --- /dev/null +++ b/solr/licenses/jetty-xml-8.1.2.v20120308.jar.sha1 @@ -0,0 +1 @@ +ade750a7b75b6ce58c6e50347b2c1e6dafc1eb4b diff --git a/solr/licenses/zookeeper-3.3.6.jar.sha1 b/solr/licenses/zookeeper-3.3.6.jar.sha1 new file mode 100644 index 0000000..8bd4cd0 --- /dev/null +++ b/solr/licenses/zookeeper-3.3.6.jar.sha1 @@ -0,0 +1 @@ +36825ff1595144d42d2f3a51f810eaefdcf8cb79