new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery()) instead.
+ * This query will be removed in Lucene 4.0
*/
-public class SpanRegexQuery extends SpanQuery implements RegexQueryCapable {
- private RegexCapabilities regexImpl = new JavaUtilRegexCapabilities();
- private Term term;
+@Deprecated
+public class SpanRegexQuery extends SpanMultiTermQueryWrapperPlease note: This attribute is intended to be added only by the TermsEnum
+ * to itself in its constructor and consumed by the {@link MultiTermQuery.RewriteMethod}.
+ * @lucene.internal
+ */
+public interface BoostAttribute extends Attribute {
+ /** Sets the boost in this attribute */
+ public void setBoost(float boost);
+ /** Retrieves the boost, default is {@code 1.0f}. */
+ public float getBoost();
+}
Property changes on: lucene\src\java\org\apache\lucene\search\BoostAttribute.java
___________________________________________________________________
Added: svn:keywords
+ Date Author Id Revision HeadURL
Added: svn:eol-style
+ native
Index: lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java (revision 0)
@@ -0,0 +1,60 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeImpl;
+
+/** Implementation class for {@link BoostAttribute}.
+ * @lucene.internal
+ */
+public final class BoostAttributeImpl extends AttributeImpl implements BoostAttribute {
+ private float boost = 1.0f; // default boost; clear() restores this value
+ /** Sets the boost in this attribute. */
+ public void setBoost(float boost) {
+ this.boost = boost;
+ }
+ /** Retrieves the boost, default is {@code 1.0f}. */
+ public float getBoost() {
+ return boost;
+ }
+ /** Resets the boost to its default of {@code 1.0f}. */
+ @Override
+ public void clear() {
+ boost = 1.0f;
+ }
+ /** Two instances are equal when their boosts have the same bit pattern, which keeps this method consistent with {@link #hashCode()}. */
+ @Override
+ public boolean equals(Object other) {
+ if (this == other)
+ return true;
+ if (other instanceof BoostAttributeImpl) // compare bits: with == the values 0.0f and -0.0f are equal although their hash codes differ
+ return Float.floatToIntBits(((BoostAttributeImpl) other).boost) == Float.floatToIntBits(boost);
+ return false;
+ }
+ /** Hashes the bit pattern of the boost, the same representation {@link #equals(Object)} compares. */
+ @Override
+ public int hashCode() {
+ return Float.floatToIntBits(boost);
+ }
+ /** Copies the boost into {@code target}, which must implement {@link BoostAttribute}. */
+ @Override
+ public void copyTo(AttributeImpl target) {
+ ((BoostAttribute) target).setBoost(boost);
+ }
+}
Property changes on: lucene\src\java\org\apache\lucene\search\BoostAttributeImpl.java
___________________________________________________________________
Added: svn:keywords
+ Date Author Id Revision HeadURL
Added: svn:eol-style
+ native
Index: lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (revision 0)
@@ -0,0 +1,186 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Comparator;
+
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
+
+class ConstantScoreAutoRewrite extends TermCollectingRewrite Please note: This attribute is intended to be added by the {@link MultiTermQuery.RewriteMethod}
+ * to an empty {@link AttributeSource} that is shared for all segments
+ * during query rewrite. This attribute source is passed to all segment enums
+ * on {@link MultiTermQuery#getTermsEnum(IndexReader,AttributeSource)}.
+ * {@link TopTermsRewrite} uses this attribute to
+ * inform all enums about the current boost, that is not competitive.
+ * @lucene.internal
+ */
+public interface MaxNonCompetitiveBoostAttribute extends Attribute {
+ /** This is the maximum boost that would not be competitive. */
+ public void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost);
+ /** This is the maximum boost that would not be competitive. Default is negative infinity, which means every term is competitive. */
+ public float getMaxNonCompetitiveBoost();
+ /** This is the term or Please note: This attribute is intended to be added only by the TermsEnum
- * to itsself in its constructor and consumed by the {@link RewriteMethod}.
- * @lucene.internal
- */
- public static interface BoostAttribute extends Attribute {
- /** Sets the boost in this attribute */
- public void setBoost(float boost);
- /** Retrieves the boost, default is {@code 1.0f}. */
- public float getBoost();
- }
- /** Implementation class for {@link BoostAttribute}. */
- public static final class BoostAttributeImpl extends AttributeImpl implements BoostAttribute {
- private float boost = 1.0f;
-
- public void setBoost(float boost) {
- this.boost = boost;
- }
-
- public float getBoost() {
- return boost;
- }
-
- @Override
- public void clear() {
- boost = 1.0f;
- }
-
- @Override
- public boolean equals(Object other) {
- if (this == other)
- return true;
- if (other instanceof BoostAttributeImpl)
- return ((BoostAttributeImpl) other).boost == boost;
- return false;
- }
-
- @Override
- public int hashCode() {
- return Float.floatToIntBits(boost);
- }
-
- @Override
- public void copyTo(AttributeImpl target) {
- ((BoostAttribute) target).setBoost(boost);
- }
- }
-
- /** Add this {@link Attribute} to a fresh {@link AttributeSource} before calling
- * {@link #getTermsEnum(IndexReader,AttributeSource)}.
- * {@link FuzzyQuery} is using this to control its internal behaviour
- * to only return competitive terms.
- * Please note: This attribute is intended to be added by the {@link RewriteMethod}
- * to an empty {@link AttributeSource} that is shared for all segments
- * during query rewrite. This attribute source is passed to all segment enums
- * on {@link #getTermsEnum(IndexReader,AttributeSource)}.
- * {@link TopTermsBooleanQueryRewrite} uses this attribute to
- * inform all enums about the current boost, that is not competitive.
- * @lucene.internal
- */
- public static interface MaxNonCompetitiveBoostAttribute extends Attribute {
- /** This is the maximum boost that would not be competitive. */
- public void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost);
- /** This is the maximum boost that would not be competitive. Default is negative infinity, which means every term is competitive. */
- public float getMaxNonCompetitiveBoost();
- /** This is the term or
- * NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than
- * NOTE: This rewrite method will hit {@link
+ * BooleanQuery.TooManyClauses} if the number of terms
+ * exceeds {@link BooleanQuery#getMaxClauseCount}.
+ *
+ * @see #setRewriteMethod */
+ public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = ScoringRewrite.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
- @Override
- public boolean equals(Object obj) {
- if (this == obj) return true;
- if (obj == null) return false;
- if (getClass() != obj.getClass()) return false;
- TopTermsBooleanQueryRewrite other = (TopTermsBooleanQueryRewrite) obj;
- if (size != other.size) return false;
- return true;
- }
-
- private static final Comparator NOTE: This rewrite method will hit {@link
- * BooleanQuery.TooManyClauses} if the number of terms
- * exceeds {@link BooleanQuery#getMaxClauseCount}.
- *
- * @see #setRewriteMethod */
- public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = new ConstantScoreBooleanQueryRewrite();
-
-
+
/** A rewrite method that tries to pick the best
* constant-score rewrite method based on term and
* document counts from the query. If both the number of
@@ -666,141 +214,8 @@
* Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is
* used.
*/
- public static class ConstantScoreAutoRewrite extends BooleanQueryRewrite {
+ public static class ConstantScoreAutoRewrite extends org.apache.lucene.search.ConstantScoreAutoRewrite {}
- // Defaults derived from rough tests with a 20.0 million
- // doc Wikipedia index. With more than 350 terms in the
- // query, the filter method is fastest:
- public static int DEFAULT_TERM_COUNT_CUTOFF = 350;
-
- // If the query will hit more than 1 in 1000 of the docs
- // in the index (0.1%), the filter method is fastest:
- public static double DEFAULT_DOC_COUNT_PERCENT = 0.1;
-
- private int termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
- private double docCountPercent = DEFAULT_DOC_COUNT_PERCENT;
-
- /** If the number of terms in this query is equal to or
- * larger than this setting then {@link
- * #CONSTANT_SCORE_FILTER_REWRITE} is used. */
- public void setTermCountCutoff(int count) {
- termCountCutoff = count;
- }
-
- /** @see #setTermCountCutoff */
- public int getTermCountCutoff() {
- return termCountCutoff;
- }
-
- /** If the number of documents to be visited in the
- * postings exceeds this specified percentage of the
- * maxDoc() for the index, then {@link
- * #CONSTANT_SCORE_FILTER_REWRITE} is used.
- * @param percent 0.0 to 100.0 */
- public void setDocCountPercent(double percent) {
- docCountPercent = percent;
- }
-
- /** @see #setDocCountPercent */
- public double getDocCountPercent() {
- return docCountPercent;
- }
-
- @Override
- public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
-
- // Get the enum and start visiting terms. If we
- // exhaust the enum before hitting either of the
- // cutoffs, we use ConstantBooleanQueryRewrite; else,
- // ConstantFilterRewrite:
- final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
- final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);
-
- final CutOffTermCollector col = new CutOffTermCollector(docCountCutoff, termCountLimit);
- collectTerms(reader, query, col);
- final int size = col.pendingTerms.size();
- if (col.hasCutOff) {
- return CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
- } else if (size == 0) {
- return new BooleanQuery(true);
- } else {
- final BooleanQuery bq = new BooleanQuery(true);
- final Term placeholderTerm = new Term(query.field);
- final BytesRefHash pendingTerms = col.pendingTerms;
- final int sort[] = pendingTerms.sort(col.termsEnum.getComparator());
- for(int i = 0; i < size; i++) {
- // docFreq is not used for constant score here, we pass 1
- // to explicitely set a fake value, so it's not calculated
- bq.add(new TermQuery(
- placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1
- ), BooleanClause.Occur.SHOULD);
- }
- // Strip scores
- final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
- result.setBoost(query.getBoost());
- query.incTotalNumberOfTerms(size);
- return result;
- }
- }
-
- static final class CutOffTermCollector extends TermCollector {
- CutOffTermCollector(int docCountCutoff, int termCountLimit) {
- this.docCountCutoff = docCountCutoff;
- this.termCountLimit = termCountLimit;
- }
-
- @Override
- public void setNextEnum(TermsEnum termsEnum) throws IOException {
- this.termsEnum = termsEnum;
- }
-
- @Override
- public boolean collect(BytesRef bytes) throws IOException {
- pendingTerms.add(bytes);
- docVisitCount += termsEnum.docFreq();
- if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
- hasCutOff = true;
- return false;
- }
- return true;
- }
-
- int docVisitCount = 0;
- boolean hasCutOff = false;
- TermsEnum termsEnum;
-
- final int docCountCutoff, termCountLimit;
- final BytesRefHash pendingTerms = new BytesRefHash();
- }
-
- @Override
- public int hashCode() {
- final int prime = 1279;
- return (int) (prime * termCountCutoff + Double.doubleToLongBits(docCountPercent));
- }
-
- @Override
- public boolean equals(Object obj) {
- if (this == obj)
- return true;
- if (obj == null)
- return false;
- if (getClass() != obj.getClass())
- return false;
-
- ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj;
- if (other.termCountCutoff != termCountCutoff) {
- return false;
- }
-
- if (Double.doubleToLongBits(other.docCountPercent) != Double.doubleToLongBits(docCountPercent)) {
- return false;
- }
-
- return true;
- }
- }
-
/** Read-only default instance of {@link
* ConstantScoreAutoRewrite}, with {@link
* ConstantScoreAutoRewrite#setTermCountCutoff} set to
@@ -851,7 +266,7 @@
* positioned to the first matching term.
* The given {@link AttributeSource} is passed by the {@link RewriteMethod} to
* provide attributes, the rewrite method uses to inform about e.g. maximum competitive boosts.
- * This is currently only used by {@link TopTermsBooleanQueryRewrite}
+ * This is currently only used by {@link TopTermsRewrite}
*/
protected abstract TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException;
Index: lucene/src/java/org/apache/lucene/search/ScoringRewrite.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/ScoringRewrite.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/ScoringRewrite.java (revision 0)
@@ -0,0 +1,203 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Comparator;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
+
+/** @lucene.internal Only public to be accessible by spans package. */
+public abstract class ScoringRewrite NOTE: This rewrite method will hit {@link
+ * BooleanQuery.TooManyClauses} if the number of terms
+ * exceeds {@link BooleanQuery#getMaxClauseCount}.
+ *
+ * @see #setRewriteMethod */
+ public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringRewrite NOTE: This rewrite method will hit {@link
+ * BooleanQuery.TooManyClauses} if the number of terms
+ * exceeds {@link BooleanQuery#getMaxClauseCount}.
+ *
+ * @see #setRewriteMethod */
+ public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = new RewriteMethod() {
+ @Override
+ public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
+ Query result = SCORING_BOOLEAN_QUERY_REWRITE.rewrite(reader, query); // expand the terms with the scoring rewrite first
+ assert result instanceof BooleanQuery;
+ // TODO: if empty boolean query return NullQuery?
+ if (!((BooleanQuery) result).clauses().isEmpty()) { // an empty BooleanQuery is returned unchanged
+ // strip the scores off by wrapping the query as a filter, then carry over the boost of the original query
+ result = new ConstantScoreQuery(new QueryWrapperFilter(result));
+ result.setBoost(query.getBoost());
+ }
+ return result;
+ }
+
+ // Make sure we are still a singleton even after deserializing
+ protected Object readResolve() {
+ return CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
+ }
+ };
+
+ @Override
+ public final Q rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
+ final Q result = getTopLevelQuery();
+ final ParallelArraysTermCollector col = new ParallelArraysTermCollector(result instanceof BooleanQuery); // the max clause count is only checked when the result is a BooleanQuery
+ collectTerms(reader, query, col);
+ // The collector now holds every matching term once; add one clause per term, ordered by the enum's comparator.
+ final Term placeholderTerm = new Term(query.field);
+ final int size = col.terms.size();
+ if (size > 0) {
+ final int sort[] = col.terms.sort(col.termsEnum.getComparator());
+ final int[] docFreq = col.array.docFreq;
+ final float[] boost = col.array.boost;
+ for (int i = 0; i < size; i++) {
+ final int pos = sort[i]; // slot of the i-th sorted term inside the parallel arrays
+ final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef()));
+ assert reader.docFreq(term) == docFreq[pos]; // sanity check: the collected docFreq must match what the reader reports
+ addClause(result, term, docFreq[pos], query.getBoost() * boost[pos]); // the per-term boost is scaled by the query boost
+ }
+ }
+ query.incTotalNumberOfTerms(size);
+ return result;
+ }
+
+ static final class ParallelArraysTermCollector extends TermCollector { // collects each distinct term together with its docFreq and boost
+ private final boolean checkMaxClauseCount;
+ final TermFreqBoostByteStart array = new TermFreqBoostByteStart(16);
+ final BytesRefHash terms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array);
+ TermsEnum termsEnum;
+ // boost attribute of the current enum; re-fetched on every setNextEnum call
+ private BoostAttribute boostAtt;
+ /** @param checkMaxClauseCount if true, {@code collect} throws {@link BooleanQuery.TooManyClauses} once a new term's index reaches the max clause count */
+ public ParallelArraysTermCollector(boolean checkMaxClauseCount) {
+ this.checkMaxClauseCount = checkMaxClauseCount;
+ }
+ /** Remembers the given enum and obtains its {@link BoostAttribute} through {@code addAttribute}. */
+ @Override
+ public void setNextEnum(TermsEnum termsEnum) throws IOException {
+ this.termsEnum = termsEnum;
+ this.boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
+ }
+ /** Adds the term to the hash, summing the docFreq of terms that were already collected; always returns {@code true}. */
+ @Override
+ public boolean collect(BytesRef bytes) {
+ final int e = terms.add(bytes);
+ if (e < 0 ) {
+ // duplicate term: update docFreq
+ final int pos = (-e)-1; // a negative result encodes the existing slot as -(pos+1)
+ array.docFreq[pos] += termsEnum.docFreq();
+ assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums";
+ } else {
+ // new entry: we populate the entry initially
+ array.docFreq[e] = termsEnum.docFreq();
+ array.boost[e] = boostAtt.getBoost();
+ }
+ // if the new entry reaches the max clause count, we exit early
+ if (checkMaxClauseCount && e >= BooleanQuery.getMaxClauseCount())
+ throw new BooleanQuery.TooManyClauses();
+ return true;
+ }
+ }
+
+ /** Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq */
+ static final class TermFreqBoostByteStart extends DirectBytesStartArray {
+ int[] docFreq;
+ float[] boost;
+ /** @param initSize initial size, passed straight to the {@link DirectBytesStartArray} constructor */
+ public TermFreqBoostByteStart(int initSize) {
+ super(initSize);
+ }
+ /** Initializes the superclass array and allocates both parallel arrays at least as long as it. */
+ @Override
+ public int[] init() {
+ final int[] ord = super.init();
+ boost = new float[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_FLOAT)];
+ docFreq = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)];
+ assert boost.length >= ord.length && docFreq.length >= ord.length;
+ return ord;
+ }
+ /** Grows the superclass array, then grows both parallel arrays to at least its new length. */
+ @Override
+ public int[] grow() {
+ final int[] ord = super.grow();
+ docFreq = ArrayUtil.grow(docFreq, ord.length);
+ boost = ArrayUtil.grow(boost, ord.length);
+ assert boost.length >= ord.length && docFreq.length >= ord.length;
+ return ord;
+ }
+ /** Drops both parallel arrays and then delegates to the superclass. */
+ @Override
+ public int[] clear() {
+ boost = null;
+ docFreq = null;
+ return super.clear();
+ }
+
+ }
+}
Property changes on: lucene\src\java\org\apache\lucene\search\ScoringRewrite.java
___________________________________________________________________
Added: svn:keywords
+ Date Author Id Revision HeadURL
Added: svn:eol-style
+ native
Index: lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (revision 0)
@@ -0,0 +1,234 @@
+package org.apache.lucene.search.spans;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopTermsRewrite;
+import org.apache.lucene.search.ScoringRewrite;
+import org.apache.lucene.search.BooleanClause.Occur; // javadocs only
+
+/**
+ * Wraps any {@link MultiTermQuery} as a {@link SpanQuery},
+ * so it can be nested within other SpanQuery classes.
+ *
+ * The query is rewritten by default to a {@link SpanOrQuery} containing
+ * the expanded terms, but this can be customized.
+ *
+ * Example:
+ *
+ * NOTE: This will call {@link MultiTermQuery#setRewriteMethod(MultiTermQuery.RewriteMethod)}
+ * on the wrapped
+ * This rewrite method only uses the top scoring terms so it will not overflow
+ * the boolean max clause count.
+ *
+ * @see #setRewriteMethod
+ */
+ public static final class TopTermsSpanBooleanQueryRewrite extends SpanRewriteMethod {
+ private final TopTermsRewrite
+ * NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than
+ * reader which share a prefix of
Index: lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttribute.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttribute.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttribute.java (revision 0)
@@ -0,0 +1,45 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeSource; // javadocs only
+import org.apache.lucene.util.BytesRef;
+
+/** Add this {@link Attribute} to a fresh {@link AttributeSource} before calling
+ * {@link MultiTermQuery#getTermsEnum(IndexReader,AttributeSource)}.
+ * {@link FuzzyQuery} is using this to control its internal behaviour
+ * to only return competitive terms.
+ * null of the term that triggered the boost change. */
+ public void setCompetitiveTerm(BytesRef competitiveTerm);
+ /** This is the term or {@code null} of the term that triggered the boost change. Default is {@code null}, which means every term is competitive. */
+ public BytesRef getCompetitiveTerm();
+}
Property changes on: lucene\src\java\org\apache\lucene\search\MaxNonCompetitiveBoostAttribute.java
___________________________________________________________________
Added: svn:keywords
+ Date Author Id Revision HeadURL
Added: svn:eol-style
+ native
Index: lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java (revision 0)
@@ -0,0 +1,78 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.BytesRef;
+
+/** Implementation class for {@link MaxNonCompetitiveBoostAttribute}.
+ * @lucene.internal
+ */
+public final class MaxNonCompetitiveBoostAttributeImpl extends AttributeImpl implements MaxNonCompetitiveBoostAttribute {
+ private float maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY;
+ private BytesRef competitiveTerm = null;
+ /** Sets the maximum boost that would not be competitive. */
+ public void setMaxNonCompetitiveBoost(final float maxNonCompetitiveBoost) {
+ this.maxNonCompetitiveBoost = maxNonCompetitiveBoost;
+ }
+ /** Returns the maximum boost that would not be competitive; the default is negative infinity. */
+ public float getMaxNonCompetitiveBoost() {
+ return maxNonCompetitiveBoost;
+ }
+ /** Sets the term that triggered the boost change, or {@code null}; the reference is stored without copying. */
+ public void setCompetitiveTerm(final BytesRef competitiveTerm) {
+ this.competitiveTerm = competitiveTerm;
+ }
+ /** Returns the term that triggered the boost change; the default is {@code null}. */
+ public BytesRef getCompetitiveTerm() {
+ return competitiveTerm;
+ }
+ /** Resets both values to their defaults: negative infinity and {@code null}. */
+ @Override
+ public void clear() {
+ maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY;
+ competitiveTerm = null;
+ }
+ /** Equal when the boosts have the same bit pattern (consistent with {@link #hashCode()}) and the terms are equal or both {@code null}. */
+ @Override
+ public boolean equals(Object other) {
+ if (this == other)
+ return true;
+ if (other instanceof MaxNonCompetitiveBoostAttributeImpl) {
+ final MaxNonCompetitiveBoostAttributeImpl o = (MaxNonCompetitiveBoostAttributeImpl) other;
+ return (Float.floatToIntBits(o.maxNonCompetitiveBoost) == Float.floatToIntBits(maxNonCompetitiveBoost)) // bits, not ==: 0.0f and -0.0f hash differently
+ && (o.competitiveTerm == null ? competitiveTerm == null : o.competitiveTerm.equals(competitiveTerm));
+ }
+ return false;
+ }
+ /** Combines the bit pattern of the boost with the hash of the term, if there is one. */
+ @Override
+ public int hashCode() {
+ int hash = Float.floatToIntBits(maxNonCompetitiveBoost);
+ if (competitiveTerm != null) hash = 31 * hash + competitiveTerm.hashCode();
+ return hash;
+ }
+ /** Copies both values into {@code target}, which must implement {@link MaxNonCompetitiveBoostAttribute}; the term is passed by reference. */
+ @Override
+ public void copyTo(AttributeImpl target) {
+ final MaxNonCompetitiveBoostAttribute t = (MaxNonCompetitiveBoostAttribute) target; // cast to the interface, as BoostAttributeImpl.copyTo does
+ t.setMaxNonCompetitiveBoost(maxNonCompetitiveBoost);
+ t.setCompetitiveTerm(competitiveTerm);
+ }
+}
Property changes on: lucene\src\java\org\apache\lucene\search\MaxNonCompetitiveBoostAttributeImpl.java
___________________________________________________________________
Added: svn:keywords
+ Date Author Id Revision HeadURL
Added: svn:eol-style
+ native
Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 1034499)
+++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy)
@@ -19,29 +19,12 @@
import java.io.IOException;
import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.PriorityQueue;
-import java.util.Comparator;
-import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Attribute;
-import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.ByteBlockPool;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefHash;
-import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.ReaderUtil;
-import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
/**
* An abstract {@link Query} that matches documents
@@ -80,156 +63,12 @@
protected final String field;
protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
transient int numberOfTerms = 0;
-
- /** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link #getTermsEnum(IndexReader,AttributeSource)}
- * and update the boost on each returned term. This enables to control the boost factor
- * for each matching term in {@link #SCORING_BOOLEAN_QUERY_REWRITE} or
- * {@link TopTermsBooleanQueryRewrite} mode.
- * {@link FuzzyQuery} is using this to take the edit distance into account.
- * null of the term that triggered the boost change. */
- public void setCompetitiveTerm(BytesRef competitiveTerm);
- /** This is the term or null of the term that triggered the boost change. Default is null, which means every term is competitoive. */
- public BytesRef getCompetitiveTerm();
- }
-
- /** Implementation class for {@link MaxNonCompetitiveBoostAttribute}. */
- public static final class MaxNonCompetitiveBoostAttributeImpl extends AttributeImpl implements MaxNonCompetitiveBoostAttribute {
- private float maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY;
- private BytesRef competitiveTerm = null;
-
- public void setMaxNonCompetitiveBoost(final float maxNonCompetitiveBoost) {
- this.maxNonCompetitiveBoost = maxNonCompetitiveBoost;
- }
-
- public float getMaxNonCompetitiveBoost() {
- return maxNonCompetitiveBoost;
- }
-
- public void setCompetitiveTerm(final BytesRef competitiveTerm) {
- this.competitiveTerm = competitiveTerm;
- }
-
- public BytesRef getCompetitiveTerm() {
- return competitiveTerm;
- }
-
- @Override
- public void clear() {
- maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY;
- competitiveTerm = null;
- }
-
- @Override
- public boolean equals(Object other) {
- if (this == other)
- return true;
- if (other instanceof MaxNonCompetitiveBoostAttributeImpl) {
- final MaxNonCompetitiveBoostAttributeImpl o = (MaxNonCompetitiveBoostAttributeImpl) other;
- return (o.maxNonCompetitiveBoost == maxNonCompetitiveBoost)
- && (o.competitiveTerm == null ? competitiveTerm == null : o.competitiveTerm.equals(competitiveTerm));
- }
- return false;
- }
-
- @Override
- public int hashCode() {
- int hash = Float.floatToIntBits(maxNonCompetitiveBoost);
- if (competitiveTerm != null) hash = 31 * hash + competitiveTerm.hashCode();
- return hash;
- }
-
- @Override
- public void copyTo(AttributeImpl target) {
- final MaxNonCompetitiveBoostAttributeImpl t = (MaxNonCompetitiveBoostAttributeImpl) target;
- t.setMaxNonCompetitiveBoost(maxNonCompetitiveBoost);
- t.setCompetitiveTerm(competitiveTerm);
- }
- }
-
/** Abstract class that defines how the query is rewritten. */
public static abstract class RewriteMethod implements Serializable {
public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException;
}
- private static final class ConstantScoreFilterRewrite extends RewriteMethod {
- @Override
- public Query rewrite(IndexReader reader, MultiTermQuery query) {
- Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFiltersize terms.
- * size, then it will be used instead.
- */
- public TopTermsBooleanQueryRewrite(int size) {
- this.size = size;
- }
-
- /** Return a suitable Query for a MultiTermQuery term. */
- protected abstract Query getQuery(Term term, int docCount);
-
- @Override
- public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
- final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount());
- final PriorityQueue extends TermCollectingRewrite
{
+
+ /** A rewrite method that first translates each term into
+ * {@link BooleanClause.Occur#SHOULD} clause in a
+ * BooleanQuery, and keeps the scores as computed by the
+ * query. Note that typically such scores are
+ * meaningless to the user, and require non-trivial CPU
+ * to compute, so it's almost always better to use {@link
+ * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead.
+ *
+ *
+ */
+public class SpanMultiTermQueryWrapper
+ * {@code
+ * WildcardQuery wildcard = new WildcardQuery(new Term("field", "bro?n"));
+ * SpanQuery spanWildcard = new SpanMultiTermQueryWrapper extends SpanQuery {
+ protected final Q query;
+
+ /**
+ * Create a new SpanMultiTermQueryWrapper.
+ *
+ * @param query Query to wrap.
+ * <p>
+ * <b>NOTE:</b> This constructor calls {@link #setRewriteMethod} and thereby
+ * replaces the rewrite method of the wrapped <code>query</code>, changing its
+ * rewrite method to a suitable one for spans: a {@code TopTermsSpanBooleanQueryRewrite}
+ * of the same size if the query was using a {@code TopTermsRewrite}, otherwise
+ * {@code SCORING_SPAN_QUERY_REWRITE}.
+ * Be sure to not change the rewrite method on the wrapped query afterwards! Doing so will
+ * throw {@link UnsupportedOperationException} on rewriting this query!
+ */
+ public SpanMultiTermQueryWrapper(Q query) {
+ this.query = query;
+
+ MultiTermQuery.RewriteMethod method = query.getRewriteMethod();
+ if (method instanceof TopTermsRewrite) {
+ final int pqsize = ((TopTermsRewrite) method).getSize(); // carry over the size configured on the original top-terms rewrite
+ setRewriteMethod(new TopTermsSpanBooleanQueryRewrite(pqsize));
+ } else {
+ setRewriteMethod(SCORING_SPAN_QUERY_REWRITE); // every other rewrite method is replaced by the scoring span rewrite
+ }
+ }
+
+ /**
+ * Expert: returns the rewriteMethod currently set on the wrapped query.
+ *
+ * @return the wrapped query's rewrite method, cast to {@link SpanRewriteMethod}
+ * @throws UnsupportedOperationException if the wrapped query's rewrite method
+ * is not a {@link SpanRewriteMethod} (for example because it was changed
+ * directly on the wrapped query)
+ */
+ public final SpanRewriteMethod getRewriteMethod() {
+ final MultiTermQuery.RewriteMethod m = query.getRewriteMethod();
+ if (!(m instanceof SpanRewriteMethod))
+ throw new UnsupportedOperationException("You can only use SpanMultiTermQueryWrapper with a suitable SpanRewriteMethod.");
+ return (SpanRewriteMethod) m;
+ }
+
+ /**
+ * Expert: sets the rewrite method. This only makes sense
+ * to be a span rewrite method.
+ * The method is not stored in this wrapper; it is set directly
+ * on the wrapped query.
+ *
+ * @param rewriteMethod the span rewrite method to install on the wrapped query
+ */
+ public final void setRewriteMethod(SpanRewriteMethod rewriteMethod) {
+ query.setRewriteMethod(rewriteMethod);
+ }
+
+ @Override
+ public Spans getSpans(IndexReader reader) throws IOException { // never returns Spans: this wrapper always throws here
+ throw new UnsupportedOperationException("Query should have been rewritten"); // NOTE(review): presumably callers must use the query returned by rewrite(IndexReader) instead - confirm
+ }
+
+ @Override
+ public String getField() { // the field of this span query is the field of the wrapped query
+ return query.getField();
+ }
+
+ @Override
+ public String toString(String field) { // renders as SpanMultiTermQueryWrapper(<wrapped query's toString(field)>)
+ StringBuilder builder = new StringBuilder();
+ builder.append("SpanMultiTermQueryWrapper(");
+ builder.append(query.toString(field)); // the field argument is passed through unchanged to the wrapped query
+ builder.append(")");
+ return builder.toString();
+ }
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException { // delegates to the wrapped query and returns its rewritten form
+ final Query q = query.rewrite(reader);
+ if (!(q instanceof SpanQuery)) // a non-span result means the wrapped query was rewritten by something other than a span rewrite method
+ throw new UnsupportedOperationException("You can only use SpanMultiTermQueryWrapper with a suitable SpanRewriteMethod.");
+ return q;
+ }
+
+ @Override
+ public int hashCode() { // derived solely from the wrapped query, matching what equals(Object) compares
+ return 31 * query.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) { // equal iff obj has exactly this class and wraps an equal query
+ if (this == obj) return true;
+ if (obj == null) return false;
+ if (getClass() != obj.getClass()) return false; // exact class match, so subclasses never equal a plain wrapper
+ final SpanMultiTermQueryWrapper other = (SpanMultiTermQueryWrapper) obj;
+ return query.equals(other.query); // NOTE(review): only the wrapped query is compared; any state inherited from SpanQuery is ignored - confirm this is intended
+ }
+
+ /** Abstract class that defines how the query is rewritten.
+ * It narrows the return type of {@link #rewrite} to SpanQuery, so every
+ * implementation has to produce a span query.
+ */
+ public static abstract class SpanRewriteMethod extends MultiTermQuery.RewriteMethod {
+ @Override
+ public abstract SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException;
+ }
+
+ /**
+ * A rewrite method that first translates each term into a SpanTermQuery in a
+ * {@link Occur#SHOULD} clause in a BooleanQuery, and keeps the
+ * scores as computed by the query.
+ *
+ * @see #setRewriteMethod
+ */
+ public final static SpanRewriteMethod SCORING_SPAN_QUERY_REWRITE = new SpanRewriteMethod() {
+ private final ScoringRewritesize terms.
+ */
+ public TopTermsSpanBooleanQueryRewrite(int size) {
+ delegate = new TopTermsRewrite extends MultiTermQuery.RewriteMethod {
+
+ /** Return a suitable top-level Query for holding all expanded terms.
+ * Implemented by subclasses.
+ * @throws IOException declared for implementations; the cause is not visible here
+ */
+ protected abstract Q getTopLevelQuery() throws IOException;
+
+ /** Add a MultiTermQuery term to the top-level query.
+ * @param topLevel the top-level query to add to (presumably the one returned by getTopLevelQuery() - confirm)
+ * @param term the term to add
+ * @param docCount a document count supplied for the term (exact meaning not visible here - confirm)
+ * @param boost the boost supplied for the term
+ */
+ protected abstract void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException;
+
+ protected final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException {
+ final List
extends TermCollectingRewrite
{
+
+ private final int size;
+
+ /**
+ * Create a TopTermsRewrite for
+ * at most <code>size</code> terms.
+ * <p>
+ * NOTE: if {@link #getMaxSize} is smaller than
+ * <code>size</code>, then it will be used instead.
+ *
+ * @param size the requested maximum priority queue size
+ */
+ public TopTermsRewrite(int size) {
+ this.size = size;
+ }
+
+ /** return the maximum priority queue size, exactly as passed to the constructor.
+ * The size actually used when rewriting is the smaller of this value and {@link #getMaxSize}.
+ */
+ public int getSize() {
+ return size;
+ }
+
+ /** return the maximum size of the priority queue (for boolean rewrites this is BooleanQuery#getMaxClauseCount).
+ * Rewriting caps the configured size at this value.
+ */
+ protected abstract int getMaxSize();
+
+ @Override
+ public final Q rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
+ final int maxSize = Math.min(size, getMaxSize());
+ final PriorityQueue