Index: lucene/core/src/test/org/apache/lucene/search/TestProxBooleanTermQuery.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestProxBooleanTermQuery.java (revision 0) +++ lucene/core/src/test/org/apache/lucene/search/TestProxBooleanTermQuery.java (working copy) @@ -0,0 +1,130 @@ +package org.apache.lucene.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +/** Checks hit counts and result ordering of ProxBooleanTermQuery on small indexes. */ +public class TestProxBooleanTermQuery extends LuceneTestCase { + /** Indexes one document and checks that a two-term query matches it under both Occur.SHOULD and Occur.MUST. */ + public void test() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new TextField("field", "here is some text", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + IndexSearcher s = newSearcher(r); + ProxBooleanTermQuery q = new ProxBooleanTermQuery("field", Occur.SHOULD); + q.add(new BytesRef("here")); + q.add(new BytesRef("some")); + TopDocs hits = s.search(q, 10); + assertEquals(1, hits.totalHits); + + q = new ProxBooleanTermQuery("field", Occur.MUST); + q.add(new BytesRef("here")); + q.add(new BytesRef("some")); + hits = s.search(q, 10); + assertEquals(1, hits.totalHits); + + r.close(); + + dir.close(); + } + + // Make sure prox can be used when there's just one term: + public void testSingleTermRepeated() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + Document doc = new Document(); + doc.add(newStringField("id", "0", Field.Store.YES)); + doc.add(newTextField("field", "oz the the oz the the oz", Field.Store.NO)); + w.addDocument(doc); + doc = new Document(); + doc.add(newStringField("id", "1", Field.Store.YES)); + // Has 3 extra "the" tokens (7 vs. 4 in doc 0): + doc.add(newTextField("field", "oz oz oz the the the the the the the", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + IndexSearcher searcher = 
newSearcher(r); + + // Do ordinary TermQuery; doc 0 is expected to rank first: + TopDocs hits = searcher.search(new TermQuery(new Term("field", "oz")), 10); + assertEquals(2, hits.totalHits); + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + // Same single term through ProxBooleanTermQuery; doc 1 is expected to rank first: + ProxBooleanTermQuery pq = new ProxBooleanTermQuery("field", Occur.SHOULD); + pq.add(new BytesRef("oz")); + + hits = searcher.search(pq, 10); + + assertEquals(2, hits.totalHits); + assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id")); + + r.close(); + dir.close(); + } + + // Make sure query works fine if a term does not exist + public void testTermDoesNotExist() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + Document doc = new Document(); + doc.add(newStringField("id", "0", Field.Store.YES)); + doc.add(newTextField("field", "oz the the oz the the oz", Field.Store.NO)); + w.addDocument(doc); + doc = new Document(); + doc.add(newStringField("id", "1", Field.Store.YES)); + // Has 3 extra "the" tokens (7 vs. 4 in doc 0): + doc.add(newTextField("field", "oz oz oz the the the the the the the", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + IndexSearcher searcher = newSearcher(r); + // "wizard" occurs in neither document; "oz" occurs in both: + ProxBooleanTermQuery pq = new ProxBooleanTermQuery("field", Occur.SHOULD); + pq.add(new BytesRef("wizard")); + pq.add(new BytesRef("oz")); + + TopDocs hits = searcher.search(pq, 10); + + assertEquals(2, hits.totalHits); + assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id")); + + r.close(); + dir.close(); + } +} Property changes on: lucene/core/src/test/org/apache/lucene/search/TestProxBooleanTermQuery.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property 
Index: lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java (revision 0) +++ lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java (working copy) @@ -0,0 +1,86 @@ +package org.apache.lucene.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +/** Checks that QueryRescorer.rescore reorders first-pass hits when given a ProxBooleanTermQuery. */ +public class TestQueryRescorer extends LuceneTestCase { + /** First pass is expected to rank doc 0 above doc 1; rescoring with SHOULD and then MUST is expected to put doc 1 first. */ + public void testBasic() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + Document doc = new Document(); + doc.add(newStringField("id", "0", Field.Store.YES)); + doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO)); + w.addDocument(doc); + doc = new Document(); + doc.add(newStringField("id", "1", Field.Store.YES)); + // 1 extra token, but wizard and oz are adjacent: + doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + // Do ordinary BooleanQuery: + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD); + bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD); + IndexSearcher searcher = newSearcher(r); + + TopDocs hits = searcher.search(bq, 10); + assertEquals(2, hits.totalHits); + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + + // Now, re-sort using ProxBooleanTermQuery, SHOULD: + ProxBooleanTermQuery pq = new ProxBooleanTermQuery("field", Occur.SHOULD); + pq.add(new BytesRef("wizard")); + pq.add(new BytesRef("oz")); + + TopDocs hits2 = QueryRescorer.rescore(searcher, hits, pq, 2.0, 10); + + assertEquals(2, hits2.totalHits); + assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id")); + assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id")); 
+ + // Now, re-sort using ProxBooleanTermQuery, MUST: + pq = new ProxBooleanTermQuery("field", Occur.MUST); + pq.add(new BytesRef("wizard")); + pq.add(new BytesRef("oz")); + + hits2 = QueryRescorer.rescore(searcher, hits, pq, 2.0, 10); + + assertEquals(2, hits2.totalHits); + assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id")); + assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id")); + + r.close(); + dir.close(); + } + + // nocommit test w/ PhraseQuery as rescorer +} Property changes on: lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (revision 1533170) +++ lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (working copy) @@ -163,8 +163,7 @@ * Expert: the Weight for BooleanQuery, used to * normalize, score and explain these queries. * - *
NOTE: this API and implementation is subject to - * change suddenly in the next release.
+ * @lucene.experimental */ protected class BooleanWeight extends Weight { /** The Similarity implementation. */ @@ -322,6 +321,11 @@ } } + return scorer(required, prohibited, optional, scoreDocsInOrder, topScorer, acceptDocs); + } + + protected Scorer scorer(ListTermScorer.
Index: lucene/core/src/java/org/apache/lucene/search/ProxBooleanTermQuery.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/ProxBooleanTermQuery.java (revision 0)
+++ lucene/core/src/java/org/apache/lucene/search/ProxBooleanTermQuery.java (working copy)
@@ -0,0 +1,558 @@
+package org.apache.lucene.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ TODO
+ - take TermStates so a custom rewrite method can avoid
+ double-term lookup
+ - coord
+ - what about multi-fields?
+ - payloads?
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.TermQuery.TermWeight;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PriorityQueue;
+
+/** Scores like {@link BooleanQuery}, but then also boosts
+ * matches where terms occur close to (in proximity of) one
+ * another. The field must be indexed with positions.
+ * Typically, this query is too costly to run as the
+ * primary query; instead, run a simpler first-pass query
+ * and then use {@link QueryRescorer} to rescore the top
+ * results using this query.
+ *
+ * @lucene.experimental
+ */
+
+public class ProxBooleanTermQuery extends Query {
+
+ private final BooleanClause.Occur occur;
+ private final String field;
+ private final List