Index: lucene/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java (revision 0)
@@ -0,0 +1,289 @@
+package org.apache.lucene.search.spans;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.ToStringUtils;
+
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Set;
+
+/**
+ * Matches spans from <code>include</code> unless more than
+ * <code>proximity</code> spans from <code>exclude</code> are counted against
+ * them in the same document.
+ * <p>
+ * An exclude span is counted when <code>end() - 1</code> of the include span is
+ * greater than its <code>start()</code> and the exclude span starts before
+ * <code>end() - 1</code> of the include span.
+ */
+public class SpanWithinQuery extends SpanQuery {
+
+  private SpanQuery include;
+  private SpanQuery exclude;
+  private int proximity;
+
+  /**
+   * Construct a SpanWithinQuery matching spans from <code>include</code> which
+   * overlap with spans from <code>exclude</code> up to <code>proximity</code>
+   * times.
+   *
+   * @param include the query whose spans are returned
+   * @param exclude the query whose spans are counted against each include span
+   * @param proximity the largest number of counted exclude spans still accepted
+   * @throws IllegalArgumentException if the two clauses are on different fields
+   */
+  public SpanWithinQuery(SpanQuery include, SpanQuery exclude, int proximity) {
+    this.include = include;
+    this.exclude = exclude;
+    this.proximity = proximity;
+
+    if (!include.getField().equals(exclude.getField())) {
+      throw new IllegalArgumentException("Clauses must have same field.");
+    }
+  }
+
+  /** Return the SpanQuery whose matches are filtered. */
+  public SpanQuery getInclude() {
+    return include;
+  }
+
+  /** Return the SpanQuery whose matches are counted against the included spans. */
+  public SpanQuery getExclude() {
+    return exclude;
+  }
+
+  /** Return the largest number of counted exclude spans that still matches. */
+  public int getProximity() {
+    return proximity;
+  }
+
+  @Override
+  public String getField() {
+    return include.getField();
+  }
+
+  /** Collects the terms of the include clause only. */
+  @Override
+  public void extractTerms(Set<Term> terms) {
+    include.extractTerms(terms);
+  }
+
+  @Override
+  public String toString(String field) {
+    StringBuilder buffer = new StringBuilder();
+    buffer.append("spanWithin(");
+    buffer.append(include.toString(field));
+    buffer.append(", ");
+    buffer.append(proximity);
+    buffer.append(", ");
+    buffer.append(exclude.toString(field));
+    buffer.append(")");
+    buffer.append(ToStringUtils.boost(getBoost()));
+
+    return buffer.toString();
+  }
+
+  /**
+   * Returns the spans of <code>include</code> that pass the proximity check
+   * against the spans of <code>exclude</code>.
+   */
+  @Override
+  public Spans getSpans(final AtomicReaderContext context) throws IOException {
+    return new Spans() {
+      private Spans includeSpans = include.getSpans(context);
+      private boolean moreInclude = true;
+      private Spans excludeSpans = exclude.getSpans(context);
+      private boolean moreExclude = true;
+
+      /**
+       * Checks the current include span against the exclude spans of its
+       * document, advancing the exclude cursor while doing so.
+       * <p>
+       * NOTE(review): the exclude cursor only moves forward, so exclude spans
+       * consumed here are not looked at again for later include spans of the
+       * same document, and exclude spans starting before the include span are
+       * counted as well. Confirm that both are intended.
+       *
+       * @return true if the current include span is a match
+       */
+      private boolean includeMatches() throws IOException {
+        if (moreExclude // skip exclude
+            && (includeSpans.doc() > excludeSpans.doc())) {
+          moreExclude = excludeSpans.skipTo(includeSpans.doc());
+        }
+
+        int count = 0;
+
+        while (moreExclude // while exclude is in the same document
+            && (includeSpans.doc() == excludeSpans.doc())) {
+          if ((includeSpans.end() - 1) > includeSpans.start()
+              && excludeSpans.start() < (includeSpans.end() - 1)) {
+            count += 1;
+
+            if (count > proximity) {
+              break; // too many: stay on this exclude span
+            }
+          }
+
+          moreExclude = excludeSpans.next(); // increment exclude
+        }
+
+        return !moreExclude // exclude is exhausted
+            || (includeSpans.doc() != excludeSpans.doc())
+            || (includeSpans.end() <= excludeSpans.start());
+      }
+
+      @Override
+      public boolean next() throws IOException {
+        if (moreInclude) { // move to next include
+          moreInclude = includeSpans.next();
+        }
+
+        while (moreInclude && moreExclude) {
+          if (includeMatches()) {
+            break; // we found a match
+          }
+
+          moreInclude = includeSpans.next(); // rejected: keep scanning
+        }
+
+        return moreInclude;
+      }
+
+      @Override
+      public boolean skipTo(int target) throws IOException {
+        if (moreInclude) { // skip include
+          moreInclude = includeSpans.skipTo(target);
+        }
+
+        if (!moreInclude) {
+          return false;
+        }
+
+        if (includeMatches()) {
+          return true; // we found a match
+        }
+
+        return next(); // scan to next match
+      }
+
+      @Override
+      public int doc() {
+        return includeSpans.doc();
+      }
+
+      @Override
+      public int start() {
+        return includeSpans.start();
+      }
+
+      @Override
+      public int end() {
+        return includeSpans.end();
+      }
+
+      @Override
+      public Collection<byte[]> getPayload() throws IOException {
+        ArrayList<byte[]> result = null;
+        if (includeSpans.isPayloadAvailable()) {
+          result = new ArrayList<byte[]>(includeSpans.getPayload());
+        }
+        return result;
+      }
+
+      @Override
+      public boolean isPayloadAvailable() {
+        return includeSpans.isPayloadAvailable();
+      }
+
+      @Override
+      public String toString() {
+        return "spans(" + SpanWithinQuery.this.toString() + ")";
+      }
+    };
+  }
+
+  /** Rewrites both clauses, returning a modified clone if either changed. */
+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    SpanWithinQuery clone = null;
+
+    SpanQuery rewrittenInclude = (SpanQuery) include.rewrite(reader);
+
+    if (rewrittenInclude != include) {
+      clone = (SpanWithinQuery) this.clone();
+      clone.include = rewrittenInclude;
+    }
+
+    SpanQuery rewrittenExclude = (SpanQuery) exclude.rewrite(reader);
+
+    if (rewrittenExclude != exclude) {
+      if (clone == null) {
+        clone = (SpanWithinQuery) this.clone();
+      }
+
+      clone.exclude = rewrittenExclude;
+    }
+
+    if (clone != null) {
+      return clone; // some clauses rewrote
+    } else {
+      return this; // no clauses rewrote
+    }
+  }
+
+  /** Returns true iff <code>o</code> is equal to this. */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+
+    if (!(o instanceof SpanWithinQuery)) {
+      return false;
+    }
+
+    SpanWithinQuery other = (SpanWithinQuery) o;
+
+    return this.include.equals(other.include)
+        && this.exclude.equals(other.exclude)
+        && (this.getBoost() == other.getBoost())
+        && (proximity == other.proximity);
+  }
+
+  @Override
+  public int hashCode() {
+    int h = include.hashCode();
+    h = (h << 1) | (h >>> 31); // rotate left
+    h ^= exclude.hashCode();
+    h = (h << 1) | (h >>> 31); // rotate left
+    h ^= Float.floatToRawIntBits(getBoost());
+    h ^= proximity;
+
+    return h;
+  }
+
+}
Index: lucene/src/test/org/apache/lucene/search/spans/TestSpans.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/spans/TestSpans.java (revision 1148004)
+++ lucene/src/test/org/apache/lucene/search/spans/TestSpans.java (working copy)
@@ -86,7 +86,8 @@
     "u2 xx u1 u2",
     "u2 u1 xx u2",
     "u1 u2 xx u2",
-    "t1 t2 t1 t3 t2 t3"
+    "t1 t2 t1 t3 t2 t3",
+    "z1 z2 z3 bb bb bb z5 t6 z7"
   };
 
   public SpanTermQuery makeSpanTermQuery(String text) {
@@ -509,4 +510,31 @@
     reader.close();
     dir.close();
   }
+
+  /**
+   * The document added for this test has three "bb" terms between "z3" and
+   * "z5" and one "t6" between "z5" and "z7"; it is expected as hit 12 only
+   * once the proximity is large enough.
+   */
+  public void testSpanWithin() throws IOException {
+    SpanWithinQuery spanQuery = new SpanWithinQuery(new SpanNearQuery(
+        new SpanQuery[] {makeSpanTermQuery("z5"), makeSpanTermQuery("z7")}, 5,
+        false), makeSpanTermQuery("t6"), 0);
+    checkHits(spanQuery, new int[] {});
+    spanQuery = getWithinQuery(0);
+    checkHits(spanQuery, new int[] {});
+    spanQuery = getWithinQuery(1);
+    checkHits(spanQuery, new int[] {});
+    spanQuery = getWithinQuery(3);
+    checkHits(spanQuery, new int[] {12});
+    spanQuery = getWithinQuery(5);
+    checkHits(spanQuery, new int[] {12});
+  }
+
+  /** Builds "z3" near "z5" checked against "bb" with the given proximity. */
+  private SpanWithinQuery getWithinQuery(int proximity) {
+    return new SpanWithinQuery(new SpanNearQuery(new SpanQuery[] {
+        makeSpanTermQuery("z3"), makeSpanTermQuery("z5")}, 4, false),
+        makeSpanTermQuery("bb"), proximity);
+  }
 }
Index: lucene/src/test/org/apache/lucene/search/spans/TestSentence.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/spans/TestSentence.java (revision 0)
+++ lucene/src/test/org/apache/lucene/search/spans/TestSentence.java (revision 0)
@@ -0,0 +1,126 @@
+package org.apache.lucene.search.spans;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Exercises SpanWithinQuery with sentence end markers that are indexed at the
+ * same position as the token preceding them.
+ */
+public class TestSentence extends LuceneTestCase {
+  public static final String field = "field";
+  public static final String START = "^";
+  public static final String END = "$";
+
+  public void testSetPosition() throws Exception {
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      public TokenStream tokenStream(String fieldName, Reader reader) {
+        return new TokenStream() {
+          private final String[] TOKENS = {"1", "2", "3", END, "4", "5", "6",
+              END, "9"};
+          // an increment of 0 puts END at the position of the token before it
+          private final int[] INCREMENTS = {1, 1, 1, 0, 1, 1, 1, 0, 1};
+          private int i = 0;
+
+          PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+          CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+          OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+          @Override
+          public boolean incrementToken() {
+            assertEquals(TOKENS.length, INCREMENTS.length);
+            if (i == TOKENS.length) return false;
+            clearAttributes();
+            termAtt.append(TOKENS[i]);
+            offsetAtt.setOffset(i, i);
+            posIncrAtt.setPositionIncrement(INCREMENTS[i]);
+            i++;
+            return true;
+          }
+        };
+      }
+    };
+    Directory store = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random, store, analyzer);
+    Document d = new Document();
+    d.add(newField("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
+    writer.addDocument(d);
+    IndexReader reader = writer.getReader();
+    writer.close();
+    IndexSearcher searcher = newSearcher(reader);
+
+    SpanTermQuery endSentence = makeSpanTermQuery(END);
+    SpanQuery[] clauses = new SpanQuery[2];
+    clauses[0] = makeSpanTermQuery("1");
+    clauses[1] = makeSpanTermQuery("2");
+    // no END marker is emitted between "1" and "2"
+    SpanNearQuery allKeywords = new SpanNearQuery(clauses, Integer.MAX_VALUE,
+        false); // SpanAndQuery equivalent
+    SpanWithinQuery query = new SpanWithinQuery(allKeywords, endSentence, 0);
+    if (VERBOSE) {
+      System.out.println("query: " + query);
+    }
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(1, hits.length);
+
+    // an END marker is emitted between "1" and "4"
+    clauses[1] = makeSpanTermQuery("4");
+    // SpanAndQuery equivalent
+    allKeywords = new SpanNearQuery(clauses, Integer.MAX_VALUE, false);
+    query = new SpanWithinQuery(allKeywords, endSentence, 0);
+    if (VERBOSE) {
+      System.out.println("query: " + query);
+    }
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(0, hits.length);
+
+    // a plain phrase query for "3 4" is expected to match despite the marker
+    PhraseQuery pq = new PhraseQuery();
+    pq.add(new Term(field, "3"));
+    pq.add(new Term(field, "4"));
+    hits = searcher.search(pq, null, 1000).scoreDocs;
+    assertEquals(1, hits.length);
+
+    // "3" shares its position with the first END marker and must still match
+    clauses[1] = makeSpanTermQuery("3");
+    // SpanAndQuery equivalent
+    allKeywords = new SpanNearQuery(clauses, Integer.MAX_VALUE, false);
+    query = new SpanWithinQuery(allKeywords, endSentence, 0);
+    if (VERBOSE) {
+      System.out.println("query: " + query);
+    }
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(1, hits.length);
+    reader.close();
+    searcher.close();
+    store.close();
+  }
+
+  public SpanTermQuery makeSpanTermQuery(String text) {
+    return new SpanTermQuery(new Term(field, text));
+  }
+
+  public TermQuery makeTermQuery(String text) {
+    return new TermQuery(new Term(field, text));
+  }
+}