Index: lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java (revision 1499473)
+++ lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java (working copy)
@@ -364,6 +364,77 @@
   }
 
   @Test
+  public void testSpanNotWindowOne() throws Exception {
+    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
+    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "forty"));
+    SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
+                                           4, true);
+    SpanTermQuery term3 = new SpanTermQuery(new Term("field", "one"));
+    SpanNotQuery query = new SpanNotQuery(near, term3, 1, 1);
+
+    checkHits(query, new int[]
+      {840, 842, 843, 844, 845, 846, 847, 848, 849,
+       1840, 1842, 1843, 1844, 1845, 1846, 1847, 1848, 1849});
+
+    assertTrue(searcher.explain(query, 840).getValue() > 0.0f);
+    assertTrue(searcher.explain(query, 1842).getValue() > 0.0f);
+  }
+
+  @Test
+  public void testSpanNotWindowTwoBefore() throws Exception {
+    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
+    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "forty"));
+    SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
+                                           4, true);
+    SpanTermQuery term3 = new SpanTermQuery(new Term("field", "one"));
+    SpanNotQuery query = new SpanNotQuery(near, term3, 2, 0);
+
+    checkHits(query, new int[]
+      {840, 841, 842, 843, 844, 845, 846, 847, 848, 849});
+
+    assertTrue(searcher.explain(query, 840).getValue() > 0.0f);
+    assertTrue(searcher.explain(query, 849).getValue() > 0.0f);
+  }
+
+  @Test
+  public void testSpanNotWindowNeg() throws Exception {
+    //negative pre/post are invalid: they must behave exactly like a window of 0
+    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
+    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "one"));
+    SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
+                                           4, true);
+    SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty"));
+
+    SpanOrQuery or = new SpanOrQuery(term3);
+
+    SpanNotQuery query = new SpanNotQuery(near, or, -1, -1);
+
+    checkHits(query, new int[]
+      {801, 821, 831, 851, 861, 871, 881, 891,
+       1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
+
+    assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
+    assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
+  }
+
+  @Test
+  public void testSpanNotWindowDoubleExcludesBefore() throws Exception {
+    //test hitting two excludes before an include
+    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "forty"));
+    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "two"));
+    SpanNearQuery near = new SpanNearQuery(new SpanTermQuery[]{term1, term2}, 2, true);
+    SpanTermQuery exclude = new SpanTermQuery(new Term("field", "one"));
+
+    SpanNotQuery query = new SpanNotQuery(near, exclude, 4, 1);
+
+    checkHits(query, new int[]
+      {42, 242, 342, 442, 542, 642, 742, 842, 942});
+
+    assertTrue(searcher.explain(query, 242).getValue() > 0.0f);
+    assertTrue(searcher.explain(query, 942).getValue() > 0.0f);
+  }
+
+  @Test
   public void testSpanFirst() throws Exception {
     SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
     SpanFirstQuery query = new SpanFirstQuery(term1, 1);
Index: lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java (revision 1499473)
+++ lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java (working copy)
@@ -84,7 +84,8 @@
     "u2 xx u1 u2",
     "u2 u1 xx u2",
     "u1 u2 xx u2",
-    "t1 t2 t1 t3 t2 t3"
+    "t1 t2 t1 t3 t2 t3",
+    "s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx"
   };
 
   public SpanTermQuery makeSpanTermQuery(String text) {
@@ -502,4 +503,53 @@
     reader.close();
     dir.close();
   }
+
+  /** Span counts below refer to the "s2 s1 s1 xx ..." doc; by token order s2 sits at 0,5,7,15 and s1 at 1,2,9. */
+  public void testSpanNots() throws Throwable{
+    assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", "s2", 0, 0));
+    assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", "s2", 10, 10));
+
+    //focus on behind
+    assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", "s1", 6, 0));
+    assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", "s1", 5, 0));
+    assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", "s1", 3, 0));
+    assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", "s1", 2, 0));
+    assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", "s1", 0, 0));
+
+    //focus on both
+    assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", "s1", 3, 1));
+    assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", "s1", 2, 1));
+    assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", "s1", 1, 1));
+    assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", "s1", 10, 10));
+
+    //focus on ahead
+    assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", "s2", 10, 10));
+    assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", "s2", 0, 1));
+    assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", "s2", 0, 2));
+    assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", "s2", 0, 3));
+    assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", "s2", 0, 4));
+    assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", "s2", 0, 8));
+
+    //exclude doesn't exist
+    assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", "s3", 8, 8));
+
+    //include doesn't exist
+    assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", "s1", 8, 8));
+
+  }
+
+  /** Counts the spans produced by new SpanNotQuery(include, exclude, pre, post) over the searcher's top-level reader. */
+  private int spanCount(String include, String exclude, int pre, int post) throws IOException{
+    SpanTermQuery iq = new SpanTermQuery(new Term(field, include));
+    SpanTermQuery eq = new SpanTermQuery(new Term(field, exclude));
+    SpanNotQuery snq = new SpanNotQuery(iq, eq, pre, post);
+    Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
+
+    int i = 0;
+    while (spans.next()){
+      i++;
+    }
+    return i;
+  }
+
 }
Index: lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java (revision 1499473)
+++ lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java (working copy)
@@ -31,16 +31,36 @@
 import java.util.Map;
 import java.util.Set;
 
-/** Removes matches which overlap with another SpanQuery. */
+/** Removes matches which overlap with another SpanQuery or which are
+ * within x tokens before or y tokens after another SpanQuery. */
 public class SpanNotQuery extends SpanQuery implements Cloneable {
   private SpanQuery include;
   private SpanQuery exclude;
+  private final int pre;   // exclusion window, in tokens, before each include span (never negative)
+  private final int post;  // exclusion window, in tokens, after each include span (never negative)
 
   /** Construct a SpanNotQuery matching spans from include which
    * have no overlap with spans from exclude.*/
   public SpanNotQuery(SpanQuery include, SpanQuery exclude) {
+    this(include, exclude, 0, 0);
+  }
+
+
+  /** Construct a SpanNotQuery matching spans from include which have no
+   * span from exclude overlapping them or within dist tokens on either
+   * side. A negative dist is treated as 0.*/
+  public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
+    this(include, exclude, dist, dist);
+  }
+
+  /** Construct a SpanNotQuery matching spans from include which have no
+   * span from exclude overlapping them, within pre tokens before them, or
+   * within post tokens after them. Negative pre or post is treated as 0.*/
+  public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
     this.include = include;
     this.exclude = exclude;
+    this.pre = Math.max(pre, 0);    // clamp: a negative window is meaningless
+    this.post = Math.max(post, 0);
 
     if (!include.getField().equals(exclude.getField()))
       throw new IllegalArgumentException("Clauses must have same field.");
@@ -65,6 +85,10 @@
     buffer.append(include.toString(field));
     buffer.append(", ");
     buffer.append(exclude.toString(field));
+    buffer.append(", ");
+    buffer.append(Integer.toString(pre));
+    buffer.append(", ");
+    buffer.append(Integer.toString(post));
     buffer.append(")");
     buffer.append(ToStringUtils.boost(getBoost()));
     return buffer.toString();
@@ -72,7 +96,8 @@
 
   @Override
   public SpanNotQuery clone() {
-    SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),(SpanQuery) exclude.clone());
+    SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),
+          (SpanQuery) exclude.clone(), pre, post);
     spanNotQuery.setBoost(getBoost());
     return spanNotQuery;
   }
@@ -98,13 +123,13 @@
 
               while (moreExclude                          // while exclude is before
                      && includeSpans.doc() == excludeSpans.doc()
-                     && excludeSpans.end() <= includeSpans.start()) {
+                     && excludeSpans.end() <= includeSpans.start() - pre) {
                 moreExclude = excludeSpans.next();        // increment exclude
               }
 
               if (!moreExclude                            // if no intersection
                   || includeSpans.doc() != excludeSpans.doc()
-                  || includeSpans.end() <= excludeSpans.start())
+                  || includeSpans.end() <= excludeSpans.start() - post) // subtract: end()+post could overflow int
                 break;                                    // we found a match
 
               moreInclude = includeSpans.next();          // intersected: keep scanning
@@ -126,13 +151,13 @@
 
             while (moreExclude                            // while exclude is before
                    && includeSpans.doc() == excludeSpans.doc()
-                   && excludeSpans.end() <= includeSpans.start()) {
+                   && excludeSpans.end() <= includeSpans.start() - pre) {
               moreExclude = excludeSpans.next();          // increment exclude
             }
 
             if (!moreExclude                              // if no intersection
                 || includeSpans.doc() != excludeSpans.doc()
-                || includeSpans.end() <= excludeSpans.start())
+                || includeSpans.end() <= excludeSpans.start() - post) // subtract: end()+post could overflow int
               return true;                                // we found a match
 
             return next();                                // scan to next match
@@ -205,6 +230,8 @@
     SpanNotQuery other = (SpanNotQuery)o;
     return this.include.equals(other.include)
             && this.exclude.equals(other.exclude)
+            && this.pre == other.pre
+            && this.post == other.post
             && this.getBoost() == other.getBoost();
   }
 
@@ -215,6 +242,8 @@
     h ^= exclude.hashCode();
     h = (h<<1) | (h >>> 31);  // rotate left
     h ^= Float.floatToRawIntBits(getBoost());
+    h = Integer.rotateLeft(h, 1) ^ pre;   // rotate before mixing so (pre, post) and (post, pre) hash differently
+    h = Integer.rotateLeft(h, 1) ^ post;
     return h;
   }
 