diff --git lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java index 9907375..2990f3f 100644 --- lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java +++ lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java @@ -69,6 +69,8 @@ public class NearSpansOrdered extends Spans { private int matchDoc = -1; private int matchStart = -1; private int matchEnd = -1; + private int matchStartOffset = -1; + private int matchEndOffset = -1; private List matchPayload; private final Spans[] subSpansByDoc; @@ -116,6 +118,14 @@ public class NearSpansOrdered extends Spans { @Override public int end() { return matchEnd; } + // inherit javadocs + @Override + public int startOffset() { return matchStartOffset; } + + // inherit javadocs + @Override + public int endOffset() { return matchEndOffset; } + public Spans[] getSubSpans() { return subSpans; } @@ -266,7 +276,9 @@ public class NearSpansOrdered extends Spans { */ private boolean shrinkToAfterShortestMatch() throws IOException { matchStart = subSpans[subSpans.length - 1].start(); + matchStartOffset = subSpans[subSpans.length - 1].startOffset(); matchEnd = subSpans[subSpans.length - 1].end(); + matchEndOffset = subSpans[subSpans.length - 1].endOffset(); Set possibleMatchPayloads = new HashSet(); if (subSpans[subSpans.length - 1].isPayloadAvailable()) { possibleMatchPayloads.addAll(subSpans[subSpans.length - 1].getPayload()); @@ -286,6 +298,7 @@ public class NearSpansOrdered extends Spans { } int prevStart = prevSpans.start(); + int prevStartOffset = prevSpans.startOffset(); int prevEnd = prevSpans.end(); while (true) { // Advance prevSpans until after (lastStart, lastEnd) if (! prevSpans.next()) { @@ -302,6 +315,7 @@ public class NearSpansOrdered extends Spans { break; // Check remaining subSpans. 
} else { // prevSpans still before (lastStart, lastEnd) prevStart = ppStart; + prevStartOffset = prevSpans.startOffset(); prevEnd = ppEnd; if (collectPayloads && prevSpans.isPayloadAvailable()) { Collection payload = prevSpans.getPayload(); @@ -325,6 +339,7 @@ public class NearSpansOrdered extends Spans { * that subSpans[0] is advanced after the match, if any. */ matchStart = prevStart; + matchStartOffset = prevStartOffset; lastStart = prevStart; lastEnd = prevEnd; } diff --git lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java index 0ec6fb4..a3abd97 100644 --- lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java +++ lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java @@ -119,6 +119,12 @@ public class NearSpansUnordered extends Spans { @Override public int end() { return spans.end(); } // TODO: Remove warning after API has been finalized + + @Override + public int startOffset() { return spans.startOffset(); } + + @Override + public int endOffset() { return spans.endOffset(); } @Override public Collection getPayload() throws IOException { return new ArrayList(spans.getPayload()); @@ -236,6 +242,10 @@ public class NearSpansUnordered extends Spans { public int start() { return min().start(); } @Override public int end() { return max.end(); } + @Override + public int startOffset() { return min().startOffset(); } + @Override + public int endOffset() { return max.endOffset(); } // TODO: Remove warning after API has been finalized /** diff --git lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index 3c8e65a..2dd7e3a 100644 --- lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -144,6 +144,10 @@ public class SpanNotQuery extends SpanQuery 
implements Cloneable { public int start() { return includeSpans.start(); } @Override public int end() { return includeSpans.end(); } + @Override + public int startOffset() { return includeSpans.startOffset(); } + @Override + public int endOffset() { return includeSpans.endOffset(); } // TODO: Remove warning after API has been finalized @Override diff --git lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index 7c679dc..843853f 100644 --- lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -17,23 +17,22 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; - -import java.util.List; -import java.util.Collection; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.TermContext; import org.apache.lucene.util.ToStringUtils; -import org.apache.lucene.search.Query; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; /** Matches the union of its clauses.*/ public class SpanOrQuery extends SpanQuery implements Cloneable { @@ -235,6 +234,10 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { public int start() { return top().start(); } @Override public int end() { return top().end(); } + @Override + public int startOffset() { return top().startOffset(); } + @Override + public int endOffset() { return top().endOffset(); } @Override public Collection getPayload() throws IOException { diff --git lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java 
lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index 6b12d73..c0acd0d 100644 --- lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -153,6 +153,13 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea @Override public int end() { return spans.end(); } + + @Override + public int startOffset() { return spans.startOffset(); } + + @Override + public int endOffset() { return spans.endOffset(); } + // TODO: Remove warning after API has been finalized @Override diff --git lucene/core/src/java/org/apache/lucene/search/spans/Spans.java lucene/core/src/java/org/apache/lucene/search/spans/Spans.java index 1462f60..896ff8f 100644 --- lucene/core/src/java/org/apache/lucene/search/spans/Spans.java +++ lucene/core/src/java/org/apache/lucene/search/spans/Spans.java @@ -52,6 +52,16 @@ public abstract class Spans { /** Returns the end position of the current match. Initially invalid. */ public abstract int end(); + /** Returns the starting offset of the current match, or -1 if offsets are + * not implemented. Initially invalid. + */ + public abstract int startOffset(); + + /** Returns the ending offset of the current match, or -1 if offsets are + * not implemented. Initially invalid. + */ + public abstract int endOffset(); + /** * Returns the payload data for the current span. 
* This is invalid until {@link #next()} is called for diff --git lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java index 2ce409e..b8f1376 100644 --- lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java +++ lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java @@ -36,6 +36,8 @@ public class TermSpans extends Spans { protected int freq; protected int count; protected int position; + protected int startOffset; + protected int endOffset; public TermSpans(DocsAndPositionsEnum postings, Term term) throws IOException { this.postings = postings; @@ -63,6 +65,8 @@ public class TermSpans extends Spans { count = 0; } position = postings.nextPosition(); + startOffset = postings.startOffset(); + endOffset = postings.endOffset(); count++; return true; } @@ -77,6 +81,8 @@ public class TermSpans extends Spans { freq = postings.freq(); count = 0; position = postings.nextPosition(); + startOffset = postings.startOffset(); + endOffset = postings.endOffset(); count++; return true; @@ -96,6 +102,16 @@ public class TermSpans extends Spans { public int end() { return position + 1; } + + @Override + public int startOffset() { + return startOffset; + } + + @Override + public int endOffset() { + return endOffset; + } // TODO: Remove warning after API has been finalized @Override @@ -120,7 +136,9 @@ public class TermSpans extends Spans { @Override public String toString() { + /* doc == -1 prints bare START and doc == Integer.MAX_VALUE bare END; any other doc prints doc-position, plus [startOffset->endOffset] unless startOffset is -1. */ return "spans(" + term.toString() + ")@" + - (doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? "END" : doc + "-" + position); + (doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? "END" : doc + "-" + position + + (startOffset == -1 ? 
"" : ("[" + startOffset + "->" + endOffset + "]"))); } public DocsAndPositionsEnum getPostings() { diff --git lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java index d33ae3e..02324e0 100644 --- lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java +++ lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -17,10 +17,6 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import java.io.IOException; -import java.util.Collection; -import java.util.Map; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.Weight; @@ -28,6 +24,10 @@ import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; import org.apache.lucene.util.TermContext; +import java.io.IOException; +import java.util.Collection; +import java.util.Map; + /** * Holds all implementations of classes in the o.a.l.s.spans package as a * back-compatibility test. 
It does not run any tests per-se, however if @@ -65,6 +65,16 @@ final class JustCompileSearchSpans { public int start() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } + + @Override + public int startOffset() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int endOffset() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } @Override public Collection getPayload() throws IOException { @@ -133,7 +143,16 @@ final class JustCompileSearchSpans { public int start() { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } - + + @Override + public int startOffset() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int endOffset() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } } static final class JustCompileSpanScorer extends SpanScorer { diff --git lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java index f585c68..628760c 100644 --- lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java +++ lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java @@ -17,13 +17,6 @@ package org.apache.lucene.search.spans; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.TreeSet; - import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReaderContext; @@ -31,6 +24,13 @@ import org.apache.lucene.index.Term; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.TermContext; +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.TreeSet; + /** * * A wrapper to perform span operations on a non-leaf reader context @@ -141,6 +141,22 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t } return current.end(); } + + @Override + public int startOffset() { + if (current == null) { + return DocsEnum.NO_MORE_DOCS; + } + return current.startOffset(); + } + + @Override + public int endOffset() { + if (current == null) { + return DocsEnum.NO_MORE_DOCS; + } + return current.endOffset(); + } @Override public Collection getPayload() throws IOException {