Index: solr/src/test/org/apache/solr/update/DirectUpdateHandlerTest.java
===================================================================
--- solr/src/test/org/apache/solr/update/DirectUpdateHandlerTest.java (revision 954967)
+++ solr/src/test/org/apache/solr/update/DirectUpdateHandlerTest.java (working copy)
@@ -17,22 +17,13 @@
package org.apache.solr.update;
-import java.io.IOException;
-import java.util.ArrayList;
import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
import java.util.Map;
-import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams;
@@ -40,9 +31,7 @@
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.SolrIndexReader;
-import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.AbstractSolrTestCase;
-import org.apache.solr.util.RefCounted;
/**
*
Index: solr/src/test/org/apache/solr/request/TestFaceting.java
===================================================================
--- solr/src/test/org/apache/solr/request/TestFaceting.java (revision 954967)
+++ solr/src/test/org/apache/solr/request/TestFaceting.java (working copy)
@@ -18,6 +18,7 @@
package org.apache.solr.request;
import org.apache.lucene.index.Term;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.After;
import org.junit.BeforeClass;
@@ -68,7 +69,7 @@
req = lrf.makeRequest("q","*:*");
TermIndex ti = new TermIndex(proto.field());
- NumberedTermEnum te = ti.getEnumerator(req.getSearcher().getReader());
+ NumberedTermsEnum te = ti.getEnumerator(req.getSearcher().getReader());
// iterate through first
while(te.term() != null) te.next();
@@ -82,11 +83,11 @@
for (int i=0; i0, te.skipTo(proto.createTerm("000")));
+ assertEquals(size>0, te.skipTo(new BytesRef("000")) != null);
assertEquals(0, te.getTermNumber());
if (size>0) {
- assertEquals(t(0), te.term().text());
+ assertEquals(t(0), te.term().utf8ToString());
} else {
assertEquals(null, te.term());
}
@@ -107,10 +108,10 @@
for (int i=0; i numTimesNext) {
- termEnum = reader.terms(protoTerm.createTerm(internalKey));
- t = termEnum.term();
+ spare.copy(internalKey);
+ termsEnum.seek(spare);
+ t = termsEnum.term();
} else {
- hasNext = termEnum.next();
- t = hasNext ? termEnum.term() : null;
+ t = termsEnum.next();
}
- if (t != null && t.field() == idName) { // intern'd comparison
- termVal = t.text();
+ if (t != null) {
+ termVal = t.utf8ToString();
} else {
termVal = lastVal;
}
@@ -319,14 +319,14 @@
}
if (!sorted) {
- termEnum = reader.terms(protoTerm.createTerm(internalKey));
- t = termEnum.term();
- if (t != null && t.field() == idName // intern'd comparison
- && internalKey.equals(t.text()))
- {
- termDocs.seek (termEnum);
- while (termDocs.next()) {
- vals[termDocs.doc()] = fval;
+ spare.copy(internalKey);
+ TermsEnum.SeekStatus result = termsEnum.seek(spare);
+ t = termsEnum.term();
+ if (result == TermsEnum.SeekStatus.FOUND) {
+ docsEnum = termsEnum.docs(delDocs, docsEnum);
+ int doc;
+ while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+ vals[doc] = fval;
}
} else {
if (notFoundCount<10) { // collect first 10 not found for logging
@@ -342,8 +342,6 @@
} finally {
// swallow exceptions on close so we don't override any
// exceptions that happened in the loop
- if (termDocs!=null) try{termDocs.close();}catch(Exception e){}
- if (termEnum!=null) try{termEnum.close();}catch(Exception e){}
try{r.close();}catch(Exception e){}
}
Index: solr/src/java/org/apache/solr/search/SolrIndexReader.java
===================================================================
--- solr/src/java/org/apache/solr/search/SolrIndexReader.java (revision 954967)
+++ solr/src/java/org/apache/solr/search/SolrIndexReader.java (working copy)
@@ -485,11 +485,6 @@
}
@Override
- public TermPositions termPositions(Term term) throws IOException {
- return in.termPositions(term);
- }
-
- @Override
public void undeleteAll() throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
in.undeleteAll();
}
Index: solr/src/java/org/apache/solr/request/UnInvertedField.java
===================================================================
--- solr/src/java/org/apache/solr/request/UnInvertedField.java (revision 954967)
+++ solr/src/java/org/apache/solr/request/UnInvertedField.java (working copy)
@@ -20,8 +20,11 @@
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.solr.common.params.FacetParams;
@@ -36,6 +39,8 @@
import org.apache.solr.handler.component.StatsValues;
import org.apache.solr.handler.component.FieldFacetStats;
import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.ArrayList;
@@ -43,6 +48,7 @@
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
+import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;
@@ -80,12 +86,12 @@
private static int TNUM_OFFSET=2;
static class TopTerm {
- Term term;
+ BytesRef term;
int termNum;
long memSize() {
return 8 + // obj header
- 8 + 8 +(term.text().length()<<1) + //term
+ 8 + 8 +term.length + //term
4; // int
}
}
@@ -191,7 +197,7 @@
final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)
maxTermCounts = new int[1024];
- NumberedTermEnum te = ti.getEnumerator(reader);
+ NumberedTermsEnum te = ti.getEnumerator(reader);
// threshold, over which we use set intersections instead of counting
// to (1) save memory, and (2) speed up faceting.
@@ -199,8 +205,6 @@
// the threshold even when the index is very small.
int threshold = maxDoc / 20 + 2;
// threshold = 2000000000; //////////////////////////////// USE FOR TESTING
- int[] docs = new int[1000];
- int[] freqs = new int[1000];
// we need a minimum of 9 bytes, but round up to 12 since the space would
// be wasted with most allocators anyway.
@@ -223,7 +227,7 @@
// frequent terms ahead of time.
for (;;) {
- Term t = te.term();
+ BytesRef t = te.term();
if (t==null) break;
int termNum = te.getTermNumber();
@@ -239,11 +243,11 @@
int df = te.docFreq();
if (df >= threshold) {
TopTerm topTerm = new TopTerm();
- topTerm.term = t;
+ topTerm.term = new BytesRef(t);
topTerm.termNum = termNum;
bigTerms.put(topTerm.termNum, topTerm);
- DocSet set = searcher.getDocSet(new TermQuery(topTerm.term));
+ DocSet set = searcher.getDocSet(new TermQuery(new Term(ti.field, topTerm.term.utf8ToString())));
maxTermCounts[termNum] = set.size();
te.next();
@@ -252,17 +256,19 @@
termsInverted++;
- TermDocs td = te.getTermDocs();
- td.seek(te);
+ DocsEnum td = te.getDocsEnum();
+
+ DocsEnum.BulkReadResult bulkResult = td.getBulkResult();
+
for(;;) {
- int n = td.read(docs,freqs);
+ int n = td.read();
if (n <= 0) break;
maxTermCounts[termNum] += n;
for (int i=0; i 0) {
- te.skipTo(prefix);
+ te.skipTo(new BytesRef(prefix));
startTerm = te.getTermNumber();
- te.skipTo(prefix + "\uffff\uffff\uffff\uffff");
+ te.skipTo(new BytesRef(prefix + "\uffff\uffff\uffff\uffff"));
endTerm = te.getTermNumber();
}
@@ -497,7 +503,7 @@
for (TopTerm tt : bigTerms.values()) {
// TODO: counts could be deferred if sorted==false
if (tt.termNum >= startTerm && tt.termNum < endTerm) {
- counts[tt.termNum] = searcher.numDocs(new TermQuery(tt.term), docs);
+ counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(ti.field, tt.term.utf8ToString())), docs);
}
}
@@ -669,7 +675,7 @@
final int[] index = this.index;
final int[] counts = new int[numTermsInField];//keep track of the number of times we see each word in the field for all the documents in the docset
- NumberedTermEnum te = ti.getEnumerator(searcher.getReader());
+ NumberedTermsEnum te = ti.getEnumerator(searcher.getReader());
boolean doNegative = false;
@@ -693,12 +699,13 @@
for (TopTerm tt : bigTerms.values()) {
// TODO: counts could be deferred if sorted==false
if (tt.termNum >= 0 && tt.termNum < numTermsInField) {
+ final Term t = new Term(ti.field, tt.term.utf8ToString());
if (finfo.length == 0) {
- counts[tt.termNum] = searcher.numDocs(new TermQuery(tt.term), docs);
+ counts[tt.termNum] = searcher.numDocs(new TermQuery(t), docs);
} else {
//COULD BE VERY SLOW
//if we're collecting stats for facet fields, we need to iterate on all matching documents
- DocSet bigTermDocSet = searcher.getDocSet(new TermQuery(tt.term)).intersection(docs);
+ DocSet bigTermDocSet = searcher.getDocSet(new TermQuery(t)).intersection(docs);
DocIterator iter = bigTermDocSet.iterator();
while (iter.hasNext()) {
int doc = iter.nextDoc();
@@ -795,17 +802,16 @@
- String getTermText(NumberedTermEnum te, int termNum) throws IOException {
+ String getTermText(NumberedTermsEnum te, int termNum) throws IOException {
if (bigTerms.size() > 0) {
// see if the term is one of our big terms.
TopTerm tt = bigTerms.get(termNum);
if (tt != null) {
- return tt.term.text();
+ return tt.term.utf8ToString();
}
}
- te.skipTo(termNum);
- return te.term().text();
+ return te.skipTo(termNum).utf8ToString();
}
public String toString() {
@@ -860,95 +866,97 @@
***/
-class NumberedTermEnum extends TermEnum {
+class NumberedTermsEnum extends TermsEnum {
protected final IndexReader reader;
protected final TermIndex tindex;
- protected TermEnum tenum;
+ protected TermsEnum tenum;
protected int pos=-1;
- protected Term t;
- protected TermDocs termDocs;
+ protected BytesRef termText;
+ protected DocsEnum docsEnum;
- NumberedTermEnum(IndexReader reader, TermIndex tindex) throws IOException {
+ NumberedTermsEnum(IndexReader reader, TermIndex tindex) throws IOException {
this.reader = reader;
this.tindex = tindex;
}
- NumberedTermEnum(IndexReader reader, TermIndex tindex, String termValue, int pos) throws IOException {
+ NumberedTermsEnum(IndexReader reader, TermIndex tindex, BytesRef termValue, int pos) throws IOException {
this.reader = reader;
this.tindex = tindex;
this.pos = pos;
- tenum = reader.terms(tindex.createTerm(termValue));
- setTerm();
+ Terms terms = MultiFields.getTerms(reader, tindex.field);
+ if (terms != null) {
+ tenum = terms.iterator();
+ tenum.seek(termValue);
+ setTerm();
+ }
}
- public TermDocs getTermDocs() throws IOException {
- if (termDocs==null) termDocs = reader.termDocs(t);
- else termDocs.seek(t);
- return termDocs;
+ @Override
+ public Comparator<BytesRef> getComparator() throws IOException {
+ return tenum.getComparator();
}
- protected boolean setTerm() {
- t = tenum.term();
- if (t==null
- || t.field() != tindex.fterm.field() // intern'd compare
- || (tindex.prefix != null && !t.text().startsWith(tindex.prefix,0)) )
- {
- t = null;
- return false;
+ public DocsEnum getDocsEnum() throws IOException {
+ docsEnum = tenum.docs(MultiFields.getDeletedDocs(reader), docsEnum);
+ return docsEnum;
+ }
+
+ protected BytesRef setTerm() throws IOException {
+ termText = tenum.term();
+ if (tindex.prefix != null && !termText.startsWith(tindex.prefix)) {
+ termText = null;
}
- return true;
+ return termText;
}
-
- public boolean next() throws IOException {
+ @Override
+ public BytesRef next() throws IOException {
pos++;
- boolean b = tenum.next();
- if (!b) {
- t = null;
- return false;
+ if (tenum.next() == null) {
+ termText = null;
+ return null;
}
return setTerm(); // this is extra work if we know we are in bounds...
}
- public Term term() {
- return t;
+ @Override
+ public BytesRef term() {
+ return termText;
}
+ @Override
public int docFreq() {
return tenum.docFreq();
}
- public void close() throws IOException {
- if (tenum!=null) tenum.close();
- }
+ public BytesRef skipTo(BytesRef target) throws IOException {
- public boolean skipTo(String target) throws IOException {
- return skipTo(tindex.fterm.createTerm(target));
- }
-
- public boolean skipTo(Term target) throws IOException {
// already here
- if (t != null && t.equals(target)) return true;
+ if (termText != null && termText.equals(target)) return termText;
- int startIdx = Arrays.binarySearch(tindex.index,target.text());
+ if (tenum == null) {
+ return null;
+ }
+ int startIdx = Arrays.binarySearch(tindex.index,target);
+
if (startIdx >= 0) {
// we hit the term exactly... lucky us!
- if (tenum != null) tenum.close();
- tenum = reader.terms(target);
+ TermsEnum.SeekStatus seekStatus = tenum.seek(target);
+ assert seekStatus == TermsEnum.SeekStatus.FOUND;
pos = startIdx << tindex.intervalBits;
return setTerm();
}
// we didn't hit the term exactly
startIdx=-startIdx-1;
-
+
if (startIdx == 0) {
// our target occurs *before* the first term
- if (tenum != null) tenum.close();
- tenum = reader.terms(target);
+ TermsEnum.SeekStatus seekStatus = tenum.seek(target);
+ assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND;
pos = 0;
return setTerm();
}
@@ -956,53 +964,81 @@
// back up to the start of the block
startIdx--;
- if ((pos >> tindex.intervalBits) == startIdx && t != null && t.text().compareTo(target.text())<=0) {
+ if ((pos >> tindex.intervalBits) == startIdx && termText != null && termText.compareTo(target)<=0) {
// we are already in the right block and the current term is before the term we want,
// so we don't need to seek.
} else {
// seek to the right block
- if (tenum != null) tenum.close();
- tenum = reader.terms(target.createTerm(tindex.index[startIdx]));
+ TermsEnum.SeekStatus seekStatus = tenum.seek(tindex.index[startIdx]);
+ assert seekStatus == TermsEnum.SeekStatus.FOUND;
pos = startIdx << tindex.intervalBits;
- setTerm(); // should be true since it's in the index
+ setTerm(); // should be non-null since it's in the index
}
-
- while (t != null && t.text().compareTo(target.text()) < 0) {
+ while (termText != null && termText.compareTo(target) < 0) {
next();
}
- return t != null;
+ return termText;
}
-
- public boolean skipTo(int termNumber) throws IOException {
+ public BytesRef skipTo(int termNumber) throws IOException {
int delta = termNumber - pos;
if (delta < 0 || delta > tindex.interval || tenum==null) {
int idx = termNumber >>> tindex.intervalBits;
- String base = tindex.index[idx];
+ BytesRef base = tindex.index[idx];
pos = idx << tindex.intervalBits;
delta = termNumber - pos;
- if (tenum != null) tenum.close();
- tenum = reader.terms(tindex.createTerm(base));
+ TermsEnum.SeekStatus seekStatus = tenum.seek(base);
+ assert seekStatus == TermsEnum.SeekStatus.FOUND;
}
while (--delta >= 0) {
- boolean b = tenum.next();
- if (b==false) {
- t = null;
- return false;
+ BytesRef br = tenum.next();
+ if (br == null) {
+ termText = null;
+ return null;
}
++pos;
}
return setTerm();
}
+ protected void close() throws IOException {
+ // no-op, needed so the anon subclass that does indexing
+ // can build its index
+ }
+
/** The current term number, starting at 0.
* Only valid if the previous call to next() or skipTo() returned true.
*/
public int getTermNumber() {
return pos;
}
+
+ @Override
+ public long ord() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public SeekStatus seek(long ord) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public DocsEnum docs(Bits skipDocs, DocsEnum reuse) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public SeekStatus seek(BytesRef target, boolean useCache) {
+ throw new UnsupportedOperationException();
+ }
}
@@ -1018,9 +1054,9 @@
final static int intervalMask = 0xffffffff >>> (32-intervalBits);
final static int interval = 1 << intervalBits;
- final Term fterm; // prototype to be used in term construction w/o String.intern overhead
- final String prefix;
- String[] index;
+ final String field;
+ final BytesRef prefix;
+ BytesRef[] index;
int nTerms;
long sizeOfStrings;
@@ -1029,16 +1065,12 @@
}
TermIndex(String field, String prefix) {
- this.fterm = new Term(field, "");
- this.prefix = prefix;
+ this.field = field;
+ this.prefix = prefix == null ? null : new BytesRef(prefix);
}
- Term createTerm(String termVal) {
- return fterm.createTerm(termVal);
- }
-
- NumberedTermEnum getEnumerator(IndexReader reader, int termNumber) throws IOException {
- NumberedTermEnum te = new NumberedTermEnum(reader, this);
+ NumberedTermsEnum getEnumerator(IndexReader reader, int termNumber) throws IOException {
+ NumberedTermsEnum te = new NumberedTermsEnum(reader, this);
te.skipTo(termNumber);
return te;
}
@@ -1047,38 +1079,37 @@
with next() to fully traverse all of the terms so the index
will be built.
*/
- NumberedTermEnum getEnumerator(IndexReader reader) throws IOException {
- if (index==null) return new NumberedTermEnum(reader,this, prefix==null?"":prefix, 0) {
- ArrayList<String> lst;
+ NumberedTermsEnum getEnumerator(IndexReader reader) throws IOException {
+ if (index==null) return new NumberedTermsEnum(reader,this, prefix==null?new BytesRef():prefix, 0) {
+ ArrayList<BytesRef> lst;
- protected boolean setTerm() {
- boolean b = super.setTerm();
- if (b && (pos & intervalMask)==0) {
- String text = term().text();
- sizeOfStrings += text.length() << 1;
+ protected BytesRef setTerm() throws IOException {
+ BytesRef br = super.setTerm();
+ if (br != null && (pos & intervalMask)==0) {
+ sizeOfStrings += br.length;
if (lst==null) {
- lst = new ArrayList<String>();
+ lst = new ArrayList<BytesRef>();
}
- lst.add(text);
+ lst.add(new BytesRef(br));
}
- return b;
+ return br;
}
- public boolean skipTo(Term target) throws IOException {
+ public BytesRef skipTo(Term target) throws IOException {
throw new UnsupportedOperationException();
}
- public boolean skipTo(int termNumber) throws IOException {
+ public BytesRef skipTo(int termNumber) throws IOException {
throw new UnsupportedOperationException();
}
public void close() throws IOException {
nTerms=pos;
super.close();
- index = lst!=null ? lst.toArray(new String[lst.size()]) : new String[0];
+ index = lst!=null ? lst.toArray(new BytesRef[lst.size()]) : new BytesRef[0];
}
};
- else return new NumberedTermEnum(reader,this,"",0);
+ else return new NumberedTermsEnum(reader,this,new BytesRef(),0);
}
Index: solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
===================================================================
--- solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (revision 954967)
+++ solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (working copy)
@@ -37,13 +37,17 @@
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.analysis.CharFilterFactory;
import org.apache.solr.analysis.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
@@ -468,18 +472,18 @@
indexInfo.add("maxDoc", reader.maxDoc());
if( countTerms ) {
- TermEnum te = null;
- try{
- te = reader.terms();
- int numTerms = 0;
- while (te.next()) {
- numTerms++;
+ Fields fields = MultiFields.getFields(reader);
+ int numTerms = 0;
+ if (fields != null) {
+ FieldsEnum fieldsEnum = fields.iterator();
+ while(fieldsEnum.next() != null) {
+ TermsEnum termsEnum = fieldsEnum.terms();
+ while(termsEnum.next() != null) {
+ numTerms++;
+ }
}
- indexInfo.add("numTerms", numTerms );
}
- finally{
- if( te != null ) te.close();
- }
+ indexInfo.add("numTerms", numTerms );
}
indexInfo.add("version", reader.getVersion()); // TODO? Is this different then: IndexReader.getCurrentVersion( dir )?
@@ -528,7 +532,6 @@
int maxBucket = -1;
public Map<Integer,Integer> hist = new HashMap<Integer,Integer>();
- private static final double LOG2 = Math.log( 2 );
public static int getPowerOfTwoBucket( int num )
{
return Math.max(1, Integer.highestOneBit(num-1) << 1);
@@ -621,45 +624,48 @@
private static Map<String,TopTermQueue> getTopTerms( IndexReader reader, Set<String> fields, int numTerms, Set<String> junkWords ) throws Exception
{
Map<String,TopTermQueue> info = new HashMap<String,TopTermQueue>();
-
- TermEnum terms = null;
- try{
- terms = reader.terms();
- while (terms.next()) {
- String field = terms.term().field();
- String t = terms.term().text();
+
+ Fields fieldsC = MultiFields.getFields(reader);
+ if (fieldsC != null) {
+ FieldsEnum fieldsEnum = fieldsC.iterator();
+ String field;
+ while((field = fieldsEnum.next()) != null) {
+
+ TermsEnum termsEnum = fieldsEnum.terms();
+ BytesRef text;
+ while((text = termsEnum.next()) != null) {
+ String t = text.utf8ToString();
- // Compute distinct terms for every field
- TopTermQueue tiq = info.get( field );
- if( tiq == null ) {
- tiq = new TopTermQueue( numTerms+1 );
- info.put( field, tiq );
- }
- tiq.distinctTerms++;
- tiq.histogram.add( terms.docFreq() ); // add the term to the histogram
+ // Compute distinct terms for every field
+ TopTermQueue tiq = info.get( field );
+ if( tiq == null ) {
+ tiq = new TopTermQueue( numTerms+1 );
+ info.put( field, tiq );
+ }
+
+ tiq.distinctTerms++;
+ tiq.histogram.add( termsEnum.docFreq() ); // add the term to the histogram
- // Only save the distinct terms for fields we worry about
- if (fields != null && fields.size() > 0) {
- if( !fields.contains( field ) ) {
+ // Only save the distinct terms for fields we worry about
+ if (fields != null && fields.size() > 0) {
+ if( !fields.contains( field ) ) {
+ continue;
+ }
+ }
+ if( junkWords != null && junkWords.contains( t ) ) {
continue;
}
- }
- if( junkWords != null && junkWords.contains( t ) ) {
- continue;
- }
- if( terms.docFreq() > tiq.minFreq ) {
- tiq.add(new TopTermQueue.TermInfo(terms.term(), terms.docFreq()));
+ if( termsEnum.docFreq() > tiq.minFreq ) {
+ tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
if (tiq.size() > numTerms) { // if tiq full
- tiq.pop(); // remove lowest in tiq
- tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq
+ tiq.pop(); // remove lowest in tiq
+ tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq
+ }
}
}
}
}
- finally {
- if( terms != null ) terms.close();
- }
return info;
}
}
Index: solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java
===================================================================
--- solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java (revision 954967)
+++ solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java (working copy)
@@ -4,9 +4,12 @@
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.TermVectorMapper;
import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
@@ -31,7 +34,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.Set;
-import java.util.logging.Logger;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -252,9 +255,12 @@
int result = 1;
currentTerm = currentTerm.createTerm(term);
try {
- TermEnum termEnum = reader.terms(currentTerm);
- if (termEnum != null && termEnum.term().equals(currentTerm)) {
- result = termEnum.docFreq();
+ Terms terms = MultiFields.getTerms(reader, currentTerm.field());
+ if (terms != null) {
+ TermsEnum termsEnum = terms.iterator();
+ if (termsEnum.seek(new BytesRef(term)) == TermsEnum.SeekStatus.FOUND) {
+ result = termsEnum.docFreq();
+ }
}
} catch (IOException e) {
throw new RuntimeException(e);
Index: solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java
===================================================================
--- solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java (revision 954967)
+++ solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java (working copy)
@@ -21,19 +21,18 @@
import java.util.Iterator;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.BytesRef;
/**
* HighFrequencyDictionary: terms taken from the given field
* of a Lucene index, which appear in a number of documents
* above a given threshold.
*
- * When using IndexReader.terms(Term) the code must not call next() on TermEnum
- * as the first call to TermEnum, see: http://issues.apache.org/jira/browse/LUCENE-6
- *
* Threshold is a value in [0..1] representing the minimum
* number of documents (of the total) where a term should appear.
*
@@ -55,41 +54,34 @@
}
final class HighFrequencyIterator implements Iterator {
- private TermEnum termEnum;
- private Term actualTerm;
+ private TermsEnum termsEnum;
+ private BytesRef actualTerm;
private boolean hasNextCalled;
private int minNumDocs;
HighFrequencyIterator() {
try {
- termEnum = reader.terms(new Term(field, ""));
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ termsEnum = terms.iterator();
+ }
minNumDocs = (int)(thresh * (float)reader.numDocs());
} catch (IOException e) {
throw new RuntimeException(e);
}
}
- private boolean isFrequent(Term term) {
- try {
- return reader.docFreq(term) >= minNumDocs;
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
+ private boolean isFrequent(int freq) {
+ return freq >= minNumDocs;
}
public Object next() {
- if (!hasNextCalled) {
- hasNext();
+ if (!hasNextCalled && !hasNext()) {
+ return null;
}
hasNextCalled = false;
- try {
- termEnum.next();
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
-
- return (actualTerm != null) ? actualTerm.text() : null;
+ return (actualTerm != null) ? actualTerm.utf8ToString() : null;
}
public boolean hasNext() {
@@ -98,35 +90,28 @@
}
hasNextCalled = true;
- do {
- actualTerm = termEnum.term();
+ if (termsEnum == null) {
+ return false;
+ }
+ while(true) {
+
+ try {
+ actualTerm = termsEnum.next();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
// if there are no words return false
if (actualTerm == null) {
return false;
}
- String currentField = actualTerm.field();
-
- // if the next word doesn't have the same field return false
- if (currentField != field) { // intern'd comparison
- actualTerm = null;
- return false;
- }
-
// got a valid term, does it pass the threshold?
- if (isFrequent(actualTerm)) {
+ if (isFrequent(termsEnum.docFreq())) {
return true;
}
-
- // term not up to threshold
- try {
- termEnum.next();
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
-
- } while (true);
+ }
}
public void remove() {
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (revision 954967)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (working copy)
@@ -12,10 +12,13 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
/**
@@ -279,8 +282,11 @@
// Make sure position is still incremented when
// massive term is skipped:
- TermPositions tps = reader.termPositions(new Term("content", "another"));
- assertTrue(tps.next());
+ DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ "content",
+ new BytesRef("another"));
+ assertTrue(tps.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(1, tps.freq());
assertEquals(3, tps.nextPosition());
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (revision 954967)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (working copy)
@@ -29,9 +29,11 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.BytesRef;
public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
@@ -82,10 +84,16 @@
writer.close();
IndexReader reader = IndexReader.open(dir, true);
- TermDocs td = reader.termDocs(new Term("partnum", "Q36"));
- assertTrue(td.next());
- td = reader.termDocs(new Term("partnum", "Q37"));
- assertTrue(td.next());
+ DocsEnum td = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ "partnum",
+ new BytesRef("Q36"));
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ td = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ "partnum",
+ new BytesRef("Q37"));
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
}
// LUCENE-1441
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (revision 954967)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (working copy)
@@ -18,12 +18,15 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.io.Reader;
@@ -141,20 +144,15 @@
*/
public int addStopWords(IndexReader reader, String fieldName, int maxDocFreq) throws IOException {
HashSet stopWords = new HashSet();
- String internedFieldName = StringHelper.intern(fieldName);
- TermEnum te = reader.terms(new Term(fieldName));
- Term term = te.term();
- while (term != null) {
- if (term.field() != internedFieldName) {
- break;
+ Terms terms = MultiFields.getTerms(reader, fieldName);
+ if (terms != null) {
+ TermsEnum te = terms.iterator();
+ BytesRef text;
+ while ((text = te.next()) != null) {
+ if (te.docFreq() > maxDocFreq) {
+ stopWords.add(text.utf8ToString());
+ }
}
- if (te.docFreq() > maxDocFreq) {
- stopWords.add(term.text());
- }
- if (!te.next()) {
- break;
- }
- term = te.term();
}
stopWordsPerField.put(fieldName, stopWords);
Index: lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java (working copy)
@@ -27,11 +27,12 @@
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.BytesRef;
public class TestCachingTokenFilter extends BaseTokenStreamTestCase {
private String[] tokens = new String[] {"term1", "term2", "term3", "term2"};
@@ -75,19 +76,28 @@
writer.close();
IndexReader reader = IndexReader.open(dir, true);
- TermPositions termPositions = reader.termPositions(new Term("preanalyzed", "term1"));
- assertTrue(termPositions.next());
+ DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ "preanalyzed",
+ new BytesRef("term1"));
+ assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
assertEquals(1, termPositions.freq());
assertEquals(0, termPositions.nextPosition());
- termPositions.seek(new Term("preanalyzed", "term2"));
- assertTrue(termPositions.next());
+ termPositions = MultiFields.getTermPositionsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ "preanalyzed",
+ new BytesRef("term2"));
+ assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
assertEquals(2, termPositions.freq());
assertEquals(1, termPositions.nextPosition());
assertEquals(3, termPositions.nextPosition());
- termPositions.seek(new Term("preanalyzed", "term3"));
- assertTrue(termPositions.next());
+ termPositions = MultiFields.getTermPositionsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ "preanalyzed",
+ new BytesRef("term3"));
+ assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
assertEquals(1, termPositions.freq());
assertEquals(2, termPositions.nextPosition());
reader.close();
Index: lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java (working copy)
@@ -32,11 +32,12 @@
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermPositions;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.store.Directory;
@@ -50,6 +51,7 @@
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
+import org.apache.lucene.util.BytesRef;
/**
* Term position unit test.
@@ -99,13 +101,19 @@
IndexSearcher searcher = new IndexSearcher(store, true);
- TermPositions pos = searcher.getIndexReader().termPositions(new Term("field", "1"));
- pos.next();
+ DocsAndPositionsEnum pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(),
+ MultiFields.getDeletedDocs(searcher.getIndexReader()),
+ "field",
+ new BytesRef("1"));
+ pos.nextDoc();
// first token should be at position 0
assertEquals(0, pos.nextPosition());
- pos = searcher.getIndexReader().termPositions(new Term("field", "2"));
- pos.next();
+ pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(),
+ MultiFields.getDeletedDocs(searcher.getIndexReader()),
+ "field",
+ new BytesRef("2"));
+ pos.nextDoc();
// second token should be at position 2
assertEquals(2, pos.nextPosition());
@@ -238,9 +246,13 @@
IndexReader r = writer.getReader();
- TermPositions tp = r.termPositions(new Term("content", "a"));
+ DocsAndPositionsEnum tp = MultiFields.getTermPositionsEnum(r,
+ MultiFields.getDeletedDocs(r),
+ "content",
+ new BytesRef("a"));
+
int count = 0;
- assertTrue(tp.next());
+ assertTrue(tp.nextDoc() != tp.NO_MORE_DOCS);
// "a" occurs 4 times
assertEquals(4, tp.freq());
int expected = 0;
@@ -250,7 +262,7 @@
assertEquals(6, tp.nextPosition());
// only one doc has "a"
- assertFalse(tp.next());
+ assertEquals(tp.NO_MORE_DOCS, tp.nextDoc());
IndexSearcher is = new IndexSearcher(r);
Index: lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (working copy)
@@ -420,9 +420,4 @@
//assertEquals("C added => A,B,,C in range", 3, hits.length());
searcher.close();
}
-
- @Deprecated
- public void testBackwardsLayer() {
- assertTrue(new TermRangeQuery("dummy", null, null, true, true).hasNewAPI);
- }
}
Index: lucene/src/test/org/apache/lucene/search/TestTermVectors.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestTermVectors.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/TestTermVectors.java (working copy)
@@ -244,40 +244,38 @@
writer.addDocument(testDoc4);
writer.close();
IndexSearcher knownSearcher = new IndexSearcher(dir, true);
- TermEnum termEnum = knownSearcher.reader.terms();
- TermDocs termDocs = knownSearcher.reader.termDocs();
- //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length);
-
- //Similarity sim = knownSearcher.getSimilarity();
- while (termEnum.next() == true)
- {
- Term term = termEnum.term();
- //System.out.println("Term: " + term);
- termDocs.seek(term);
- while (termDocs.next())
- {
- int docId = termDocs.doc();
- int freq = termDocs.freq();
- //System.out.println("Doc Id: " + docId + " freq " + freq);
- TermFreqVector vector = knownSearcher.reader.getTermFreqVector(docId, "field");
- //float tf = sim.tf(freq);
- //float idf = sim.idf(knownSearcher.docFreq(term), knownSearcher.maxDoc());
- //float qNorm = sim.queryNorm()
- //This is fine since we don't have stop words
- //float lNorm = sim.lengthNorm("field", vector.getTerms().length);
- //float coord = sim.coord()
- //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
- assertTrue(vector != null);
- String[] vTerms = vector.getTerms();
- int [] freqs = vector.getTermFrequencies();
- for (int i = 0; i < vTerms.length; i++)
- {
- if (term.text().equals(vTerms[i]))
- {
- assertTrue(freqs[i] == freq);
- }
+ FieldsEnum fields = MultiFields.getFields(knownSearcher.reader).iterator();
+
+ DocsEnum docs = null;
+ while(fields.next() != null) {
+ TermsEnum terms = fields.terms();
+ while(terms.next() != null) {
+ String text = terms.term().utf8ToString();
+ docs = terms.docs(MultiFields.getDeletedDocs(knownSearcher.reader), docs);
+
+ while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ int docId = docs.docID();
+ int freq = docs.freq();
+ //System.out.println("Doc Id: " + docId + " freq " + freq);
+ TermFreqVector vector = knownSearcher.reader.getTermFreqVector(docId, "field");
+ //float tf = sim.tf(freq);
+ //float idf = sim.idf(knownSearcher.docFreq(term), knownSearcher.maxDoc());
+ //float qNorm = sim.queryNorm()
+ //This is fine since we don't have stop words
+ //float lNorm = sim.lengthNorm("field", vector.getTerms().length);
+ //float coord = sim.coord()
+ //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
+ assertTrue(vector != null);
+ String[] vTerms = vector.getTerms();
+ int [] freqs = vector.getTermFrequencies();
+ for (int i = 0; i < vTerms.length; i++)
+ {
+ if (text.equals(vTerms[i]))
+ {
+ assertTrue(freqs[i] == freq);
+ }
+ }
}
-
}
//System.out.println("--------");
}
Index: lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java (working copy)
@@ -21,11 +21,13 @@
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
@@ -79,13 +81,16 @@
// this TermEnum gives "piccadilly", "pie" and "pizza".
String prefix = "pi";
- TermEnum te = ir.terms(new Term("body", prefix + "*"));
+ TermsEnum te = MultiFields.getFields(ir).terms("body").iterator();
+ te.seek(new BytesRef(prefix));
do {
- if (te.term().text().startsWith(prefix))
- {
- termsWithPrefix.add(te.term());
+ String s = te.term().utf8ToString();
+ if (s.startsWith(prefix)) {
+ termsWithPrefix.add(new Term("body", s));
+ } else {
+ break;
}
- } while (te.next());
+ } while (te.next() != null);
query1.add(termsWithPrefix.toArray(new Term[0]));
query2.add(termsWithPrefix.toArray(new Term[0]));
Index: lucene/src/test/org/apache/lucene/search/TestWildcard.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestWildcard.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/TestWildcard.java (working copy)
@@ -19,12 +19,10 @@
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
@@ -309,62 +307,4 @@
searcher.close();
}
- @Deprecated
- private static final class OldWildcardQuery extends MultiTermQuery {
- final Term term;
-
- OldWildcardQuery(Term term) {
- this.term = term;
- }
-
- @Override
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new WildcardTermEnum(reader, term);
- }
-
- @Override
- public String toString(String field) {
- return "OldWildcard(" + term.toString()+ ")";
- }
- }
-
- @Deprecated
- public void testDeprecatedTermEnum() throws Exception {
- RAMDirectory indexStore = getIndexStore("body", new String[]
- {"metal", "metals"});
- IndexSearcher searcher = new IndexSearcher(indexStore, true);
- Query query1 = new TermQuery(new Term("body", "metal"));
- Query query2 = new OldWildcardQuery(new Term("body", "metal*"));
- Query query3 = new OldWildcardQuery(new Term("body", "m*tal"));
- Query query4 = new OldWildcardQuery(new Term("body", "m*tal*"));
- Query query5 = new OldWildcardQuery(new Term("body", "m*tals"));
-
- BooleanQuery query6 = new BooleanQuery();
- query6.add(query5, BooleanClause.Occur.SHOULD);
-
- BooleanQuery query7 = new BooleanQuery();
- query7.add(query3, BooleanClause.Occur.SHOULD);
- query7.add(query5, BooleanClause.Occur.SHOULD);
-
- // Queries do not automatically lower-case search terms:
- Query query8 = new OldWildcardQuery(new Term("body", "M*tal*"));
-
- assertMatches(searcher, query1, 1);
- assertMatches(searcher, query2, 2);
- assertMatches(searcher, query3, 1);
- assertMatches(searcher, query4, 2);
- assertMatches(searcher, query5, 1);
- assertMatches(searcher, query6, 1);
- assertMatches(searcher, query7, 2);
- assertMatches(searcher, query8, 0);
- assertMatches(searcher, new OldWildcardQuery(new Term("body", "*tall")), 0);
- assertMatches(searcher, new OldWildcardQuery(new Term("body", "*tal")), 1);
- assertMatches(searcher, new OldWildcardQuery(new Term("body", "*tal*")), 2);
- }
-
- @Deprecated
- public void testBackwardsLayer() {
- assertTrue(new WildcardQuery(new Term("body", "metal*")).hasNewAPI);
- assertFalse(new OldWildcardQuery(new Term("body", "metal*")).hasNewAPI);
- }
}
Index: lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java (working copy)
@@ -196,10 +196,4 @@
assertSame(TermsEnum.EMPTY, aq.getTermsEnum(searcher.getIndexReader()));
assertEquals(0, automatonQueryNrHits(aq));
}
-
- @Deprecated
- public void testBackwardsLayer() {
- assertTrue(new AutomatonQuery(newTerm("bogus"), BasicAutomata
- .makeString("piece")).hasNewAPI);
- }
}
Index: lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java (working copy)
@@ -377,10 +377,4 @@
doc.add(new Field("field", text, Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);
}
-
- @Deprecated
- public void testBackwardsLayer() {
- assertTrue(new FuzzyQuery(new Term("dummy", "dummy")).hasNewAPI);
- }
-
}
Index: lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java (working copy)
@@ -25,7 +25,6 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.Automaton;
@@ -120,9 +119,4 @@
public void testBacktracking() throws IOException {
assertEquals(1, regexQueryNrHits("4934[314]"));
}
-
- @Deprecated
- public void testBackwardsLayer() {
- assertTrue(new RegexpQuery(newTerm(".*")).hasNewAPI);
- }
}
Index: lucene/src/test/org/apache/lucene/search/JustCompileSearch.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/JustCompileSearch.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/JustCompileSearch.java (working copy)
@@ -298,25 +298,6 @@
}
- static final class JustCompileFilteredTermEnum extends FilteredTermEnum {
-
- @Override
- public float difference() {
- throw new UnsupportedOperationException(UNSUPPORTED_MSG);
- }
-
- @Override
- protected boolean endEnum() {
- throw new UnsupportedOperationException(UNSUPPORTED_MSG);
- }
-
- @Override
- protected boolean termCompare(Term term) {
- throw new UnsupportedOperationException(UNSUPPORTED_MSG);
- }
-
- }
-
static final class JustCompilePhraseScorer extends PhraseScorer {
JustCompilePhraseScorer(Weight weight, DocsAndPositionsEnum[] docs, int[] offsets,
Index: lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (working copy)
@@ -590,10 +590,4 @@
);
// difference to int range is tested in TestNumericRangeQuery32
}
-
- @Test @Deprecated
- public void testBackwardsLayer() {
- assertTrue(NumericRangeQuery.newLongRange("dummy", null, null, true, true).hasNewAPI);
- }
-
}
Index: lucene/src/test/org/apache/lucene/search/TestMultiTermQueryBWComp.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestMultiTermQueryBWComp.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/TestMultiTermQueryBWComp.java (working copy)
@@ -1,239 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.LuceneTestCaseJ4;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import static org.junit.Assert.*;
-
-/**
- * Test MultiTermQuery api backwards compat
- * @deprecated Remove test when old API is no longer supported
- */
-@Deprecated
-public class TestMultiTermQueryBWComp extends LuceneTestCaseJ4 {
- private static RAMDirectory dir;
- private static Searcher searcher;
- private static final String FIELD = "test";
-
- /**
- * Test that the correct method (getTermsEnum/getEnum) is called.
- */
- @Test
- public void testEnumMethod() throws IOException {
- assertAPI("old", new OldAPI(FIELD));
- assertAPI("new", new NewAPI(FIELD));
- assertAPI("new", new BothAPI(FIELD));
-
- assertAPI("old2", new OldExtendsOldAPI(FIELD));
- assertAPI("old2", new OldExtendsNewAPI(FIELD));
- assertAPI("old2", new OldExtendsBothAPI(FIELD));
-
- assertAPI("new2", new NewExtendsOldAPI(FIELD));
- assertAPI("new2", new NewExtendsNewAPI(FIELD));
- assertAPI("new2", new NewExtendsBothAPI(FIELD));
-
- assertAPI("new2", new BothExtendsOldAPI(FIELD));
- assertAPI("new2", new BothExtendsNewAPI(FIELD));
- assertAPI("new2", new BothExtendsBothAPI(FIELD));
- }
-
- private static void assertAPI(String expected, Query query) throws IOException {
- TopDocs td = searcher.search(query, 25);
- assertEquals(1, td.totalHits);
- Document doc = searcher.doc(td.scoreDocs[0].doc);
- assertEquals(expected, doc.get(FIELD));
- }
-
- private class OldAPI extends MultiTermQuery {
- OldAPI(String field) { super(field); }
-
- @Override
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new SingleTermEnum(reader, new Term(FIELD, "old"));
- }
-
- @Override
- public String toString(String field) { return null; }
- }
-
- private class NewAPI extends MultiTermQuery {
- NewAPI(String field) { super(field); }
-
- @Override
- protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- return new SingleTermsEnum(reader, new Term(FIELD, "new"));
- }
-
- @Override
- public String toString(String field) { return null; }
- }
-
- private class BothAPI extends MultiTermQuery {
- BothAPI(String field) { super(field); }
-
- @Override
- protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- return new SingleTermsEnum(reader, new Term(FIELD, "new"));
- }
-
- @Override
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new SingleTermEnum(reader, new Term(FIELD, "old"));
- }
-
- @Override
- public String toString(String field) { return null; }
- }
-
- private class OldExtendsOldAPI extends OldAPI {
- OldExtendsOldAPI(String field) { super(field); }
-
- @Override
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new SingleTermEnum(reader, new Term(FIELD, "old2"));
- }
- }
-
- private class OldExtendsNewAPI extends NewAPI {
- OldExtendsNewAPI(String field) { super(field); }
-
- @Override
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new SingleTermEnum(reader, new Term(FIELD, "old2"));
- }
- }
-
- private class OldExtendsBothAPI extends BothAPI {
- OldExtendsBothAPI(String field) { super(field); }
-
- @Override
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new SingleTermEnum(reader, new Term(FIELD, "old2"));
- }
- }
-
- private class NewExtendsOldAPI extends OldAPI {
- NewExtendsOldAPI(String field) { super(field); }
-
- @Override
- protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- return new SingleTermsEnum(reader, new Term(FIELD, "new2"));
- }
- }
-
- private class NewExtendsNewAPI extends NewAPI {
- NewExtendsNewAPI(String field) { super(field); }
-
- @Override
- protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- return new SingleTermsEnum(reader, new Term(FIELD, "new2"));
- }
- }
-
- private class NewExtendsBothAPI extends BothAPI {
- NewExtendsBothAPI(String field) { super(field); }
-
- @Override
- protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- return new SingleTermsEnum(reader, new Term(FIELD, "new2"));
- }
- }
-
- private class BothExtendsOldAPI extends OldAPI {
- BothExtendsOldAPI(String field) { super(field); }
-
- @Override
- protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- return new SingleTermsEnum(reader, new Term(FIELD, "new2"));
- }
-
- @Override
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new SingleTermEnum(reader, new Term(FIELD, "old2"));
- }
- }
-
- private class BothExtendsNewAPI extends NewAPI {
- BothExtendsNewAPI(String field) { super(field); }
-
- @Override
- protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- return new SingleTermsEnum(reader, new Term(FIELD, "new2"));
- }
-
- @Override
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new SingleTermEnum(reader, new Term(FIELD, "old2"));
- }
- }
-
- private class BothExtendsBothAPI extends BothAPI {
- BothExtendsBothAPI(String field) { super(field); }
-
- @Override
- protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- return new SingleTermsEnum(reader, new Term(FIELD, "new2"));
- }
-
- @Override
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new SingleTermEnum(reader, new Term(FIELD, "old2"));
- }
- }
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- dir = new RAMDirectory();
- IndexWriter writer = new IndexWriter(dir,
- new MockAnalyzer(), true,
- IndexWriter.MaxFieldLength.LIMITED);
-
- String values[] = { "old", "old2", "new", "new2" };
- for (String value : values) {
- Document doc = new Document();
- doc.add(new Field(FIELD, value,
- Field.Store.YES, Field.Index.ANALYZED));
- writer.addDocument(doc);
- }
-
- writer.optimize();
- writer.close();
- searcher = new IndexSearcher(dir, true);
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- searcher.close();
- searcher = null;
- dir.close();
- dir = null;
- }
-}
Index: lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java (working copy)
@@ -59,9 +59,4 @@
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("everything", 3, hits.length);
}
-
- @Deprecated
- public void testBackwardsLayer() {
- assertTrue(new PrefixQuery(new Term("dummy", "dummy")).hasNewAPI);
- }
}
Index: lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java (working copy)
@@ -19,21 +19,20 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.Version;
import java.io.IOException;
-import java.util.HashSet;
import java.util.LinkedList;
-import java.util.Collections;
/**
* This class tests the MultiPhraseQuery class.
@@ -73,13 +72,16 @@
// this TermEnum gives "piccadilly", "pie" and "pizza".
String prefix = "pi";
- TermEnum te = ir.terms(new Term("body", prefix));
+ TermsEnum te = MultiFields.getFields(ir).terms("body").iterator();
+ te.seek(new BytesRef(prefix));
do {
- if (te.term().text().startsWith(prefix))
- {
- termsWithPrefix.add(te.term());
+ String s = te.term().utf8ToString();
+ if (s.startsWith(prefix)) {
+ termsWithPrefix.add(new Term("body", s));
+ } else {
+ break;
}
- } while (te.next());
+ } while (te.next() != null);
query1.add(termsWithPrefix.toArray(new Term[0]));
assertEquals("body:\"blueberry (piccadilly pie pizza)\"", query1.toString());
@@ -96,13 +98,14 @@
MultiPhraseQuery query3 = new MultiPhraseQuery();
termsWithPrefix.clear();
prefix = "blue";
- te = ir.terms(new Term("body", prefix));
+ te.seek(new BytesRef(prefix));
+
do {
- if (te.term().text().startsWith(prefix))
+ if (te.term().utf8ToString().startsWith(prefix))
{
- termsWithPrefix.add(te.term());
+ termsWithPrefix.add(new Term("body", te.term().utf8ToString()));
}
- } while (te.next());
+ } while (te.next() != null);
ir.close();
query3.add(termsWithPrefix.toArray(new Term[0]));
query3.add(new Term("body", "pizza"));
Index: lucene/src/test/org/apache/lucene/index/TestDoc.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestDoc.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestDoc.java (working copy)
@@ -213,15 +213,19 @@
for (int i = 0; i < reader.numDocs(); i++)
out.println(reader.document(i));
- TermEnum tis = reader.terms();
- while (tis.next()) {
- out.print(tis.term());
- out.println(" DF=" + tis.docFreq());
+ FieldsEnum fis = reader.fields().iterator();
+ String field = fis.next();
+ while(field != null) {
+ TermsEnum tis = fis.terms();
+ while(tis.next() != null) {
- TermPositions positions = reader.termPositions(tis.term());
- try {
- while (positions.next()) {
- out.print(" doc=" + positions.doc());
+ out.print(" term=" + field + ":" + tis.term());
+ out.println(" DF=" + tis.docFreq());
+
+ DocsAndPositionsEnum positions = tis.docsAndPositions(reader.getDeletedDocs(), null);
+
+ while (positions.nextDoc() != positions.NO_MORE_DOCS) {
+ out.print(" doc=" + positions.docID());
out.print(" TF=" + positions.freq());
out.print(" pos=");
out.print(positions.nextPosition());
@@ -229,11 +233,9 @@
out.print("," + positions.nextPosition());
out.println("");
}
- } finally {
- positions.close();
}
+ field = fis.next();
}
- tis.close();
reader.close();
}
}
Index: lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java (working copy)
@@ -26,6 +26,7 @@
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Bits;
public class TestParallelTermEnum extends LuceneTestCase {
private IndexReader ir1;
@@ -77,105 +78,123 @@
pr.add(ir1);
pr.add(ir2);
- TermDocs td = pr.termDocs();
+ Bits delDocs = pr.getDeletedDocs();
- TermEnum te = pr.terms();
- assertTrue(te.next());
- assertEquals("field1:brown", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field1:fox", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field1:jumps", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field1:quick", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field1:the", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field2:brown", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field2:fox", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field2:jumps", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field2:quick", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field2:the", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field3:dog", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field3:fox", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field3:jumps", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field3:lazy", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field3:over", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertTrue(te.next());
- assertEquals("field3:the", te.term().toString());
- td.seek(te.term());
- assertTrue(td.next());
- assertEquals(0, td.doc());
- assertFalse(td.next());
- assertFalse(te.next());
+ FieldsEnum fe = pr.fields().iterator();
+
+ String f = fe.next();
+ assertEquals("field0", f);
+ f = fe.next();
+ assertEquals("field1", f);
+
+ TermsEnum te = fe.terms();
+
+ assertEquals("brown", te.next().utf8ToString());
+ DocsEnum td = te.docs(delDocs, null);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("fox", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("jumps", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("quick", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("the", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertNull(te.next());
+ f = fe.next();
+ assertEquals("field2", f);
+ te = fe.terms();
+
+ assertEquals("brown", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("fox", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("jumps", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("quick", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("the", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertNull(te.next());
+ f = fe.next();
+ assertEquals("field3", f);
+ te = fe.terms();
+
+ assertEquals("dog", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("fox", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("jumps", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("lazy", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("over", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertEquals("the", te.next().utf8ToString());
+ td = te.docs(delDocs, td);
+ assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, td.docID());
+ assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
+
+ assertNull(te.next());
}
}
Index: lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java (working copy)
@@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -83,36 +84,32 @@
throws IOException
{
IndexReader reader = IndexReader.open(dir, true);
- TermEnum termEnum = null;
+ TermsEnum termEnum = MultiFields.getTerms(reader, "content").iterator();
// create enumeration of all terms
- termEnum = reader.terms();
// go to the first term (aaa)
termEnum.next();
// assert that term is 'aaa'
- assertEquals("aaa", termEnum.term().text());
+ assertEquals("aaa", termEnum.term().utf8ToString());
assertEquals(200, termEnum.docFreq());
// go to the second term (bbb)
termEnum.next();
// assert that term is 'bbb'
- assertEquals("bbb", termEnum.term().text());
+ assertEquals("bbb", termEnum.term().utf8ToString());
assertEquals(100, termEnum.docFreq());
- termEnum.close();
-
- // create enumeration of terms after term 'aaa', including 'aaa'
- termEnum = reader.terms(new Term("content", "aaa"));
+ // create enumeration of terms after term 'aaa',
+ // including 'aaa'
+ termEnum.seek(new BytesRef("aaa"));
// assert that term is 'aaa'
- assertEquals("aaa", termEnum.term().text());
+ assertEquals("aaa", termEnum.term().utf8ToString());
assertEquals(200, termEnum.docFreq());
// go to term 'bbb'
termEnum.next();
// assert that term is 'bbb'
- assertEquals("bbb", termEnum.term().text());
+ assertEquals("bbb", termEnum.term().utf8ToString());
assertEquals(100, termEnum.docFreq());
-
- termEnum.close();
}
private void addDoc(IndexWriter writer, String value) throws IOException
Index: lucene/src/test/org/apache/lucene/index/TestIndexReader.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexReader.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestIndexReader.java (working copy)
@@ -55,6 +55,8 @@
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
public class TestIndexReader extends LuceneTestCase
{
@@ -287,22 +289,17 @@
int expected)
throws IOException
{
- TermDocs tdocs = null;
-
- try {
- tdocs = reader.termDocs(term);
- assertNotNull(msg + ", null TermDocs", tdocs);
- int count = 0;
- while(tdocs.next()) {
- count++;
- }
- assertEquals(msg + ", count mismatch", expected, count);
-
- } finally {
- if (tdocs != null)
- tdocs.close();
+ DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ term.field(),
+ new BytesRef(term.text()));
+ int count = 0;
+ if (tdocs != null) {
+ while(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ count++;
+ }
}
-
+ assertEquals(msg + ", count mismatch", expected, count);
}
public void testBasicDelete() throws IOException {
@@ -1348,21 +1345,26 @@
}
// check dictionary and posting lists
- TermEnum enum1 = index1.terms();
- TermEnum enum2 = index2.terms();
- TermPositions tp1 = index1.termPositions();
- TermPositions tp2 = index2.termPositions();
- while(enum1.next()) {
- assertTrue(enum2.next());
- assertEquals("Different term in dictionary.", enum1.term(), enum2.term());
- tp1.seek(enum1.term());
- tp2.seek(enum1.term());
- while(tp1.next()) {
- assertTrue(tp2.next());
- assertEquals("Different doc id in postinglist of term " + enum1.term() + ".", tp1.doc(), tp2.doc());
- assertEquals("Different term frequence in postinglist of term " + enum1.term() + ".", tp1.freq(), tp2.freq());
- for (int i = 0; i < tp1.freq(); i++) {
- assertEquals("Different positions in postinglist of term " + enum1.term() + ".", tp1.nextPosition(), tp2.nextPosition());
+ FieldsEnum fenum1 = MultiFields.getFields(index1).iterator();
+ FieldsEnum fenum2 = MultiFields.getFields(index2).iterator();
+ String field1 = null;
+ Bits delDocs = MultiFields.getDeletedDocs(index1);
+ while((field1=fenum1.next()) != null) {
+ assertEquals("Different fields", field1, fenum2.next());
+ TermsEnum enum1 = fenum1.terms();
+ TermsEnum enum2 = fenum2.terms();
+ while(enum1.next() != null) {
+ assertEquals("Different terms", enum1.term(), enum2.next());
+ DocsAndPositionsEnum tp1 = enum1.docsAndPositions(delDocs, null);
+ DocsAndPositionsEnum tp2 = enum2.docsAndPositions(delDocs, null);
+
+ while(tp1.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ assertTrue(tp2.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals("Different doc id in postinglist of term " + enum1.term() + ".", tp1.docID(), tp2.docID());
+ assertEquals("Different term frequence in postinglist of term " + enum1.term() + ".", tp1.freq(), tp2.freq());
+ for (int i = 0; i < tp1.freq(); i++) {
+ assertEquals("Different positions in postinglist of term " + enum1.term() + ".", tp1.nextPosition(), tp2.nextPosition());
+ }
}
}
}
Index: lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (working copy)
@@ -71,8 +71,6 @@
IndexReader r = dw.writer.getReader();
dw.writer.commit();
verifyEquals(r, dir, "id");
- FlexTestUtil.verifyFlexVsPreFlex(this.r, r);
- FlexTestUtil.verifyFlexVsPreFlex(this.r, dir);
r.close();
dw.writer.close();
dir.close();
@@ -94,8 +92,6 @@
// verifyEquals(dir2, dir2, "id");
verifyEquals(dir1, dir2, "id");
- FlexTestUtil.verifyFlexVsPreFlex(r, dir1);
- FlexTestUtil.verifyFlexVsPreFlex(r, dir2);
}
public void testMultiConfig() throws Throwable {
@@ -121,9 +117,6 @@
indexSerial(docs, dir2);
//System.out.println("TEST: verify");
verifyEquals(dir1, dir2, "id");
-
- FlexTestUtil.verifyFlexVsPreFlex(r, dir1);
- FlexTestUtil.verifyFlexVsPreFlex(r, dir2);
}
}
Index: lucene/src/test/org/apache/lucene/index/TestFlex.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestFlex.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestFlex.java (working copy)
@@ -52,13 +52,9 @@
}
IndexReader r = w.getReader();
- TermEnum terms = r.terms(new Term("field3", "bbb"));
- // pre-flex API should seek to the next field
- assertNotNull(terms.term());
- assertEquals("field4", terms.term().field());
- terms = r.terms(new Term("field5", "abc"));
- assertNull(terms.term());
+ TermsEnum terms = MultiFields.getTerms(r, "field3").iterator();
+ assertEquals(TermsEnum.SeekStatus.END, terms.seek(new BytesRef("abc")));
r.close();
}
Index: lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java (working copy)
@@ -24,6 +24,7 @@
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.util.BytesRef;
import java.io.IOException;
@@ -56,11 +57,12 @@
SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
assertTrue(reader != null);
assertEquals(indexDivisor, reader.getTermInfosIndexDivisor());
- TermDocs termDocs = reader.termDocs();
- assertTrue(termDocs != null);
- termDocs.seek(new Term(DocHelper.TEXT_FIELD_2_KEY, "field"));
- if (termDocs.next() == true) {
- int docId = termDocs.doc();
+
+ TermsEnum terms = reader.fields().terms(DocHelper.TEXT_FIELD_2_KEY).iterator();
+ terms.seek(new BytesRef("field"));
+ DocsEnum termDocs = terms.docs(reader.getDeletedDocs(), null);
+ if (termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ int docId = termDocs.docID();
assertTrue(docId == 0);
int freq = termDocs.freq();
assertTrue(freq == 3);
@@ -77,20 +79,21 @@
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
assertTrue(reader != null);
- TermDocs termDocs = reader.termDocs();
- assertTrue(termDocs != null);
- termDocs.seek(new Term("textField2", "bad"));
- assertTrue(termDocs.next() == false);
+ DocsEnum termDocs = reader.termDocsEnum(reader.getDeletedDocs(),
+ "textField2",
+ new BytesRef("bad"));
+
+ assertNull(termDocs);
reader.close();
}
{
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
assertTrue(reader != null);
- TermDocs termDocs = reader.termDocs();
- assertTrue(termDocs != null);
- termDocs.seek(new Term("junk", "bad"));
- assertTrue(termDocs.next() == false);
+ DocsEnum termDocs = reader.termDocsEnum(reader.getDeletedDocs(),
+ "junk",
+ new BytesRef("bad"));
+ assertNull(termDocs);
reader.close();
}
}
@@ -121,105 +124,125 @@
IndexReader reader = IndexReader.open(dir, null, true, indexDivisor);
- TermDocs tdocs = reader.termDocs();
+ DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ ta.field(),
+ new BytesRef(ta.text()));
// without optimization (assumption skipInterval == 16)
// with next
- tdocs.seek(ta);
- assertTrue(tdocs.next());
- assertEquals(0, tdocs.doc());
+ assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, tdocs.docID());
assertEquals(4, tdocs.freq());
- assertTrue(tdocs.next());
- assertEquals(1, tdocs.doc());
+ assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(1, tdocs.docID());
assertEquals(4, tdocs.freq());
- assertTrue(tdocs.skipTo(0));
- assertEquals(2, tdocs.doc());
- assertTrue(tdocs.skipTo(4));
- assertEquals(4, tdocs.doc());
- assertTrue(tdocs.skipTo(9));
- assertEquals(9, tdocs.doc());
- assertFalse(tdocs.skipTo(10));
+ assertTrue(tdocs.advance(0) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(2, tdocs.docID());
+ assertTrue(tdocs.advance(4) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(4, tdocs.docID());
+ assertTrue(tdocs.advance(9) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(9, tdocs.docID());
+ assertFalse(tdocs.advance(10) != DocsEnum.NO_MORE_DOCS);
// without next
- tdocs.seek(ta);
- assertTrue(tdocs.skipTo(0));
- assertEquals(0, tdocs.doc());
- assertTrue(tdocs.skipTo(4));
- assertEquals(4, tdocs.doc());
- assertTrue(tdocs.skipTo(9));
- assertEquals(9, tdocs.doc());
- assertFalse(tdocs.skipTo(10));
+ tdocs = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ ta.field(),
+ new BytesRef(ta.text()));
+ assertTrue(tdocs.advance(0) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(0, tdocs.docID());
+ assertTrue(tdocs.advance(4) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(4, tdocs.docID());
+ assertTrue(tdocs.advance(9) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(9, tdocs.docID());
+ assertFalse(tdocs.advance(10) != DocsEnum.NO_MORE_DOCS);
+
// exactly skipInterval documents and therefore with optimization
// with next
- tdocs.seek(tb);
- assertTrue(tdocs.next());
- assertEquals(10, tdocs.doc());
+ tdocs = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ tb.field(),
+ new BytesRef(tb.text()));
+
+ assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(10, tdocs.docID());
assertEquals(4, tdocs.freq());
- assertTrue(tdocs.next());
- assertEquals(11, tdocs.doc());
+ assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(11, tdocs.docID());
assertEquals(4, tdocs.freq());
- assertTrue(tdocs.skipTo(5));
- assertEquals(12, tdocs.doc());
- assertTrue(tdocs.skipTo(15));
- assertEquals(15, tdocs.doc());
- assertTrue(tdocs.skipTo(24));
- assertEquals(24, tdocs.doc());
- assertTrue(tdocs.skipTo(25));
- assertEquals(25, tdocs.doc());
- assertFalse(tdocs.skipTo(26));
+ assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(12, tdocs.docID());
+ assertTrue(tdocs.advance(15) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(15, tdocs.docID());
+ assertTrue(tdocs.advance(24) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(24, tdocs.docID());
+ assertTrue(tdocs.advance(25) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(25, tdocs.docID());
+ assertFalse(tdocs.advance(26) != DocsEnum.NO_MORE_DOCS);
// without next
- tdocs.seek(tb);
- assertTrue(tdocs.skipTo(5));
- assertEquals(10, tdocs.doc());
- assertTrue(tdocs.skipTo(15));
- assertEquals(15, tdocs.doc());
- assertTrue(tdocs.skipTo(24));
- assertEquals(24, tdocs.doc());
- assertTrue(tdocs.skipTo(25));
- assertEquals(25, tdocs.doc());
- assertFalse(tdocs.skipTo(26));
+ tdocs = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ tb.field(),
+ new BytesRef(tb.text()));
+ assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(10, tdocs.docID());
+ assertTrue(tdocs.advance(15) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(15, tdocs.docID());
+ assertTrue(tdocs.advance(24) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(24, tdocs.docID());
+ assertTrue(tdocs.advance(25) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(25, tdocs.docID());
+ assertFalse(tdocs.advance(26) != DocsEnum.NO_MORE_DOCS);
+
// much more than skipInterval documents and therefore with optimization
// with next
- tdocs.seek(tc);
- assertTrue(tdocs.next());
- assertEquals(26, tdocs.doc());
+ tdocs = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ tc.field(),
+ new BytesRef(tc.text()));
+
+ assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(26, tdocs.docID());
assertEquals(4, tdocs.freq());
- assertTrue(tdocs.next());
- assertEquals(27, tdocs.doc());
+ assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertEquals(27, tdocs.docID());
assertEquals(4, tdocs.freq());
- assertTrue(tdocs.skipTo(5));
- assertEquals(28, tdocs.doc());
- assertTrue(tdocs.skipTo(40));
- assertEquals(40, tdocs.doc());
- assertTrue(tdocs.skipTo(57));
- assertEquals(57, tdocs.doc());
- assertTrue(tdocs.skipTo(74));
- assertEquals(74, tdocs.doc());
- assertTrue(tdocs.skipTo(75));
- assertEquals(75, tdocs.doc());
- assertFalse(tdocs.skipTo(76));
+ assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(28, tdocs.docID());
+ assertTrue(tdocs.advance(40) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(40, tdocs.docID());
+ assertTrue(tdocs.advance(57) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(57, tdocs.docID());
+ assertTrue(tdocs.advance(74) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(74, tdocs.docID());
+ assertTrue(tdocs.advance(75) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(75, tdocs.docID());
+ assertFalse(tdocs.advance(76) != DocsEnum.NO_MORE_DOCS);
//without next
- tdocs.seek(tc);
- assertTrue(tdocs.skipTo(5));
- assertEquals(26, tdocs.doc());
- assertTrue(tdocs.skipTo(40));
- assertEquals(40, tdocs.doc());
- assertTrue(tdocs.skipTo(57));
- assertEquals(57, tdocs.doc());
- assertTrue(tdocs.skipTo(74));
- assertEquals(74, tdocs.doc());
- assertTrue(tdocs.skipTo(75));
- assertEquals(75, tdocs.doc());
- assertFalse(tdocs.skipTo(76));
+ tdocs = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ tc.field(),
+ new BytesRef(tc.text()));
+ assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(26, tdocs.docID());
+ assertTrue(tdocs.advance(40) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(40, tdocs.docID());
+ assertTrue(tdocs.advance(57) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(57, tdocs.docID());
+ assertTrue(tdocs.advance(74) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(74, tdocs.docID());
+ assertTrue(tdocs.advance(75) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(75, tdocs.docID());
+ assertFalse(tdocs.advance(76) != DocsEnum.NO_MORE_DOCS);
- tdocs.close();
reader.close();
dir.close();
}
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy)
@@ -1379,8 +1379,11 @@
assertEquals(1, reader.numDocs());
Term t = new Term("field", "a");
assertEquals(1, reader.docFreq(t));
- TermDocs td = reader.termDocs(t);
- td.next();
+ DocsEnum td = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ "field",
+ new BytesRef("a"));
+ td.nextDoc();
assertEquals(128*1024, td.freq());
reader.close();
dir.close();
@@ -1701,9 +1704,13 @@
// Make sure the doc that hit the exception was marked
// as deleted:
- TermDocs tdocs = reader.termDocs(t);
+ DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ t.field(),
+ new BytesRef(t.text()));
+
int count = 0;
- while(tdocs.next()) {
+ while(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
count++;
}
assertEquals(2, count);
@@ -2244,9 +2251,12 @@
// Quick test to make sure index is not corrupt:
IndexReader reader = IndexReader.open(dir, true);
- TermDocs tdocs = reader.termDocs(new Term("field", "aaa"));
+ DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ "field",
+ new BytesRef("aaa"));
int count = 0;
- while(tdocs.next()) {
+ while(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
count++;
}
assertTrue(count > 0);
@@ -3454,8 +3464,13 @@
Query q = new SpanTermQuery(new Term("field", "a"));
hits = s.search(q, null, 1000).scoreDocs;
assertEquals(1, hits.length);
- TermPositions tps = s.getIndexReader().termPositions(new Term("field", "a"));
- assertTrue(tps.next());
+
+ DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(s.getIndexReader(),
+ MultiFields.getDeletedDocs(s.getIndexReader()),
+ "field",
+ new BytesRef("a"));
+
+ assertTrue(tps.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(1, tps.freq());
assertEquals(0, tps.nextPosition());
w.close();
@@ -4465,12 +4480,12 @@
// test that the terms were indexed.
- assertTrue(ir.termDocs(new Term("binary","doc1field1")).next());
- assertTrue(ir.termDocs(new Term("binary","doc2field1")).next());
- assertTrue(ir.termDocs(new Term("binary","doc3field1")).next());
- assertTrue(ir.termDocs(new Term("string","doc1field2")).next());
- assertTrue(ir.termDocs(new Term("string","doc2field2")).next());
- assertTrue(ir.termDocs(new Term("string","doc3field2")).next());
+ assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc1field1")).nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc2field1")).nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc3field1")).nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc1field2")).nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc2field2")).nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc3field2")).nextDoc() != DocsEnum.NO_MORE_DOCS);
ir.close();
dir.close();
Index: lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java (working copy)
@@ -21,7 +21,6 @@
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -34,6 +33,7 @@
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
/**
* This testcase tests whether multi-level skipping is being used
@@ -68,11 +68,12 @@
writer.close();
IndexReader reader = SegmentReader.getOnlySegmentReader(dir);
- TermPositions tp = reader.termPositions();
for (int i = 0; i < 2; i++) {
counter = 0;
- tp.seek(term);
+ DocsAndPositionsEnum tp = reader.termPositionsEnum(reader.getDeletedDocs(),
+ term.field(),
+ new BytesRef(term.text()));
checkSkipTo(tp, 14, 185); // no skips
checkSkipTo(tp, 17, 190); // one skip on level 0
@@ -84,18 +85,18 @@
}
}
- public void checkSkipTo(TermPositions tp, int target, int maxCounter) throws IOException {
- tp.skipTo(target);
+ public void checkSkipTo(DocsAndPositionsEnum tp, int target, int maxCounter) throws IOException {
+ tp.advance(target);
if (maxCounter < counter) {
fail("Too many bytes read: " + counter);
}
- assertEquals("Wrong document " + tp.doc() + " after skipTo target " + target, target, tp.doc());
+ assertEquals("Wrong document " + tp.docID() + " after skipTo target " + target, target, tp.docID());
assertEquals("Frequency is not 1: " + tp.freq(), 1,tp.freq());
tp.nextPosition();
- byte[] b = new byte[1];
- tp.getPayload(b, 0);
- assertEquals("Wrong payload for the target " + target + ": " + b[0], (byte) target, b[0]);
+ BytesRef b = tp.getPayload();
+ assertEquals(1, b.length);
+ assertEquals("Wrong payload for the target " + target + ": " + b.bytes[b.offset], (byte) target, b.bytes[b.offset]);
}
private static class PayloadAnalyzer extends Analyzer {
Index: lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy)
@@ -38,6 +38,7 @@
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.BytesRef;
public class TestDocumentWriter extends LuceneTestCase {
private RAMDirectory dir;
@@ -128,8 +129,9 @@
writer.close();
SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
- TermPositions termPositions = reader.termPositions(new Term("repeated", "repeated"));
- assertTrue(termPositions.next());
+ DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, MultiFields.getDeletedDocs(reader),
+ "repeated", new BytesRef("repeated"));
+ assertTrue(termPositions.nextDoc() != DocsEnum.NO_MORE_DOCS);
int freq = termPositions.freq();
assertEquals(2, freq);
assertEquals(0, termPositions.nextPosition());
@@ -190,16 +192,16 @@
writer.close();
SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
- TermPositions termPositions = reader.termPositions(new Term("f1", "a"));
- assertTrue(termPositions.next());
+ DocsAndPositionsEnum termPositions = reader.fields().terms("f1").docsAndPositions(reader.getDeletedDocs(), new BytesRef("a"), null);
+ assertTrue(termPositions.nextDoc() != DocsEnum.NO_MORE_DOCS);
int freq = termPositions.freq();
assertEquals(3, freq);
assertEquals(0, termPositions.nextPosition());
- assertEquals(true, termPositions.isPayloadAvailable());
+ assertEquals(true, termPositions.hasPayload());
assertEquals(6, termPositions.nextPosition());
- assertEquals(false, termPositions.isPayloadAvailable());
+ assertEquals(false, termPositions.hasPayload());
assertEquals(7, termPositions.nextPosition());
- assertEquals(false, termPositions.isPayloadAvailable());
+ assertEquals(false, termPositions.hasPayload());
}
@@ -233,19 +235,19 @@
writer.close();
SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
- TermPositions termPositions = reader.termPositions(new Term("preanalyzed", "term1"));
- assertTrue(termPositions.next());
+ DocsAndPositionsEnum termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getDeletedDocs(), new BytesRef("term1"), null);
+ assertTrue(termPositions.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(1, termPositions.freq());
assertEquals(0, termPositions.nextPosition());
- termPositions.seek(new Term("preanalyzed", "term2"));
- assertTrue(termPositions.next());
+ termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getDeletedDocs(), new BytesRef("term2"), null);
+ assertTrue(termPositions.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(2, termPositions.freq());
assertEquals(1, termPositions.nextPosition());
assertEquals(3, termPositions.nextPosition());
- termPositions.seek(new Term("preanalyzed", "term3"));
- assertTrue(termPositions.next());
+ termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getDeletedDocs(), new BytesRef("term3"), null);
+ assertTrue(termPositions.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(1, termPositions.freq());
assertEquals(2, termPositions.nextPosition());
Index: lucene/src/test/org/apache/lucene/index/TestOmitTf.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestOmitTf.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestOmitTf.java (working copy)
@@ -87,10 +87,7 @@
f2.setOmitTermFreqAndPositions(false);
d.add(f2);
- Random rnd = newRandom();
-
writer.addDocument(d);
- FlexTestUtil.verifyFlexVsPreFlex(rnd, writer);
// force merge
writer.optimize();
@@ -98,8 +95,6 @@
writer.close();
_TestUtil.checkIndex(ram);
- FlexTestUtil.verifyFlexVsPreFlex(rnd, ram);
-
SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
FieldInfos fi = reader.fieldInfos();
assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f1").omitTermFreqAndPositions);
@@ -145,12 +140,8 @@
for(int i=0;i<30;i++)
writer.addDocument(d);
- Random rnd = newRandom();
- FlexTestUtil.verifyFlexVsPreFlex(rnd, writer);
-
// force merge
writer.optimize();
- FlexTestUtil.verifyFlexVsPreFlex(rnd, writer);
// flush
writer.close();
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java (working copy)
@@ -38,6 +38,7 @@
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ThreadInterruptedException;
public class TestIndexWriterReader extends LuceneTestCase {
@@ -63,12 +64,16 @@
public static int count(Term t, IndexReader r) throws IOException {
int count = 0;
- TermDocs td = r.termDocs(t);
- while (td.next()) {
- td.doc();
- count++;
+ DocsEnum td = MultiFields.getTermDocsEnum(r,
+ MultiFields.getDeletedDocs(r),
+ t.field(), new BytesRef(t.text()));
+
+ if (td != null) {
+ while (td.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ td.docID();
+ count++;
+ }
}
- td.close();
return count;
}
Index: lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java (working copy)
@@ -25,6 +25,7 @@
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.BytesRef;
import java.io.IOException;
@@ -158,35 +159,29 @@
MultiReader mr3 = new MultiReader(readers2);
// test mixing up TermDocs and TermEnums from different readers.
- TermDocs td2 = mr2.termDocs();
- TermEnum te3 = mr3.terms(new Term("body","wow"));
- td2.seek(te3);
+ TermsEnum te2 = MultiFields.getTerms(mr2, "body").iterator();
+ te2.seek(new BytesRef("wow"));
+ DocsEnum td = MultiFields.getTermDocsEnum(mr2,
+ MultiFields.getDeletedDocs(mr2),
+ "body",
+ te2.term());
+
+ TermsEnum te3 = MultiFields.getTerms(mr3, "body").iterator();
+ te3.seek(new BytesRef("wow"));
+ td = te3.docs(MultiFields.getDeletedDocs(mr3),
+ td);
+
int ret = 0;
// This should blow up if we forget to check that the TermEnum is from the same
// reader as the TermDocs.
- while (td2.next()) ret += td2.doc();
- td2.close();
- te3.close();
+ while (td.nextDoc() != td.NO_MORE_DOCS) ret += td.docID();
// really a dummy assert to ensure that we got some docs and to ensure that
// nothing is optimized out.
assertTrue(ret > 0);
}
- public void testAllTermDocs() throws IOException {
- IndexReader reader = openReader();
- int NUM_DOCS = 2;
- TermDocs td = reader.termDocs(null);
- for(int i=0;i stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
assertTrue(stored != null);
Index: lucene/src/test/org/apache/lucene/index/TestStressIndexing.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestStressIndexing.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestStressIndexing.java (working copy)
@@ -153,8 +153,6 @@
modifier.close();
- FlexTestUtil.verifyFlexVsPreFlex(RANDOM, directory);
-
for(int i=0;i= 0);
}
Index: lucene/src/test/org/apache/lucene/index/TestTermEnumSurrogate.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestTermEnumSurrogate.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestTermEnumSurrogate.java (working copy)
@@ -1,53 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.LuceneTestCase;
-
-/**
- * Back-compat test that you can seek to a lead surrogate in the term
- * dictionary. With the old lucene API, this worked, due to the fact that the
- * Term itself did not need to be converted into proper UTF-8 bytes.
- *
- * With the new API the provided Term text must be encodeable into UTF-8.
- *
- * @deprecated Remove this when the old API is no longer supported.
- */
-@Deprecated
-public class TestTermEnumSurrogate extends LuceneTestCase {
- public void testSeekSurrogate() throws Exception {
- RAMDirectory dir = new RAMDirectory();
- IndexWriter writer = new IndexWriter(dir, new MockAnalyzer(),
- IndexWriter.MaxFieldLength.UNLIMITED);
- Document d = new Document();
- Field f = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);
- d.add(f);
- f.setValue("abacadaba");
- writer.addDocument(d);
- f.setValue("𩬅");
- writer.addDocument(d);
- writer.close();
- IndexReader reader = IndexReader.open(dir, true);
- TermEnum te = reader.terms(new Term("field", "𩬅".substring(0, 1)));
- assertEquals(new Term("field", "𩬅"), te.term());
- }
-}
Index: lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 954967)
+++ lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy)
@@ -129,20 +129,15 @@
};
public void testOptimizeOldIndex() throws Exception {
- Random rand = newRandom();
-
for(int i=0;i allTerms = new ArrayList();
- //System.out.println("TEST: now verify!!");
- testStraightEnum(r);
- testRandomSkips(rand, r);
- testRandomSeeks(rand, r);
- testBogusFieldTerms(rand, r);
- }
-
- private static void testBogusFieldTerms(Random rand, IndexReader r) throws Exception {
- final Fields fields = MultiFields.getFields(r);
- if (fields == null) {
- return;
- }
- for(int i=0;i<10;i++) {
- final String f = "bogus" + rand.nextInt() + "reallybogus";
- Terms terms = fields.terms(f);
- assertTrue(terms == null || terms.iterator().next() == null);
- }
- }
-
- private static void testStraightEnum(IndexReader r) throws Exception {
-
- // straight enum of fields/terms/docs/positions
- TermEnum termEnum = r.terms();
- final Fields fields = MultiFields.getFields(r);
- if (fields == null) {
- return;
- }
- FieldsEnum fieldsEnum = fields.iterator();
- while(true) {
- final String field = fieldsEnum.next();
- if (field == null) {
- boolean result = termEnum.next();
- if (result) {
- System.out.println("got unexpected term=" + termEnum.term() + " termEnum=" + termEnum);
- }
- assertFalse(result);
- break;
- }
- TermsEnum terms = fieldsEnum.terms();
- DocsAndPositionsEnum postings = null;
- DocsEnum docsEnum = null;
- final TermPositions termPos = r.termPositions();
- while(true) {
- final BytesRef termRef = terms.next();
- if (termRef == null) {
- break;
- } else {
- assertTrue(termEnum.next());
- Term t = termEnum.term();
- assertEquals(t.field(), field);
- assertEquals(t.text(), termRef.utf8ToString());
- assertEquals(termEnum.docFreq(), terms.docFreq());
- //allTerms.add(t);
-
- postings = terms.docsAndPositions(MultiFields.getDeletedDocs(r), postings);
- docsEnum = terms.docs(MultiFields.getDeletedDocs(r), docsEnum);
-
- final DocsEnum docs;
- if (postings != null) {
- docs = postings;
- } else {
- docs = docsEnum;
- }
-
- termPos.seek(t);
- while(true) {
- final int doc = docs.nextDoc();
- if (doc == DocsEnum.NO_MORE_DOCS) {
- assertFalse(termPos.next());
- break;
- } else {
- assertTrue(termPos.next());
- assertEquals(termPos.doc(), doc);
- assertEquals(termPos.freq(), docs.freq());
- final int freq = docs.freq();
- if (postings == null) {
- assertEquals(1, freq);
- // Old API did not always do this,
- // specifically in the MultiTermPositions
- // case when some segs omit positions and
- // some don't
- //assertEquals(0, termPos.nextPosition());
- assertEquals(false, termPos.isPayloadAvailable());
- } else {
- for(int i=0;i limit) {
- // copy down
- // TODO: in general I should not muck w/
- // the int[]'s returned to me like
- // this... this could mess up codecs
- // that have persistent RAM storage of
- // these int[]'s
- System.arraycopy(result1.docs.ints, limit, result1.docs.ints, 0, result1Count-limit);
- System.arraycopy(result1.freqs.ints, limit, result1.freqs.ints, 0, result1Count-limit);
- }
- result1Count -= limit;
-
- if (count2 > limit) {
- // copy down
- System.arraycopy(docs2, limit, docs2, 0, count2-limit);
- System.arraycopy(freqs2, limit, freqs2, 0, count2-limit);
- }
- count2 -= limit;
- }
- } else {
- // Enum the docs one by one
- //System.out.println("TEST: get docs");
- termPos.seek(t);
- while(true) {
- final int doc = docsEnum.nextDoc();
- if (doc == DocsEnum.NO_MORE_DOCS) {
- assertFalse(termPos.next());
- break;
- } else {
- assertTrue(termPos.next());
- assertEquals(termPos.doc(), doc);
- assertEquals(termPos.freq(), docsEnum.freq());
- //System.out.println("TEST: doc=" + doc + " freq=" + docs.freq());
- if (rand.nextInt(3) <= 1) {
- // enum the positions
- final int freq = docsEnum.freq();
- if (postings == null) {
- assertEquals(1, termPos.freq());
- // Old API did not always do this,
- // specifically in the MultiTermPositions
- // case when some segs omit positions and
- // some don't
- //assertEquals(0, termPos.nextPosition());
- assertFalse(termPos.isPayloadAvailable());
- } else {
- // we have positions
- for(int i=0;i
A Token can optionally have metadata (a.k.a. Payload) in the form of a variable
- length byte array. Use {@link TermPositions#getPayloadLength()} and
- {@link TermPositions#getPayload(byte[], int)} to retrieve the payloads from the index.
+ length byte array. Use {@link DocsAndPositionsEnum#getPayloadLength()} and
+ {@link DocsAndPositionsEnum#getPayload(byte[], int)} to retrieve the payloads from the index.
Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy)
@@ -32,7 +32,6 @@
import org.apache.lucene.queryParser.QueryParser; // for javadoc
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.VirtualMethod;
/**
* An abstract {@link Query} that matches documents
@@ -72,17 +71,6 @@
protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
transient int numberOfTerms = 0;
- /** @deprecated remove when getEnum is removed */
- private static final VirtualMethod getEnumMethod =
- new VirtualMethod(MultiTermQuery.class, "getEnum", IndexReader.class);
- /** @deprecated remove when getEnum is removed */
- private static final VirtualMethod getTermsEnumMethod =
- new VirtualMethod(MultiTermQuery.class, "getTermsEnum", IndexReader.class);
- /** @deprecated remove when getEnum is removed */
- final boolean hasNewAPI =
- VirtualMethod.compareImplementationDistance(getClass(),
- getTermsEnumMethod, getEnumMethod) >= 0; // its ok for both to be overridden
-
/** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link #getTermsEnum}
* and update the boost on each returned term. This enables to control the boost factor
* for each matching term in {@link #SCORING_BOOLEAN_QUERY_REWRITE} or
@@ -190,64 +178,42 @@
protected final int collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException {
- if (query.hasNewAPI) {
+ if (query.field == null) {
+ throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery.");
+ }
- if (query.field == null) {
- throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery.");
- }
+ final Fields fields = MultiFields.getFields(reader);
+ if (fields == null) {
+ // reader has no fields
+ return 0;
+ }
- final Fields fields = MultiFields.getFields(reader);
- if (fields == null) {
- // reader has no fields
- return 0;
- }
+ final Terms terms = fields.terms(query.field);
+ if (terms == null) {
+ // field does not exist
+ return 0;
+ }
- final Terms terms = fields.terms(query.field);
- if (terms == null) {
- // field does not exist
- return 0;
- }
+ final TermsEnum termsEnum = query.getTermsEnum(reader);
+ assert termsEnum != null;
- final TermsEnum termsEnum = query.getTermsEnum(reader);
- assert termsEnum != null;
-
- if (termsEnum == TermsEnum.EMPTY)
- return 0;
- final BoostAttribute boostAtt =
- termsEnum.attributes().addAttribute(BoostAttribute.class);
- collector.boostAtt = boostAtt;
- int count = 0;
- BytesRef term;
- final Term placeholderTerm = new Term(query.field);
- while ((term = termsEnum.next()) != null) {
- if (collector.collect(placeholderTerm.createTerm(term.utf8ToString()), boostAtt.getBoost())) {
- count++;
- } else {
- break;
- }
+ if (termsEnum == TermsEnum.EMPTY)
+ return 0;
+ final BoostAttribute boostAtt =
+ termsEnum.attributes().addAttribute(BoostAttribute.class);
+ collector.boostAtt = boostAtt;
+ int count = 0;
+ BytesRef term;
+ final Term placeholderTerm = new Term(query.field);
+ while ((term = termsEnum.next()) != null) {
+ if (collector.collect(placeholderTerm.createTerm(term.utf8ToString()), boostAtt.getBoost())) {
+ count++;
+ } else {
+ break;
}
- collector.boostAtt = null;
- return count;
- } else {
- // deprecated case
- final FilteredTermEnum enumerator = query.getEnum(reader);
- int count = 0;
- try {
- do {
- Term t = enumerator.term();
- if (t != null) {
- if (collector.collect(t, enumerator.difference())) {
- count++;
- } else {
- break;
- }
- }
- } while (enumerator.next());
- } finally {
- enumerator.close();
- }
- return count;
}
+ collector.boostAtt = null;
+ return count;
}
protected static abstract class TermCollector {
@@ -699,24 +665,13 @@
public final String getField() { return field; }
/** Construct the enumeration to be used, expanding the
- * pattern term.
- * @deprecated Please override {@link #getTermsEnum} instead */
- @Deprecated
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- /** Construct the enumeration to be used, expanding the
* pattern term. This method should only be called if
* the field exists (ie, implementations can assume the
* field does exist). This method should not return null
* (should instead return {@link TermsEnum#EMPTY} if no
* terms match). The TermsEnum must already be
* positioned to the first matching term. */
- // TODO 4.0: make this method abstract
- protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- throw new UnsupportedOperationException();
- }
+ protected abstract TermsEnum getTermsEnum(IndexReader reader) throws IOException;
/**
* Expert: Return the number of unique terms visited during execution of the query.
Index: lucene/src/java/org/apache/lucene/search/PrefixTermEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/PrefixTermEnum.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/PrefixTermEnum.java (working copy)
@@ -1,68 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-
-/**
- * Subclass of FilteredTermEnum for enumerating all terms that match the
- * specified prefix filter term.
- *
- * Term enumerations are always ordered by Term.compareTo(). Each term in
- * the enumeration is greater than all that precede it.
- *
- * @deprecated Use {@link PrefixTermsEnum} instead.
- */
-@Deprecated
-public class PrefixTermEnum extends FilteredTermEnum {
-
- private final Term prefix;
- private boolean endEnum = false;
-
- public PrefixTermEnum(IndexReader reader, Term prefix) throws IOException {
- this.prefix = prefix;
-
- setEnum(reader.terms(new Term(prefix.field(), prefix.text())));
- }
-
- @Override
- public float difference() {
- return 1.0f;
- }
-
- @Override
- protected boolean endEnum() {
- return endEnum;
- }
-
- protected Term getPrefixTerm() {
- return prefix;
- }
-
- @Override
- protected boolean termCompare(Term term) {
- if (term.field() == prefix.field() && term.text().startsWith(prefix.text())) {
- return true;
- }
- endEnum = true;
- return false;
- }
-}
Index: lucene/src/java/org/apache/lucene/search/FuzzyQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FuzzyQuery.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/FuzzyQuery.java (working copy)
@@ -130,14 +130,6 @@
return prefixLength;
}
- @Override @Deprecated
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- if (!termLongEnough) { // can only match if it's exact
- return new SingleTermEnum(reader, term);
- }
- return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
- }
-
@Override
protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
if (!termLongEnough) { // can only match if it's exact
Index: lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (working copy)
@@ -20,11 +20,8 @@
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.MultiFields;
@@ -109,97 +106,54 @@
*/
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
- if (query.hasNewAPI) {
- if (query.field == null) {
- throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery.");
- }
+ if (query.field == null) {
+ throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery.");
+ }
- final Fields fields = MultiFields.getFields(reader);
- if (fields == null) {
- // reader has no fields
- return DocIdSet.EMPTY_DOCIDSET;
- }
+ final Fields fields = MultiFields.getFields(reader);
+ if (fields == null) {
+ // reader has no fields
+ return DocIdSet.EMPTY_DOCIDSET;
+ }
- final Terms terms = fields.terms(query.field);
- if (terms == null) {
- // field does not exist
- return DocIdSet.EMPTY_DOCIDSET;
- }
+ final Terms terms = fields.terms(query.field);
+ if (terms == null) {
+ // field does not exist
+ return DocIdSet.EMPTY_DOCIDSET;
+ }
- final TermsEnum termsEnum = query.getTermsEnum(reader);
- assert termsEnum != null;
- if (termsEnum.next() != null) {
- // fill into a OpenBitSet
- final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
- int termCount = 0;
- final Bits delDocs = MultiFields.getDeletedDocs(reader);
- DocsEnum docsEnum = null;
- do {
- termCount++;
- // System.out.println(" iter termCount=" + termCount + " term=" +
- // enumerator.term().toBytesString());
- docsEnum = termsEnum.docs(delDocs, docsEnum);
- final DocsEnum.BulkReadResult result = docsEnum.getBulkResult();
- while (true) {
- final int count = docsEnum.read();
- if (count != 0) {
- final int[] docs = result.docs.ints;
- for (int i = 0; i < count; i++) {
- bitSet.set(docs[i]);
- }
- } else {
- break;
+ final TermsEnum termsEnum = query.getTermsEnum(reader);
+ assert termsEnum != null;
+ if (termsEnum.next() != null) {
+ // fill into a OpenBitSet
+ final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
+ int termCount = 0;
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docsEnum = null;
+ do {
+ termCount++;
+ // System.out.println(" iter termCount=" + termCount + " term=" +
+ // enumerator.term().toBytesString());
+ docsEnum = termsEnum.docs(delDocs, docsEnum);
+ final DocsEnum.BulkReadResult result = docsEnum.getBulkResult();
+ while (true) {
+ final int count = docsEnum.read();
+ if (count != 0) {
+ final int[] docs = result.docs.ints;
+ for (int i = 0; i < count; i++) {
+ bitSet.set(docs[i]);
}
+ } else {
+ break;
}
- } while (termsEnum.next() != null);
- // System.out.println(" done termCount=" + termCount);
+ }
+ } while (termsEnum.next() != null);
+ // System.out.println(" done termCount=" + termCount);
- query.incTotalNumberOfTerms(termCount);
- return bitSet;
- } else {
- return DocIdSet.EMPTY_DOCIDSET;
- }
+ query.incTotalNumberOfTerms(termCount);
+ return bitSet;
} else {
- final TermEnum enumerator = query.getEnum(reader);
- try {
- // if current term in enum is null, the enum is empty -> shortcut
- if (enumerator.term() == null)
- return DocIdSet.EMPTY_DOCIDSET;
- // else fill into a OpenBitSet
- final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
- final int[] docs = new int[32];
- final int[] freqs = new int[32];
- TermDocs termDocs = reader.termDocs();
- try {
- int termCount = 0;
- do {
- Term term = enumerator.term();
- if (term == null)
- break;
- termCount++;
- termDocs.seek(term);
- while (true) {
- final int count = termDocs.read(docs, freqs);
- if (count != 0) {
- for (int i = 0; i < count; i++) {
- bitSet.set(docs[i]);
- }
- } else {
- break;
- }
- }
- } while (enumerator.next());
-
- query.incTotalNumberOfTerms(termCount);
-
- } finally {
- termDocs.close();
- }
- return bitSet;
- } finally {
- enumerator.close();
- }
+ return DocIdSet.EMPTY_DOCIDSET;
}
}
-
}
Index: lucene/src/java/org/apache/lucene/search/FuzzyTermEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FuzzyTermEnum.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/FuzzyTermEnum.java (working copy)
@@ -1,288 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-
-/** Subclass of FilteredTermEnum for enumerating all terms that are similar
- * to the specified filter term.
- *
- *
Term enumerations are always ordered by Term.compareTo(). Each term in
- * the enumeration is greater than all that precede it.
- *
- * @deprecated Please use {@link FuzzyTermsEnum} instead.
- */
-@Deprecated
-public final class FuzzyTermEnum extends FilteredTermEnum {
-
- /* Allows us save time required to create a new array
- * every time similarity is called.
- */
- private int[] p;
- private int[] d;
-
- private float similarity;
- private boolean endEnum = false;
-
- private Term searchTerm = null;
- private final String field;
- private final String text;
- private final String prefix;
-
- private final float minimumSimilarity;
- private final float scale_factor;
-
- /**
- * Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f.
- *
- * After calling the constructor the enumeration is already pointing to the first
- * valid term if such a term exists.
- *
- * @param reader
- * @param term
- * @throws IOException
- * @see #FuzzyTermEnum(IndexReader, Term, float, int)
- */
- public FuzzyTermEnum(IndexReader reader, Term term) throws IOException {
- this(reader, term, FuzzyQuery.defaultMinSimilarity, FuzzyQuery.defaultPrefixLength);
- }
-
- /**
- * Creates a FuzzyTermEnum with an empty prefix.
- *
- * After calling the constructor the enumeration is already pointing to the first
- * valid term if such a term exists.
- *
- * @param reader
- * @param term
- * @param minSimilarity
- * @throws IOException
- * @see #FuzzyTermEnum(IndexReader, Term, float, int)
- */
- public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) throws IOException {
- this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength);
- }
-
- /**
- * Constructor for enumeration of all terms from specified reader which share a prefix of
- * length prefixLength with term and which have a fuzzy similarity >
- * minSimilarity.
- *
- * After calling the constructor the enumeration is already pointing to the first
- * valid term if such a term exists.
- *
- * @param reader Delivers terms.
- * @param term Pattern term.
- * @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f.
- * @param prefixLength Length of required common prefix. Default value is 0.
- * @throws IOException
- */
- public FuzzyTermEnum(IndexReader reader, Term term, final float minSimilarity, final int prefixLength) throws IOException {
- super();
-
- if (minSimilarity >= 1.0f)
- throw new IllegalArgumentException("minimumSimilarity cannot be greater than or equal to 1");
- else if (minSimilarity < 0.0f)
- throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
- if(prefixLength < 0)
- throw new IllegalArgumentException("prefixLength cannot be less than 0");
-
- this.minimumSimilarity = minSimilarity;
- this.scale_factor = 1.0f / (1.0f - minimumSimilarity);
- this.searchTerm = term;
- this.field = searchTerm.field();
-
- //The prefix could be longer than the word.
- //It's kind of silly though. It means we must match the entire word.
- final int fullSearchTermLength = searchTerm.text().length();
- final int realPrefixLength = prefixLength > fullSearchTermLength ? fullSearchTermLength : prefixLength;
-
- this.text = searchTerm.text().substring(realPrefixLength);
- this.prefix = searchTerm.text().substring(0, realPrefixLength);
-
- this.p = new int[this.text.length()+1];
- this.d = new int[this.text.length()+1];
-
- setEnum(reader.terms(new Term(searchTerm.field(), prefix)));
- }
-
- /**
- * The termCompare method in FuzzyTermEnum uses Levenshtein distance to
- * calculate the distance between the given term and the comparing term.
- */
- @Override
- protected final boolean termCompare(Term term) {
- if (field == term.field() && term.text().startsWith(prefix)) {
- final String target = term.text().substring(prefix.length());
- this.similarity = similarity(target);
- return (similarity > minimumSimilarity);
- }
- endEnum = true;
- return false;
- }
-
- /** @deprecated Use {@link MultiTermQuery.BoostAttribute} together with {@link FuzzyTermsEnum} */
- @Deprecated
- @Override
- public final float difference() {
- return (similarity - minimumSimilarity) * scale_factor;
- }
-
- /** {@inheritDoc} */
- @Override
- public final boolean endEnum() {
- return endEnum;
- }
-
- /******************************
- * Compute Levenshtein distance
- ******************************/
-
- /**
- *
Similarity returns a number that is 1.0f or less (including negative numbers)
- * based on how similar the Term is compared to a target term. It returns
- * exactly 0.0f when
- *
- * editDistance > maximumEditDistance
- * Otherwise it returns:
- *
- * 1 - (editDistance / length)
- * where length is the length of the shortest term (text or target) including a
- * prefix that are identical and editDistance is the Levenshtein distance for
- * the two words.
- *
- * Embedded within this algorithm is a fail-fast Levenshtein distance
- * algorithm. The fail-fast algorithm differs from the standard Levenshtein
- * distance algorithm in that it is aborted if it is discovered that the
- * minimum distance between the words is greater than some threshold.
- *
- *
To calculate the maximum distance threshold we use the following formula:
- *
- * (1 - minimumSimilarity) * length
- * where length is the shortest term including any prefix that is not part of the
- * similarity comparison. This formula was derived by solving for what maximum value
- * of distance returns false for the following statements:
- *
- * similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
- * return (similarity > minimumSimilarity);
- * where distance is the Levenshtein distance for the two words.
- *
- * Levenshtein distance (also known as edit distance) is a measure of similarity
- * between two strings where the distance is measured as the number of character
- * deletions, insertions or substitutions required to transform one string to
- * the other string.
- * @param target the target word or phrase
- * @return the similarity, 0.0 or less indicates that it matches less than the required
- * threshold and 1.0 indicates that the text and target are identical
- */
- private float similarity(final String target) {
- final int m = target.length();
- final int n = text.length();
- if (n == 0) {
- //we don't have anything to compare. That means if we just add
- //the letters for m we get the new word
- return prefix.length() == 0 ? 0.0f : 1.0f - ((float) m / prefix.length());
- }
- if (m == 0) {
- return prefix.length() == 0 ? 0.0f : 1.0f - ((float) n / prefix.length());
- }
-
- final int maxDistance = calculateMaxDistance(m);
-
- if (maxDistance < Math.abs(m-n)) {
- //just adding the characters of m to n or vice-versa results in
- //too many edits
- //for example "pre" length is 3 and "prefixes" length is 8. We can see that
- //given this optimal circumstance, the edit distance cannot be less than 5.
- //which is 8-3 or more precisely Math.abs(3-8).
- //if our maximum edit distance is 4, then we can discard this word
- //without looking at it.
- return 0.0f;
- }
-
- // init matrix d
- for (int i = 0; i<=n; ++i) {
- p[i] = i;
- }
-
- // start computing edit distance
- for (int j = 1; j<=m; ++j) { // iterates through target
- int bestPossibleEditDistance = m;
- final char t_j = target.charAt(j-1); // jth character of t
- d[0] = j;
-
- for (int i=1; i<=n; ++i) { // iterates through text
- // minimum of cell to the left+1, to the top+1, diagonally left and up +(0|1)
- if (t_j != text.charAt(i-1)) {
- d[i] = Math.min(Math.min(d[i-1], p[i]), p[i-1]) + 1;
- } else {
- d[i] = Math.min(Math.min(d[i-1]+1, p[i]+1), p[i-1]);
- }
- bestPossibleEditDistance = Math.min(bestPossibleEditDistance, d[i]);
- }
-
- //After calculating row i, the best possible edit distance
- //can be found by found by finding the smallest value in a given column.
- //If the bestPossibleEditDistance is greater than the max distance, abort.
-
- if (j > maxDistance && bestPossibleEditDistance > maxDistance) { //equal is okay, but not greater
- //the closest the target can be to the text is just too far away.
- //this target is leaving the party early.
- return 0.0f;
- }
-
- // copy current distance counts to 'previous row' distance counts: swap p and d
- int _d[] = p;
- p = d;
- d = _d;
- }
-
- // our last action in the above loop was to switch d and p, so p now
- // actually has the most recent cost counts
-
- // this will return less than 0.0 when the edit distance is
- // greater than the number of characters in the shorter word.
- // but this was the formula that was previously used in FuzzyTermEnum,
- // so it has not been changed (even though minimumSimilarity must be
- // greater than 0.0)
- return 1.0f - ((float)p[n] / (float) (prefix.length() + Math.min(n, m)));
- }
-
- /**
- * The max Distance is the maximum Levenshtein distance for the text
- * compared to some other value that results in score that is
- * better than the minimum similarity.
- * @param m the length of the "other value"
- * @return the maximum levenshtein distance that we care about
- */
- private int calculateMaxDistance(int m) {
- return (int) ((1-minimumSimilarity) * (Math.min(text.length(), m) + prefix.length()));
- }
-
- /** {@inheritDoc} */
- @Override
- public void close() throws IOException {
- p = d = null;
- searchTerm = null;
- super.close(); //call super.close() and let the garbage collector do its work.
- }
-
-}
Index: lucene/src/java/org/apache/lucene/search/PrefixQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/PrefixQuery.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/PrefixQuery.java (working copy)
@@ -44,11 +44,6 @@
/** Returns the prefix of this query. */
public Term getPrefix() { return prefix; }
- @Override @Deprecated
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new PrefixTermEnum(reader, prefix);
- }
-
@Override
protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
if (prefix.text().length() == 0) {
Index: lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (working copy)
@@ -22,7 +22,6 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.index.TermDocs; // for javadocs
/**
* A {@link Filter} that only accepts documents whose single
@@ -70,7 +69,7 @@
*
* In contrast, TermsFilter builds up an {@link OpenBitSet},
* keyed by docID, every time it's created, by enumerating
- * through all matching docs using {@link TermDocs} to seek
+ * through all matching docs using {@link DocsEnum} to seek
* and scan through each term's docID list. While there is
* no linear scan of all docIDs, besides the allocation of
* the underlying array in the {@link OpenBitSet}, this
Index: lucene/src/java/org/apache/lucene/search/TermRangeTermEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermRangeTermEnum.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/TermRangeTermEnum.java (working copy)
@@ -1,156 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.text.Collator;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.util.StringHelper;
-
-/**
- * Subclass of FilteredTermEnum for enumerating all terms that match the
- * specified range parameters.
- *
- * Term enumerations are always ordered by Term.compareTo(). Each term in
- * the enumeration is greater than all that precede it.
- * @since 2.9
- * @deprecated Please switch to {@link TermRangeTermsEnum}
- */
-@Deprecated
-public class TermRangeTermEnum extends FilteredTermEnum {
-
- private Collator collator = null;
- private boolean endEnum = false;
- private String field;
- private String upperTermText;
- private String lowerTermText;
- private boolean includeLower;
- private boolean includeUpper;
-
- /**
- * Enumerates all terms greater/equal than lowerTerm
- * but less/equal than upperTerm.
- *
- * If an endpoint is null, it is said to be "open". Either or both
- * endpoints may be open. Open endpoints may not be exclusive
- * (you can't select all but the first or last term without
- * explicitly specifying the term to exclude.)
- *
- * @param reader
- * @param field
- * An interned field that holds both lower and upper terms.
- * @param lowerTermText
- * The term text at the lower end of the range
- * @param upperTermText
- * The term text at the upper end of the range
- * @param includeLower
- * If true, the lowerTerm is included in the range.
- * @param includeUpper
- * If true, the upperTerm is included in the range.
- * @param collator
- * The collator to use to collate index Terms, to determine their
- * membership in the range bounded by lowerTerm and
- * upperTerm.
- *
- * @throws IOException
- */
- public TermRangeTermEnum(IndexReader reader, String field, String lowerTermText, String upperTermText,
- boolean includeLower, boolean includeUpper, Collator collator) throws IOException {
- this.collator = collator;
- this.upperTermText = upperTermText;
- this.lowerTermText = lowerTermText;
- this.includeLower = includeLower;
- this.includeUpper = includeUpper;
- this.field = StringHelper.intern(field);
-
- // do a little bit of normalization...
- // open ended range queries should always be inclusive.
- if (this.lowerTermText == null) {
- this.lowerTermText = "";
- this.includeLower = true;
- }
-
- if (this.upperTermText == null) {
- this.includeUpper = true;
- }
-
- String startTermText = collator == null ? this.lowerTermText : "";
- setEnum(reader.terms(new Term(this.field, startTermText)));
- }
-
- @Override
- public float difference() {
- return 1.0f;
- }
-
- @Override
- protected boolean endEnum() {
- return endEnum;
- }
-
- @Override
- protected boolean termCompare(Term term) {
- if (collator == null) {
- // Use Unicode code point ordering
- boolean checkLower = false;
- if (!includeLower) // make adjustments to set to exclusive
- checkLower = true;
- if (term != null && term.field() == field) { // interned comparison
- if (!checkLower || null==lowerTermText || term.text().compareTo(lowerTermText) > 0) {
- checkLower = false;
- if (upperTermText != null) {
- int compare = upperTermText.compareTo(term.text());
- /*
- * if beyond the upper term, or is exclusive and this is equal to
- * the upper term, break out
- */
- if ((compare < 0) ||
- (!includeUpper && compare==0)) {
- endEnum = true;
- return false;
- }
- }
- return true;
- }
- } else {
- // break
- endEnum = true;
- return false;
- }
- return false;
- } else {
- if (term != null && term.field() == field) { // interned comparison
- if ((lowerTermText == null
- || (includeLower
- ? collator.compare(term.text(), lowerTermText) >= 0
- : collator.compare(term.text(), lowerTermText) > 0))
- && (upperTermText == null
- || (includeUpper
- ? collator.compare(term.text(), upperTermText) <= 0
- : collator.compare(term.text(), upperTermText) < 0))) {
- return true;
- }
- return false;
- }
- endEnum = true;
- return false;
- }
- }
-}
Index: lucene/src/java/org/apache/lucene/search/WildcardTermEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/WildcardTermEnum.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/WildcardTermEnum.java (working copy)
@@ -1,192 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-
-/**
- * Subclass of FilteredTermEnum for enumerating all terms that match the
- * specified wildcard filter term.
- *
- * Term enumerations are always ordered by Term.compareTo(). Each term in
- * the enumeration is greater than all that precede it.
- * @deprecated Please use {@link AutomatonTermsEnum} instead.
- */
-@Deprecated
-public class WildcardTermEnum extends FilteredTermEnum {
- final Term searchTerm;
- final String field;
- final String text;
- final String pre;
- final int preLen;
- boolean endEnum = false;
-
- /**
- * Creates a new WildcardTermEnum.
- *
- * After calling the constructor the enumeration is already pointing to the first
- * valid term if such a term exists.
- */
- public WildcardTermEnum(IndexReader reader, Term term) throws IOException {
- super();
- searchTerm = term;
- field = searchTerm.field();
- final String searchTermText = searchTerm.text();
-
- final int sidx = searchTermText.indexOf(WILDCARD_STRING);
- final int cidx = searchTermText.indexOf(WILDCARD_CHAR);
- int idx = sidx;
- if (idx == -1) {
- idx = cidx;
- }
- else if (cidx >= 0) {
- idx = Math.min(idx, cidx);
- }
- pre = idx != -1?searchTerm.text().substring(0,idx): "";
-
- preLen = pre.length();
- text = searchTermText.substring(preLen);
- setEnum(reader.terms(new Term(searchTerm.field(), pre)));
- }
-
- @Override
- protected final boolean termCompare(Term term) {
- if (field == term.field()) {
- String searchText = term.text();
- if (searchText.startsWith(pre)) {
- return wildcardEquals(text, 0, searchText, preLen);
- }
- }
- endEnum = true;
- return false;
- }
-
- @Override
- public float difference() {
- return 1.0f;
- }
-
- @Override
- public final boolean endEnum() {
- return endEnum;
- }
-
- /********************************************
- * String equality with support for wildcards
- ********************************************/
-
- public static final char WILDCARD_STRING = WildcardQuery.WILDCARD_STRING;
- public static final char WILDCARD_CHAR = WildcardQuery.WILDCARD_CHAR;
-
- /**
- * Determines if a word matches a wildcard pattern.
- * Work released by Granta Design Ltd after originally being done on
- * company time.
- */
- public static final boolean wildcardEquals(String pattern, int patternIdx,
- String string, int stringIdx)
- {
- int p = patternIdx;
-
- for (int s = stringIdx; ; ++p, ++s)
- {
- // End of string yet?
- boolean sEnd = (s >= string.length());
- // End of pattern yet?
- boolean pEnd = (p >= pattern.length());
-
- // If we're looking at the end of the string...
- if (sEnd)
- {
- // Assume the only thing left on the pattern is/are wildcards
- boolean justWildcardsLeft = true;
-
- // Current wildcard position
- int wildcardSearchPos = p;
- // While we haven't found the end of the pattern,
- // and haven't encountered any non-wildcard characters
- while (wildcardSearchPos < pattern.length() && justWildcardsLeft)
- {
- // Check the character at the current position
- char wildchar = pattern.charAt(wildcardSearchPos);
-
- // If it's not a wildcard character, then there is more
- // pattern information after this/these wildcards.
- if (wildchar != WILDCARD_CHAR && wildchar != WILDCARD_STRING)
- {
- justWildcardsLeft = false;
- }
- else
- {
- // to prevent "cat" matches "ca??"
- if (wildchar == WILDCARD_CHAR) {
- return false;
- }
-
- // Look at the next character
- wildcardSearchPos++;
- }
- }
-
- // This was a prefix wildcard search, and we've matched, so
- // return true.
- if (justWildcardsLeft)
- {
- return true;
- }
- }
-
- // If we've gone past the end of the string, or the pattern,
- // return false.
- if (sEnd || pEnd)
- {
- break;
- }
-
- // Match a single character, so continue.
- if (pattern.charAt(p) == WILDCARD_CHAR)
- {
- continue;
- }
-
- //
- if (pattern.charAt(p) == WILDCARD_STRING)
- {
- // Look at the character beyond the '*'.
- ++p;
- // Examine the string, starting at the last character.
- for (int i = string.length(); i >= s; --i)
- {
- if (wildcardEquals(pattern, p, string, i))
- {
- return true;
- }
- }
- break;
- }
- if (pattern.charAt(p) != string.charAt(s))
- {
- break;
- }
- }
- return false;
- }
-}
Index: lucene/src/java/org/apache/lucene/search/TermRangeQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (working copy)
@@ -129,12 +129,6 @@
/** Returns the collator used to determine range inclusion, if any. */
public Collator getCollator() { return collator; }
- @Override @Deprecated
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new TermRangeTermEnum(reader, field, lowerTerm,
- upperTerm, includeLower, includeUpper, collator);
- }
-
@Override
protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
Index: lucene/src/java/org/apache/lucene/search/SingleTermEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/SingleTermEnum.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/SingleTermEnum.java (working copy)
@@ -1,68 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-
-/**
- * Subclass of FilteredTermEnum for enumerating a single term.
- *
- * This can be used by {@link MultiTermQuery}s that need only visit one term,
- * but want to preserve MultiTermQuery semantics such as
- * {@link MultiTermQuery#rewriteMethod}.
- */
-@Deprecated
-public class SingleTermEnum extends FilteredTermEnum {
- private Term singleTerm;
- private boolean endEnum = false;
-
- /**
- * Creates a new SingleTermEnum.
- *
- * After calling the constructor the enumeration is already pointing to the term,
- * if it exists.
- */
- public SingleTermEnum(IndexReader reader, Term singleTerm) throws IOException {
- super();
- this.singleTerm = singleTerm;
- setEnum(reader.terms(singleTerm));
- }
-
- @Override
- public float difference() {
- return 1.0F;
- }
-
- @Override
- protected boolean endEnum() {
- return endEnum;
- }
-
- @Override
- protected boolean termCompare(Term term) {
- if (term.equals(singleTerm)) {
- return true;
- } else {
- endEnum = true;
- return false;
- }
- }
-}
Index: lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java (working copy)
@@ -1,115 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
-
-/** Abstract class for enumerating a subset of all terms.
-
-
- Term enumerations are always ordered by Term.compareTo(). Each term in
- the enumeration is greater than all that precede it.
-
- @deprecated Switch to {@link FilteredTermsEnum} instead.
-*/
-@Deprecated
-public abstract class FilteredTermEnum extends TermEnum {
- /** the current term */
- protected Term currentTerm = null;
-
- /** the delegate enum - to set this member use {@link #setEnum} */
- protected TermEnum actualEnum = null;
-
- public FilteredTermEnum() {}
-
- /** Equality compare on the term */
- protected abstract boolean termCompare(Term term);
-
- /** Equality measure on the term, it is in reality a boost
- * factor and used like so in {@link MultiTermQuery},
- * so the name is wrong.
- * @deprecated Use {@link MultiTermQuery.BoostAttribute}
- * together with {@link FilteredTermsEnum}. For example
- * see {@link FuzzyTermsEnum}
- */
- @Deprecated
- public abstract float difference();
-
- /** Indicates the end of the enumeration has been reached */
- protected abstract boolean endEnum();
-
- /**
- * use this method to set the actual TermEnum (e.g. in ctor),
- * it will be automatically positioned on the first matching term.
- */
- protected void setEnum(TermEnum actualEnum) throws IOException {
- this.actualEnum = actualEnum;
- // Find the first term that matches
- Term term = actualEnum.term();
- if (term != null && termCompare(term))
- currentTerm = term;
- else next();
- }
-
- /**
- * Returns the docFreq of the current Term in the enumeration.
- * Returns -1 if no Term matches or all terms have been enumerated.
- */
- @Override
- public int docFreq() {
- if (currentTerm == null) return -1;
- assert actualEnum != null;
- return actualEnum.docFreq();
- }
-
- /** Increments the enumeration to the next element. True if one exists. */
- @Override
- public boolean next() throws IOException {
- if (actualEnum == null) return false; // the actual enumerator is not initialized!
- currentTerm = null;
- while (currentTerm == null) {
- if (endEnum()) return false;
- if (actualEnum.next()) {
- Term term = actualEnum.term();
- if (termCompare(term)) {
- currentTerm = term;
- return true;
- }
- }
- else return false;
- }
- currentTerm = null;
- return false;
- }
-
- /** Returns the current Term in the enumeration.
- * Returns null if no Term matches or all terms have been enumerated. */
- @Override
- public Term term() {
- return currentTerm;
- }
-
- /** Closes the enumeration to further activity, freeing resources. */
- @Override
- public void close() throws IOException {
- if (actualEnum != null) actualEnum.close();
- currentTerm = null;
- actualEnum = null;
- }
-}
Index: lucene/src/java/org/apache/lucene/search/WildcardQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/WildcardQuery.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/search/WildcardQuery.java (working copy)
@@ -81,17 +81,6 @@
return BasicOperations.concatenate(automata);
}
- @Override @Deprecated
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new WildcardTermEnum(reader, term);
- }
-
- // we override this method, else backwards layer in MTQ will prefer getEnum!
- @Override
- protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- return super.getTermsEnum(reader);
- }
-
/**
* Returns the pattern term.
*/
Index: lucene/src/java/org/apache/lucene/index/AllDocsEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/AllDocsEnum.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/AllDocsEnum.java (working copy)
@@ -1,78 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.index;
-
-import org.apache.lucene.util.Bits;
-import java.io.IOException;
-
-class AllDocsEnum extends DocsEnum {
- protected final Bits skipDocs;
- protected final int maxDoc;
- protected final IndexReader reader;
- protected int doc = -1;
-
- protected AllDocsEnum(IndexReader reader, Bits skipDocs) {
- this.skipDocs = skipDocs;
- this.maxDoc = reader.maxDoc();
- this.reader = reader;
- }
-
- @Override
- public int freq() {
- return 1;
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public int nextDoc() throws IOException {
- return advance(doc+1);
- }
-
- @Override
- public int read() throws IOException {
- final int[] docs = bulkResult.docs.ints;
- final int[] freqs = bulkResult.freqs.ints;
- int i = 0;
- while (i < docs.length && doc < maxDoc) {
- if (skipDocs == null || !skipDocs.get(doc)) {
- docs[i] = doc;
- freqs[i] = 1;
- ++i;
- }
- doc++;
- }
- return i;
- }
-
- @Override
- public int advance(int target) throws IOException {
- doc = target;
- while (doc < maxDoc) {
- if (skipDocs == null || !skipDocs.get(doc)) {
- return doc;
- }
- doc++;
- }
- doc = NO_MORE_DOCS;
- return doc;
- }
-}
Index: lucene/src/java/org/apache/lucene/index/LegacySegmentMergeQueue.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/LegacySegmentMergeQueue.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/LegacySegmentMergeQueue.java (working copy)
@@ -1,42 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import org.apache.lucene.util.PriorityQueue;
-
-final class LegacySegmentMergeQueue extends PriorityQueue {
- LegacySegmentMergeQueue(int size) {
- initialize(size);
- }
-
- @Override
- protected final boolean lessThan(LegacySegmentMergeInfo a, LegacySegmentMergeInfo b) {
- int comparison = a.term.compareTo(b.term);
- if (comparison == 0)
- return a.base < b.base;
- else
- return comparison < 0;
- }
-
- final void close() throws IOException {
- while (top() != null)
- ((LegacySegmentMergeInfo)pop()).close();
- }
-
-}
Index: lucene/src/java/org/apache/lucene/index/ParallelReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/ParallelReader.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/ParallelReader.java (working copy)
@@ -452,18 +452,6 @@
}
@Override
- public TermEnum terms() throws IOException {
- ensureOpen();
- return new ParallelTermEnum();
- }
-
- @Override
- public TermEnum terms(Term term) throws IOException {
- ensureOpen();
- return new ParallelTermEnum(term);
- }
-
- @Override
public int docFreq(Term term) throws IOException {
ensureOpen();
IndexReader reader = fieldToReader.get(term.field());
@@ -477,30 +465,6 @@
return reader == null? 0 : reader.docFreq(field, term);
}
- @Override
- public TermDocs termDocs(Term term) throws IOException {
- ensureOpen();
- return new ParallelTermDocs(term);
- }
-
- @Override
- public TermDocs termDocs() throws IOException {
- ensureOpen();
- return new ParallelTermDocs();
- }
-
- @Override
- public TermPositions termPositions(Term term) throws IOException {
- ensureOpen();
- return new ParallelTermPositions(term);
- }
-
- @Override
- public TermPositions termPositions() throws IOException {
- ensureOpen();
- return new ParallelTermPositions();
- }
-
/**
* Checks recursively if all subreaders are up to date.
*/
@@ -574,168 +538,6 @@
}
return fieldSet;
}
-
- @Deprecated
- private class ParallelTermEnum extends TermEnum {
- private String field;
- private Iterator fieldIterator;
- private TermEnum termEnum;
-
- public ParallelTermEnum() throws IOException {
- try {
- field = fieldToReader.firstKey();
- } catch(NoSuchElementException e) {
- // No fields, so keep field == null, termEnum == null
- return;
- }
- if (field != null)
- termEnum = fieldToReader.get(field).terms();
- }
-
- public ParallelTermEnum(Term term) throws IOException {
- field = term.field();
- IndexReader reader = fieldToReader.get(field);
- if (reader!=null)
- termEnum = reader.terms(term);
- }
-
- @Override
- public boolean next() throws IOException {
- if (termEnum==null)
- return false;
-
- // another term in this field?
- if (termEnum.next() && termEnum.term().field()==field)
- return true; // yes, keep going
-
- termEnum.close(); // close old termEnum
-
- // find the next field with terms, if any
- if (fieldIterator==null) {
- fieldIterator = fieldToReader.tailMap(field).keySet().iterator();
- fieldIterator.next(); // Skip field to get next one
- }
- while (fieldIterator.hasNext()) {
- field = fieldIterator.next();
- termEnum = fieldToReader.get(field).terms(new Term(field));
- Term term = termEnum.term();
- if (term!=null && term.field()==field)
- return true;
- else
- termEnum.close();
- }
-
- return false; // no more fields
- }
-
- @Override
- public Term term() {
- if (termEnum==null)
- return null;
-
- return termEnum.term();
- }
-
- @Override
- public int docFreq() {
- if (termEnum==null)
- return 0;
-
- return termEnum.docFreq();
- }
-
- @Override
- public void close() throws IOException {
- if (termEnum!=null)
- termEnum.close();
- }
-
- }
-
- // wrap a TermDocs in order to support seek(Term)
- private class ParallelTermDocs implements TermDocs {
- protected TermDocs termDocs;
-
- public ParallelTermDocs() {}
- public ParallelTermDocs(Term term) throws IOException {
- if (term == null)
- termDocs = readers.isEmpty() ? null : readers.get(0).termDocs(null);
- else
- seek(term);
- }
-
- public int doc() { return termDocs.doc(); }
- public int freq() { return termDocs.freq(); }
-
- public void seek(Term term) throws IOException {
- IndexReader reader = fieldToReader.get(term.field());
- termDocs = reader!=null ? reader.termDocs(term) : null;
- }
-
- public void seek(TermEnum termEnum) throws IOException {
- seek(termEnum.term());
- }
-
- public boolean next() throws IOException {
- if (termDocs==null)
- return false;
-
- return termDocs.next();
- }
-
- public int read(final int[] docs, final int[] freqs) throws IOException {
- if (termDocs==null)
- return 0;
-
- return termDocs.read(docs, freqs);
- }
-
- public boolean skipTo(int target) throws IOException {
- if (termDocs==null)
- return false;
-
- return termDocs.skipTo(target);
- }
-
- public void close() throws IOException {
- if (termDocs!=null)
- termDocs.close();
- }
-
- }
-
- private class ParallelTermPositions
- extends ParallelTermDocs implements TermPositions {
-
- public ParallelTermPositions() {}
- public ParallelTermPositions(Term term) throws IOException { seek(term); }
-
- @Override
- public void seek(Term term) throws IOException {
- IndexReader reader = fieldToReader.get(term.field());
- termDocs = reader!=null ? reader.termPositions(term) : null;
- }
-
- public int nextPosition() throws IOException {
- // It is an error to call this if there is no next position, e.g. if termDocs==null
- return ((TermPositions)termDocs).nextPosition();
- }
-
- public int getPayloadLength() throws IOException {
- return ((TermPositions)termDocs).getPayloadLength();
- }
-
- public byte[] getPayload(byte[] data, int offset) throws IOException {
- return ((TermPositions)termDocs).getPayload(data, offset);
- }
-
-
- // TODO: Remove warning after API has been finalized
- public boolean isPayloadAvailable() {
- return ((TermPositions) termDocs).isPayloadAvailable();
- }
- }
-
}
Index: lucene/src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/SegmentReader.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/SegmentReader.java (working copy)
@@ -39,11 +39,7 @@
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.CloseableThreadLocal;
-import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.index.codecs.preflex.PreFlexFields;
-import org.apache.lucene.index.codecs.preflex.SegmentTermDocs;
-import org.apache.lucene.index.codecs.preflex.SegmentTermPositions;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.BytesRef;
@@ -93,7 +89,6 @@
final FieldInfos fieldInfos;
final FieldsProducer fields;
- final boolean isPreFlex;
final CodecProvider codecs;
final Directory dir;
@@ -140,7 +135,6 @@
fields = si.getCodec().fieldsProducer(new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor));
assert fields != null;
- isPreFlex = fields instanceof PreFlexFields;
success = true;
} finally {
if (!success) {
@@ -824,37 +818,6 @@
return new ArrayList(si.files());
}
- @Override
- public TermEnum terms() throws IOException {
- ensureOpen();
- if (core.isPreFlex) {
- // For old API on an old segment, instead of
- // converting old API -> new API -> old API, just give
- // direct access to old:
- return ((PreFlexFields) core.fields).tis.terms();
- } else {
- // Emulate pre-flex API on top of flex index
- return new LegacyTermEnum(null);
- }
- }
-
- /** @deprecated Please switch to the flex API ({@link
- * #fields}) instead. */
- @Deprecated
- @Override
- public TermEnum terms(Term t) throws IOException {
- ensureOpen();
- if (core.isPreFlex) {
- // For old API on an old segment, instead of
- // converting old API -> new API -> old API, just give
- // direct access to old:
- return ((PreFlexFields) core.fields).tis.terms(t);
- } else {
- // Emulate pre-flex API on top of flex index
- return new LegacyTermEnum(t);
- }
- }
-
FieldInfos fieldInfos() {
return core.fieldInfos;
}
@@ -870,75 +833,12 @@
return (deletedDocs != null && deletedDocs.get(n));
}
- /** @deprecated Switch to the flex API ({@link
- * IndexReader#termDocsEnum}) instead. */
- @Deprecated
@Override
- public TermDocs termDocs(Term term) throws IOException {
- if (term == null) {
- return new AllTermDocs(this);
- } else {
- return super.termDocs(term);
- }
- }
-
- @Override
public Fields fields() throws IOException {
return core.fields;
}
- /** @deprecated Switch to the flex API {@link
- * IndexReader#termDocsEnum} instead. */
- @Deprecated
@Override
- public TermDocs termDocs() throws IOException {
- ensureOpen();
- if (core.isPreFlex) {
- // For old API on an old segment, instead of
- // converting old API -> new API -> old API, just give
- // direct access to old:
- final PreFlexFields pre = (PreFlexFields) core.fields;
- SegmentTermDocs std = new SegmentTermDocs(pre.freqStream, pre.tis, core.fieldInfos);
- std.setSkipDocs(deletedDocs);
- return std;
- } else {
- // Emulate old API
- return new LegacyTermDocs();
- }
- }
-
- /** @deprecated Switch to the flex API {@link
- * IndexReader#termDocsEnum} instead */
- @Deprecated
- @Override
- public TermPositions termPositions() throws IOException {
- ensureOpen();
- if (core.isPreFlex) {
- // For old API on an old segment, instead of
- // converting old API -> new API -> old API, just give
- // direct access to old:
- final PreFlexFields pre = (PreFlexFields) core.fields;
- SegmentTermPositions stp = new SegmentTermPositions(pre.freqStream, pre.proxStream, pre.tis, core.fieldInfos);
- stp.setSkipDocs(deletedDocs);
- return stp;
- } else {
- // Emulate old API
- return new LegacyTermPositions();
- }
- }
-
- @Override
- public int docFreq(Term t) throws IOException {
- ensureOpen();
- Terms terms = core.fields.terms(t.field);
- if (terms != null) {
- return terms.docFreq(new BytesRef(t.text));
- } else {
- return 0;
- }
- }
-
- @Override
public int docFreq(String field, BytesRef term) throws IOException {
ensureOpen();
@@ -1328,393 +1228,4 @@
public int getTermInfosIndexDivisor() {
return core.termsIndexDivisor;
}
-
- // Back compat: pre-flex TermEnum API over flex API
- @Deprecated
- final private class LegacyTermEnum extends TermEnum {
- FieldsEnum fields;
- TermsEnum terms;
- boolean done;
- String currentField;
- BytesRef currentTerm;
-
- public LegacyTermEnum(Term t) throws IOException {
- fields = core.fields.iterator();
- currentField = fields.next();
- if (currentField == null) {
- // no fields
- done = true;
- } else if (t != null) {
- // Pre-seek to this term
-
- while(currentField.compareTo(t.field) < 0) {
- currentField = fields.next();
- if (currentField == null) {
- // Hit end of fields
- done = true;
- break;
- }
- }
-
- if (!done) {
- // We found some field -- get its terms:
- terms = fields.terms();
-
- if (currentField == t.field) {
- // We found exactly the requested field; now
- // seek the term text:
- String text = t.text();
-
- // this is only for backwards compatibility.
- // previously you could supply a term with unpaired surrogates,
- // and it would return the next Term.
- // if someone does this, tack on the lowest possible trail surrogate.
- // this emulates the old behavior, and forms "valid UTF-8" unicode.
- BytesRef tr = new BytesRef(UnicodeUtil.nextValidUTF16String(text));
- TermsEnum.SeekStatus status = terms.seek(tr);
-
- if (status == TermsEnum.SeekStatus.END) {
- // Rollover to the next field
- terms = null;
- next();
- } else if (status == TermsEnum.SeekStatus.FOUND) {
- // Found exactly the term
- currentTerm = tr;
- } else {
- // Found another term, in this same field
- currentTerm = terms.term();
- }
- } else {
- // We didn't find exact field (we found the
- // following field); advance to first term in
- // this field
- next();
- }
- }
- } else {
- terms = fields.terms();
- }
- }
-
- @Override
- public boolean next() throws IOException {
-
- if (done) {
- return false;
- }
-
- while(true) {
- if (terms == null) {
- // Advance to the next field
- currentField = fields.next();
- if (currentField == null) {
- done = true;
- return false;
- }
- terms = fields.terms();
- }
- currentTerm = terms.next();
- if (currentTerm != null) {
- // This field still has terms
- return true;
- } else {
- // Done producing terms from this field; advance
- // to next field
- terms = null;
- }
- }
- }
-
- @Override
- public Term term() {
- if (!done && terms != null && currentTerm != null) {
- return new Term(currentField, currentTerm.utf8ToString());
- }
- return null;
- }
-
- @Override
- public int docFreq() {
- return terms == null ? 0 : terms.docFreq();
- }
-
- @Override
- public void close() {}
- }
-
- // Back compat: emulates legacy TermDocs API on top of
- // flex API
- private class LegacyTermDocs implements TermDocs {
-
- String currentField;
- final Fields fields;
- TermsEnum terms;
- DocsEnum docsEnum;
- boolean any;
-
- LegacyTermDocs() throws IOException {
- fields = core.fields;
- }
-
- public void close() {}
-
- public void seek(TermEnum termEnum) throws IOException {
- seek(termEnum.term());
- }
-
- public boolean skipTo(int target) throws IOException {
- if (!any) {
- return false;
- } else {
- return docsEnum.advance(target) != docsEnum.NO_MORE_DOCS;
- }
- }
-
- public void seek(Term term) throws IOException {
-
- any = false;
-
- if (terms != null && !term.field.equals(currentField)) {
- // new field
- terms = null;
- }
-
- if (terms == null) {
- currentField = term.field;
- Terms terms1 = fields.terms(currentField);
- if (terms1 == null) {
- // no such field
- return;
- } else {
- terms = terms1.iterator();
- }
- }
-
- if (terms.seek(new BytesRef(term.text)) == TermsEnum.SeekStatus.FOUND) {
- // Term exists
- any = true;
- pendingBulkResult = null;
- docsEnum = terms.docs(deletedDocs, docsEnum);
- }
- }
-
- public int doc() {
- if (!any) {
- return 0;
- } else {
- return docsEnum.docID();
- }
- }
-
- private DocsEnum.BulkReadResult pendingBulkResult;
- private int bulkCount;
- private int pendingBulk;
-
- public int read(int[] docs, int[] freqs) throws IOException {
- if (any && pendingBulkResult == null) {
- pendingBulkResult = docsEnum.getBulkResult();
- }
- if (!any) {
- return 0;
- } else if (pendingBulk > 0) {
- final int left = bulkCount - pendingBulk;
- if (docs.length >= left) {
- // read all pending
- System.arraycopy(pendingBulkResult.docs.ints, pendingBulk, docs, 0, left);
- System.arraycopy(pendingBulkResult.freqs.ints, pendingBulk, freqs, 0, left);
- pendingBulk = 0;
- return left;
- } else {
- // read only part of pending
- System.arraycopy(pendingBulkResult.docs.ints, pendingBulk, docs, 0, docs.length);
- System.arraycopy(pendingBulkResult.freqs.ints, pendingBulk, freqs, 0, docs.length);
- pendingBulk += docs.length;
- return docs.length;
- }
- } else {
- // nothing pending
- bulkCount = docsEnum.read();
- if (docs.length >= bulkCount) {
- System.arraycopy(pendingBulkResult.docs.ints, 0, docs, 0, bulkCount);
- System.arraycopy(pendingBulkResult.freqs.ints, 0, freqs, 0, bulkCount);
- return bulkCount;
- } else {
- System.arraycopy(pendingBulkResult.docs.ints, 0, docs, 0, docs.length);
- System.arraycopy(pendingBulkResult.freqs.ints, 0, freqs, 0, docs.length);
- pendingBulk = docs.length;
- return docs.length;
- }
- }
- }
-
- public int freq() {
- if (!any) {
- return 0;
- } else {
- return docsEnum.freq();
- }
- }
-
- public boolean next() throws IOException {
- if (!any) {
- return false;
- } else {
- return docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS;
- }
- }
- }
-
- // Back compat: implements legacy TermPositions API on top
- // of flex API
- final private class LegacyTermPositions implements TermPositions {
-
- String currentField;
- final Fields fields;
- TermsEnum terms;
- DocsAndPositionsEnum postingsEnum;
- DocsEnum docsEnum;
- boolean any;
-
- LegacyTermPositions() throws IOException {
- fields = core.fields;
- }
-
- public void close() {}
-
- public void seek(TermEnum termEnum) throws IOException {
- seek(termEnum.term());
- }
-
- public boolean skipTo(int target) throws IOException {
- if (!any) {
- return false;
- } else {
- return docsEnum.advance(target) != docsEnum.NO_MORE_DOCS;
- }
- }
-
- public void seek(Term term) throws IOException {
-
- any = false;
-
- if (terms != null && !term.field.equals(currentField)) {
- // new field
- terms = null;
- }
-
- if (terms == null) {
- currentField = term.field;
- Terms terms1 = fields.terms(currentField);
- if (terms1 == null) {
- // no such field
- return;
- } else {
- terms = terms1.iterator();
- }
- }
-
- if (terms.seek(new BytesRef(term.text)) == TermsEnum.SeekStatus.FOUND) {
- // Term exists
- any = true;
- postingsEnum = terms.docsAndPositions(deletedDocs, postingsEnum);
- if (postingsEnum == null) {
- docsEnum = terms.docs(deletedDocs, postingsEnum);
- } else {
- docsEnum = postingsEnum;
- }
- }
- }
-
- public int doc() {
- if (!any) {
- return 0;
- } else {
- return docsEnum.docID();
- }
- }
-
- public int freq() {
- if (!any) {
- return 0;
- } else {
- return docsEnum.freq();
- }
- }
-
- public boolean next() throws IOException {
- if (!any) {
- return false;
- } else {
- return docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS;
- }
- }
-
- public int read(int[] docs, int[] freqs) throws IOException {
- throw new UnsupportedOperationException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead.");
- }
-
- public int nextPosition() throws IOException {
- pendingPayload = null;
- if (!any || postingsEnum == null) {
- return 0;
- } else {
- return postingsEnum.nextPosition();
- }
- }
-
- private BytesRef pendingPayload;
-
- public int getPayloadLength() throws IOException {
- if (!any || postingsEnum == null) {
- return 0;
- } else {
- if (pendingPayload == null) {
- if (!postingsEnum.hasPayload()) {
- return 0;
- }
- pendingPayload = postingsEnum.getPayload();
- }
- if (pendingPayload == null) {
- return 0;
- }
- return pendingPayload.length;
- }
- }
-
- public byte[] getPayload(byte[] bytes, int offset) throws IOException {
- if (!any || postingsEnum == null) {
- return null;
- }
- if (pendingPayload == null) {
- if (!postingsEnum.hasPayload()) {
- return null;
- }
- pendingPayload = postingsEnum.getPayload();
- }
- if (pendingPayload == null) {
- return null;
- }
-
- // old API would always used passed in bytes if it
- // "fits", else allocate new:
- if (bytes != null && pendingPayload.length <= bytes.length - offset) {
- System.arraycopy(pendingPayload.bytes, pendingPayload.offset, bytes, offset, pendingPayload.length);
- return bytes;
- } else if (pendingPayload.offset == 0 && pendingPayload.length == pendingPayload.bytes.length) {
- return pendingPayload.bytes;
- } else {
- final byte[] retBytes = new byte[pendingPayload.length];
- System.arraycopy(pendingPayload.bytes, pendingPayload.offset, retBytes, 0, pendingPayload.length);
- return retBytes;
- }
- }
-
- public boolean isPayloadAvailable() {
- if (!any || postingsEnum == null) {
- return false;
- } else {
- return postingsEnum.hasPayload();
- }
- }
- }
}
Index: lucene/src/java/org/apache/lucene/index/AbstractAllTermDocs.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/AbstractAllTermDocs.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/AbstractAllTermDocs.java (working copy)
@@ -1,92 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.index;
-
-import java.io.IOException;
-
-/** Base class for enumerating all but deleted docs.
- *
- * NOTE: this class is meant only to be used internally
- * by Lucene; it's only public so it can be shared across
- * packages. This means the API is freely subject to
- * change, and, the class could be removed entirely, in any
- * Lucene release. Use directly at your own risk! */
-@Deprecated
-public abstract class AbstractAllTermDocs implements TermDocs {
-
- protected int maxDoc;
- protected int doc = -1;
-
- protected AbstractAllTermDocs(int maxDoc) {
- this.maxDoc = maxDoc;
- }
-
- public void seek(Term term) throws IOException {
- if (term==null) {
- doc = -1;
- } else {
- throw new UnsupportedOperationException();
- }
- }
-
- public void seek(TermEnum termEnum) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- public int doc() {
- return doc;
- }
-
- public int freq() {
- return 1;
- }
-
- public boolean next() throws IOException {
- return skipTo(doc+1);
- }
-
- public int read(int[] docs, int[] freqs) throws IOException {
- final int length = docs.length;
- int i = 0;
- while (i < length && doc < maxDoc) {
- if (!isDeleted(doc)) {
- docs[i] = doc;
- freqs[i] = 1;
- ++i;
- }
- doc++;
- }
- return i;
- }
-
- public boolean skipTo(int target) throws IOException {
- doc = target;
- while (doc < maxDoc) {
- if (!isDeleted(doc)) {
- return true;
- }
- doc++;
- }
- return false;
- }
-
- public void close() throws IOException {
- }
-
- public abstract boolean isDeleted(int doc);
-}
\ No newline at end of file
Index: lucene/src/java/org/apache/lucene/index/AllTermDocs.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/AllTermDocs.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/AllTermDocs.java (working copy)
@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.index;
-
-import org.apache.lucene.util.BitVector;
-
-/** @deprecated Switch to AllDocsEnum */
-@Deprecated
-class AllTermDocs extends AbstractAllTermDocs {
-
- protected BitVector deletedDocs;
-
- protected AllTermDocs(SegmentReader parent) {
- super(parent.maxDoc());
- synchronized (parent) {
- this.deletedDocs = parent.deletedDocs;
- }
- }
-
- @Override
- public boolean isDeleted(int doc) {
- return deletedDocs != null && deletedDocs.get(doc);
- }
-}
Index: lucene/src/java/org/apache/lucene/index/LegacySegmentMergeInfo.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/LegacySegmentMergeInfo.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/LegacySegmentMergeInfo.java (working copy)
@@ -1,86 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-// @deprecated This is pre-flex API
-final class LegacySegmentMergeInfo {
- Term term;
- int base;
- int ord; // the position of the segment in a MultiReader
- TermEnum termEnum;
- IndexReader reader;
- int delCount;
- private TermPositions postings; // use getPositions()
- private int[] docMap; // use getDocMap()
-
- LegacySegmentMergeInfo(int b, TermEnum te, IndexReader r)
- throws IOException {
- base = b;
- reader = r;
- termEnum = te;
- term = te.term();
- }
-
- // maps around deleted docs
- int[] getDocMap() {
- if (docMap == null) {
- delCount = 0;
- // build array which maps document numbers around deletions
- if (reader.hasDeletions()) {
- int maxDoc = reader.maxDoc();
- docMap = new int[maxDoc];
- int j = 0;
- for (int i = 0; i < maxDoc; i++) {
- if (reader.isDeleted(i)) {
- delCount++;
- docMap[i] = -1;
- } else
- docMap[i] = j++;
- }
- }
- }
- return docMap;
- }
-
- TermPositions getPositions() throws IOException {
- if (postings == null) {
- postings = reader.termPositions();
- }
- return postings;
- }
-
- final boolean next() throws IOException {
- if (termEnum.next()) {
- term = termEnum.term();
- return true;
- } else {
- term = null;
- return false;
- }
- }
-
- final void close() throws IOException {
- termEnum.close();
- if (postings != null) {
- postings.close();
- }
-}
-}
-
Index: lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (working copy)
@@ -30,7 +30,8 @@
public abstract int nextPosition() throws IOException;
/** Returns the payload at this position, or null if no
- * payload was indexed. */
+ * payload was indexed. Only call this once per
+ * position. */
public abstract BytesRef getPayload() throws IOException;
public abstract boolean hasPayload();
Index: lucene/src/java/org/apache/lucene/index/MultiReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/MultiReader.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/MultiReader.java (working copy)
@@ -25,9 +25,6 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.index.DirectoryReader.MultiTermDocs; // deprecated
-import org.apache.lucene.index.DirectoryReader.MultiTermEnum; // deprecated
-import org.apache.lucene.index.DirectoryReader.MultiTermPositions; // deprecated
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.Bits;
@@ -370,28 +367,6 @@
}
@Override
- public TermEnum terms() throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].terms();
- } else {
- return new MultiTermEnum(this, subReaders, starts, null);
- }
- }
-
- @Override
- public TermEnum terms(Term term) throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].terms(term);
- } else {
- return new MultiTermEnum(this, subReaders, starts, term);
- }
- }
-
- @Override
public int docFreq(Term t) throws IOException {
ensureOpen();
int total = 0; // sum freqs in segments
@@ -411,39 +386,6 @@
}
@Override
- public TermDocs termDocs() throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].termDocs();
- } else {
- return new MultiTermDocs(this, subReaders, starts);
- }
- }
-
- @Override
- public TermDocs termDocs(Term term) throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].termDocs(term);
- } else {
- return super.termDocs(term);
- }
- }
-
- @Override
- public TermPositions termPositions() throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].termPositions();
- } else {
- return new MultiTermPositions(this, subReaders, starts);
- }
- }
-
- @Override
protected void doCommit(Map commitUserData) throws IOException {
for (int i = 0; i < subReaders.length; i++)
subReaders[i].commit(commitUserData);
Index: lucene/src/java/org/apache/lucene/index/DirectoryReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/DirectoryReader.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/DirectoryReader.java (working copy)
@@ -686,28 +686,6 @@
}
@Override
- public TermEnum terms() throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].terms();
- } else {
- return new MultiTermEnum(this, subReaders, starts, null);
- }
- }
-
- @Override
- public TermEnum terms(Term term) throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].terms(term);
- } else {
- return new MultiTermEnum(this, subReaders, starts, term);
- }
- }
-
- @Override
public int docFreq(Term t) throws IOException {
ensureOpen();
int total = 0; // sum freqs in segments
@@ -727,43 +705,10 @@
}
@Override
- public TermDocs termDocs() throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].termDocs();
- } else {
- return new MultiTermDocs(this, subReaders, starts);
- }
- }
-
- @Override
- public TermDocs termDocs(Term term) throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].termDocs(term);
- } else {
- return super.termDocs(term);
- }
- }
-
- @Override
public Fields fields() throws IOException {
throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields (NOTE that it's usually better to work per segment instead)");
}
- @Override
- public TermPositions termPositions() throws IOException {
- ensureOpen();
- if (subReaders.length == 1) {
- // Optimize single segment case:
- return subReaders[0].termPositions();
- } else {
- return new MultiTermPositions(this, subReaders, starts);
- }
- }
-
/**
* Tries to acquire the WriteLock on this directory. this method is only valid if this IndexReader is directory
* owner.
@@ -1122,280 +1067,4 @@
throw new UnsupportedOperationException("This IndexCommit does not support deletions");
}
}
-
- // @deprecated This is pre-flex API
- // Exposes pre-flex API by doing on-the-fly merging
- // pre-flex API to each segment
- static class MultiTermEnum extends TermEnum {
- IndexReader topReader; // used for matching TermEnum to TermDocs
- private LegacySegmentMergeQueue queue;
-
- private Term term;
- private int docFreq;
- final LegacySegmentMergeInfo[] matchingSegments; // null terminated array of matching segments
-
- public MultiTermEnum(IndexReader topReader, IndexReader[] readers, int[] starts, Term t)
- throws IOException {
- this.topReader = topReader;
- queue = new LegacySegmentMergeQueue(readers.length);
- matchingSegments = new LegacySegmentMergeInfo[readers.length+1];
- for (int i = 0; i < readers.length; i++) {
- IndexReader reader = readers[i];
- TermEnum termEnum;
-
- if (t != null) {
- termEnum = reader.terms(t);
- } else {
- termEnum = reader.terms();
- }
-
- LegacySegmentMergeInfo smi = new LegacySegmentMergeInfo(starts[i], termEnum, reader);
- smi.ord = i;
- if (t == null ? smi.next() : termEnum.term() != null)
- queue.add(smi); // initialize queue
- else
- smi.close();
- }
-
- if (t != null && queue.size() > 0) {
- next();
- }
- }
-
- @Override
- public boolean next() throws IOException {
- for (int i=0; i {
- TermPositionsQueue(List termPositions) throws IOException {
- initialize(termPositions.size());
-
- for (TermPositions tp : termPositions) {
- if (tp.next())
- add(tp);
- }
- }
-
- final TermPositions peek() {
- return top();
- }
-
- @Override
- public final boolean lessThan(TermPositions a, TermPositions b) {
- return a.doc() < b.doc();
- }
- }
-
- private static final class IntQueue {
- private int _arraySize = 16;
- private int _index = 0;
- private int _lastIndex = 0;
- private int[] _array = new int[_arraySize];
-
- final void add(int i) {
- if (_lastIndex == _arraySize)
- growArray();
-
- _array[_lastIndex++] = i;
- }
-
- final int next() {
- return _array[_index++];
- }
-
- final void sort() {
- Arrays.sort(_array, _index, _lastIndex);
- }
-
- final void clear() {
- _index = 0;
- _lastIndex = 0;
- }
-
- final int size() {
- return (_lastIndex - _index);
- }
-
- private void growArray() {
- _array = ArrayUtil.grow(_array, _arraySize+1);
- _arraySize = _array.length;
- }
- }
-
- private int _doc;
- private int _freq;
- private TermPositionsQueue _termPositionsQueue;
- private IntQueue _posList;
-
- /**
- * Creates a new MultipleTermPositions instance.
- *
- * @exception IOException
- */
- public MultipleTermPositions(IndexReader indexReader, Term[] terms) throws IOException {
- List termPositions = new LinkedList();
-
- for (int i = 0; i < terms.length; i++)
- termPositions.add(indexReader.termPositions(terms[i]));
-
- _termPositionsQueue = new TermPositionsQueue(termPositions);
- _posList = new IntQueue();
- }
-
- public final boolean next() throws IOException {
- if (_termPositionsQueue.size() == 0)
- return false;
-
- _posList.clear();
- _doc = _termPositionsQueue.peek().doc();
-
- TermPositions tp;
- do {
- tp = _termPositionsQueue.peek();
-
- for (int i = 0; i < tp.freq(); i++)
- _posList.add(tp.nextPosition());
-
- if (tp.next())
- _termPositionsQueue.updateTop();
- else {
- _termPositionsQueue.pop();
- tp.close();
- }
- } while (_termPositionsQueue.size() > 0 && _termPositionsQueue.peek().doc() == _doc);
-
- _posList.sort();
- _freq = _posList.size();
-
- return true;
- }
-
- public final int nextPosition() {
- return _posList.next();
- }
-
- public final boolean skipTo(int target) throws IOException {
- while (_termPositionsQueue.peek() != null && target > _termPositionsQueue.peek().doc()) {
- TermPositions tp = _termPositionsQueue.pop();
- if (tp.skipTo(target))
- _termPositionsQueue.add(tp);
- else
- tp.close();
- }
- return next();
- }
-
- public final int doc() {
- return _doc;
- }
-
- public final int freq() {
- return _freq;
- }
-
- public final void close() throws IOException {
- while (_termPositionsQueue.size() > 0)
- _termPositionsQueue.pop().close();
- }
-
- /**
- * Not implemented.
- * @throws UnsupportedOperationException
- */
- public void seek(Term arg0) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Not implemented.
- * @throws UnsupportedOperationException
- */
- public void seek(TermEnum termEnum) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Not implemented.
- * @throws UnsupportedOperationException
- */
- public int read(int[] arg0, int[] arg1) throws IOException {
- throw new UnsupportedOperationException();
- }
-
-
- /**
- * Not implemented.
- * @throws UnsupportedOperationException
- */
- public int getPayloadLength() {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Not implemented.
- * @throws UnsupportedOperationException
- */
- public byte[] getPayload(byte[] data, int offset) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- /**
- *
- * @return false
- */
- // TODO: Remove warning after API has been finalized
- public boolean isPayloadAvailable() {
- return false;
- }
-}
Index: lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (working copy)
@@ -361,21 +361,7 @@
in.setNorm(d, f, b);
}
- // final to force subclass to impl flex APIs, instead
@Override
- public final TermEnum terms() throws IOException {
- ensureOpen();
- return in.terms();
- }
-
- // final to force subclass to impl flex APIs, instead
- @Override
- public final TermEnum terms(Term t) throws IOException {
- ensureOpen();
- return in.terms(t);
- }
-
- @Override
public int docFreq(Term t) throws IOException {
ensureOpen();
return in.docFreq(t);
@@ -387,28 +373,7 @@
return in.docFreq(field, t);
}
- // final to force subclass to impl flex APIs, instead
@Override
- public final TermDocs termDocs() throws IOException {
- ensureOpen();
- return in.termDocs();
- }
-
- // final to force subclass to impl flex APIs, instead
- @Override
- public final TermDocs termDocs(Term term) throws IOException {
- ensureOpen();
- return in.termDocs(term);
- }
-
- // final to force subclass to impl flex APIs, instead
- @Override
- public final TermPositions termPositions() throws IOException {
- ensureOpen();
- return in.termPositions();
- }
-
- @Override
protected void doDelete(int n) throws CorruptIndexException, IOException { in.deleteDocument(n); }
@Override
Index: lucene/src/java/org/apache/lucene/index/TermPositions.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/TermPositions.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/TermPositions.java (working copy)
@@ -1,80 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-/**
- * TermPositions provides an interface for enumerating the <document,
- * frequency, <position>* > tuples for a term. The document and
- * frequency are the same as for a TermDocs. The positions portion lists the ordinal
- * positions of each occurrence of a term in a document.
- *
- * @see IndexReader#termPositions()
- * @deprecated Use {@link DocsAndPositionsEnum} instead
- */
-@Deprecated
-public interface TermPositions
- extends TermDocs
-{
- /** Returns next position in the current document. It is an error to call
- this more than {@link #freq()} times
- without calling {@link #next()}<p> This is
- invalid until {@link #next()} is called for
- the first time.
- */
- int nextPosition() throws IOException;
-
- /**
- * Returns the length of the payload at the current term position.
- * This is invalid until {@link #nextPosition()} is called for
- * the first time.
- * @return length of the current payload in number of bytes
- */
- int getPayloadLength() throws IOException;
-
- /**
- * Returns the payload data at the current term position.
- * This is invalid until {@link #nextPosition()} is called for
- * the first time.
- * This method must not be called more than once after each call
- * of {@link #nextPosition()}. However, payloads are loaded lazily,
- * so if the payload data for the current position is not needed,
- * this method may not be called at all for performance reasons.
- *
- * @param data the array into which the data of this payload is to be
- * stored, if it is big enough; otherwise, a new byte[] array
- * is allocated for this purpose.
- * @param offset the offset in the array into which the data of this payload
- * is to be stored.
- * @return a byte[] array containing the data of this payload
- * @throws IOException
- */
- byte[] getPayload(byte[] data, int offset) throws IOException;
-
- /**
- * Checks if a payload can be loaded at this position.
- *
- * Payloads can only be loaded once per call to
- * {@link #nextPosition()}.
- *
- * @return true if there is a payload available at this position that can be loaded
- */
- public boolean isPayloadAvailable();
-
-}
Index: lucene/src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/IndexReader.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/IndexReader.java (working copy)
@@ -868,18 +868,6 @@
setNorm(doc, field, Similarity.getDefault().encodeNormValue(value));
}
- /** Returns an enumeration of all the terms in the index. The
- * enumeration is ordered by Term.compareTo(). Each term is greater
- * than all that precede it in the enumeration. Note that after
- * calling terms(), {@link TermEnum#next()} must be called
- * on the resulting enumeration before calling other methods such as
- * {@link TermEnum#term()}.
- * @deprecated Use the new flex API ({@link #fields()}) instead.
- * @throws IOException if there is a low-level IO error
- */
- @Deprecated
- public abstract TermEnum terms() throws IOException;
-
/** Flex API: returns {@link Fields} for this reader.
* This method may return null if the reader has no
* postings.
@@ -893,24 +881,10 @@
* using {@link ReaderUtil#gatherSubReaders} and iterate
* through them yourself. */
public abstract Fields fields() throws IOException;
-
- /** Returns an enumeration of all terms starting at a given term. If
- * the given term does not exist, the enumeration is positioned at the
- * first term greater than the supplied term. The enumeration is
- * ordered by Term.compareTo(). Each term is greater than all that
- * precede it in the enumeration.
- * @deprecated Use the new flex API ({@link #fields()}) instead.
- * @throws IOException if there is a low-level IO error
- */
- @Deprecated
- public abstract TermEnum terms(Term t) throws IOException;
- /** Returns the number of documents containing the term t.
- * @throws IOException if there is a low-level IO error
- * @deprecated Use {@link #docFreq(String,BytesRef)} instead.
- */
- @Deprecated
- public abstract int docFreq(Term t) throws IOException;
+ public int docFreq(Term term) throws IOException {
+ return docFreq(term.field(), new BytesRef(term.text()));
+ }
/** Returns the number of documents containing the term
* t. This method returns 0 if the term or
@@ -929,28 +903,6 @@
return terms.docFreq(term);
}
- /** Returns an enumeration of all the documents which contain
- * term. For each document, the document number, the frequency of
- * the term in that document is also provided, for use in
- * search scoring. If term is null, then all non-deleted
- * docs are returned with freq=1.
- * Thus, this method implements the mapping:
- *
- * Term => <docNum, freq>*
- *
- * The enumeration is ordered by document number. Each document number
- * is greater than all that precede it in the enumeration.
- * @deprecated Use the new flex API ({@link #termDocsEnum}) instead.
- * @throws IOException if there is a low-level IO error
- */
- @Deprecated
- public TermDocs termDocs(Term term) throws IOException {
- ensureOpen();
- TermDocs termDocs = termDocs();
- termDocs.seek(term);
- return termDocs;
- }
-
/** This may return null if the field does not exist.*/
public Terms terms(String field) throws IOException {
final Fields fields = fields();
@@ -997,50 +949,6 @@
}
}
- /** Returns an unpositioned {@link TermDocs} enumerator.
- * @deprecated Use the new flex API ({@link #fields()}) instead.
- * @throws IOException if there is a low-level IO error
- */
- @Deprecated
- public abstract TermDocs termDocs() throws IOException;
-
- /** Returns an enumeration of all the documents which contain
- * term. For each document, in addition to the document number
- * and frequency of the term in that document, a list of all of the ordinal
- * positions of the term in the document is available. Thus, this method
- * implements the mapping:
- *
- *
- * Term => <docNum, freq,
- * <pos1, pos2, ...
- * posfreq-1>
- * >*
- *
- * This positional information facilitates phrase and proximity searching.
- *
- * The enumeration is ordered by document number. Each document number is
- * greater than all that precede it in the enumeration.
- * @deprecated Please switch the flex API ({@link
- * #termDocsEnum}) instead
- * @throws IOException if there is a low-level IO error
- */
- @Deprecated
- public TermPositions termPositions(Term term) throws IOException {
- ensureOpen();
- TermPositions termPositions = termPositions();
- termPositions.seek(term);
- return termPositions;
- }
-
- /** Returns an unpositioned {@link TermPositions} enumerator.
- * @deprecated Please switch the flex API ({@link
- * #termDocsEnum}) instead
- * @throws IOException if there is a low-level IO error
- */
- @Deprecated
- public abstract TermPositions termPositions() throws IOException;
-
-
-
/** Deletes the document numbered docNum. Once a document is
* deleted it will not appear in TermDocs or TermPositions enumerations.
* Attempts to read its field with the {@link #document}
@@ -1089,16 +997,16 @@
*/
public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
ensureOpen();
- TermDocs docs = termDocs(term);
+ DocsEnum docs = MultiFields.getTermDocsEnum(this,
+ MultiFields.getDeletedDocs(this),
+ term.field(),
+ new BytesRef(term.text()));
if (docs == null) return 0;
int n = 0;
- try {
- while (docs.next()) {
- deleteDocument(docs.doc());
- n++;
- }
- } finally {
- docs.close();
+ int doc;
+ while ((doc = docs.nextDoc()) != docs.NO_MORE_DOCS) {
+ deleteDocument(doc);
+ n++;
}
return n;
}
Index: lucene/src/java/org/apache/lucene/index/TermEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/TermEnum.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/TermEnum.java (working copy)
@@ -1,42 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.Closeable;
-
-/** Abstract class for enumerating terms.
-
-
- Term enumerations are always ordered by Term.compareTo(). Each term in
- the enumeration is greater than all that precede it.
-* @deprecated Use TermsEnum instead */
-
-@Deprecated
-public abstract class TermEnum implements Closeable {
- /** Increments the enumeration to the next element. True if one exists.*/
- public abstract boolean next() throws IOException;
-
- /** Returns the current Term in the enumeration.*/
- public abstract Term term();
-
- /** Returns the docFreq of the current Term in the enumeration.*/
- public abstract int docFreq();
-
- /** Closes the enumeration to further activity, freeing resources. */
- public abstract void close() throws IOException;
-}
Index: lucene/src/java/org/apache/lucene/index/TermDocs.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/TermDocs.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/TermDocs.java (working copy)
@@ -1,85 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.Closeable;
-
-/** TermDocs provides an interface for enumerating <document, frequency>
- pairs for a term.
- The document portion names each document containing
- the term. Documents are indicated by number. The frequency portion gives
- the number of times the term occurred in each document.
- The pairs are
- ordered by document number.
-
- @see IndexReader#termDocs()
- @deprecated Use {@link DocsEnum} instead
-*/
-
-@Deprecated
-public interface TermDocs extends Closeable {
- /** Sets this to the data for a term.
- * The enumeration is reset to the start of the data for this term.
- */
- void seek(Term term) throws IOException;
-
- /** Sets this to the data for the current term in a {@link TermEnum}.
- * This may be optimized in some implementations.
- */
- void seek(TermEnum termEnum) throws IOException;
-
- /** Returns the current document number.
- This is invalid until {@link
- #next()} is called for the first time.*/
- int doc();
-
- /** Returns the frequency of the term within the current document.
- This
- is invalid until {@link #next()} is called for the first time.*/
- int freq();
-
- /** Moves to the next pair in the enumeration.
- Returns true iff there is
- such a next pair in the enumeration. */
- boolean next() throws IOException;
-
- /** Attempts to read multiple entries from the enumeration, up to length of
- * docs. Document numbers are stored in docs, and term
- * frequencies are stored in freqs. The freqs array must be as
- * long as the docs array.
- *
- *
- * Returns the number of entries read. Zero is only returned when the
- * stream has been exhausted. */
- int read(int[] docs, int[] freqs) throws IOException;
-
- /** Skips entries to the first beyond the current whose document number is
- * greater than or equal to target.
- * Returns true iff there is such
- * an entry.
- * Behaves as if written:
- * boolean skipTo(int target) {
- * do {
- * if (!next())
- * return false;
- * } while (target > doc());
- * return true;
- * }
- *
- * Some implementations are considerably more efficient than that.
- */
- boolean skipTo(int target) throws IOException;
-
- /** Frees associated resources. */
- void close() throws IOException;
-}
-
-
Index: lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java (working copy)
@@ -152,11 +152,19 @@
@Override
public DocsEnum docs(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
- final SegmentDocsEnum docsEnum;
+ SegmentDocsEnum docsEnum;
if (reuse == null) {
docsEnum = new SegmentDocsEnum(freqIn);
+ } else if (!(reuse instanceof SegmentDocsEnum)) {
+ docsEnum = new SegmentDocsEnum(freqIn);
} else {
docsEnum = (SegmentDocsEnum) reuse;
+ if (docsEnum.freqIn != freqIn) {
+ // If you are using ParallelReader, and pass in a
+ // reused DocsEnum, it could have come from another
+ // reader also using standard codec
+ docsEnum = new SegmentDocsEnum(freqIn);
+ }
}
return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
}
Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java (working copy)
@@ -20,7 +20,6 @@
import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.CorruptIndexException;
@@ -30,7 +29,7 @@
* @lucene.experimental */
@Deprecated
-public final class SegmentTermEnum extends TermEnum implements Cloneable {
+public final class SegmentTermEnum implements Cloneable {
private IndexInput input;
FieldInfos fieldInfos;
long size;
@@ -128,7 +127,6 @@
}
/** Increments the enumeration to the next element. True if one exists.*/
- @Override
public final boolean next() throws IOException {
if (position++ >= size - 1) {
prevBuffer.set(termBuffer);
@@ -176,7 +174,6 @@
/** Returns the current Term in the enumeration.
Initially invalid, valid after next() called for the first time.*/
- @Override
public final Term term() {
return termBuffer.toTerm();
}
@@ -200,7 +197,6 @@
/** Returns the docFreq from the current TermInfo in the enumeration.
Initially invalid, valid after next() called for the first time.*/
- @Override
public final int docFreq() {
return termInfo.docFreq;
}
@@ -218,7 +214,6 @@
}
/** Closes the enumeration to further activity, freeing resources. */
- @Override
public final void close() throws IOException {
input.close();
}
Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java (working copy)
@@ -21,12 +21,11 @@
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermPositions;
import org.apache.lucene.store.IndexInput;
/** @lucene.experimental */
public final class SegmentTermPositions
-extends SegmentTermDocs implements TermPositions {
+extends SegmentTermDocs {
private IndexInput proxStream;
private IndexInput proxStreamOrig;
private int proxCount;
@@ -55,7 +54,6 @@
this.proxStreamOrig = proxStream; // the proxStream will be cloned lazily when nextPosition() is called for the first time
}
- @Override
final void seek(TermInfo ti, Term term) throws IOException {
super.seek(ti, term);
if (ti != null)
@@ -67,7 +65,6 @@
needToLoadPayload = false;
}
- @Override
public final void close() throws IOException {
super.close();
if (proxStream != null) proxStream.close();
@@ -99,13 +96,11 @@
return delta;
}
- @Override
protected final void skippingDoc() throws IOException {
// we remember to skip a document lazily
lazySkipProxCount += freq;
}
- @Override
public final boolean next() throws IOException {
// we remember to skip the remaining positions of the current
// document lazily
@@ -119,14 +114,12 @@
return false;
}
- @Override
public final int read(final int[] docs, final int[] freqs) {
throw new UnsupportedOperationException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead.");
}
/** Called by super.skipTo(). */
- @Override
protected void skipProx(long proxPointer, int payloadLength) throws IOException {
// we save the pointer, we might have to skip there lazily
lazySkipPointer = proxPointer;
Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (revision 954967)
+++ lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (working copy)
@@ -22,8 +22,6 @@
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.codecs.standard.DefaultSkipListReader;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
@@ -31,7 +29,7 @@
/** @deprecated
* @lucene.experimental */
@Deprecated
-public class SegmentTermDocs implements TermDocs {
+public class SegmentTermDocs {
//protected SegmentReader parent;
private final FieldInfos fieldInfos;
private final TermInfosReader tis;
@@ -84,17 +82,16 @@
this.skipDocs = skipDocs;
}
- public void seek(TermEnum termEnum) throws IOException {
+ public void seek(SegmentTermEnum segmentTermEnum) throws IOException {
TermInfo ti;
Term term;
// use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
- if (termEnum instanceof SegmentTermEnum && ((SegmentTermEnum) termEnum).fieldInfos == fieldInfos) { // optimized case
- SegmentTermEnum segmentTermEnum = ((SegmentTermEnum) termEnum);
+ if (segmentTermEnum.fieldInfos == fieldInfos) { // optimized case
term = segmentTermEnum.term();
ti = segmentTermEnum.termInfo();
} else { // punt case
- term = termEnum.term();
+ term = segmentTermEnum.term();
ti = tis.get(term);
}
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java
===================================================================
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java (revision 954967)
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java (working copy)
@@ -58,7 +58,10 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
@@ -84,8 +87,15 @@
public BasicQueryFactory getBasicQueryFactory() {return qf;}
- public TermEnum getTermEnum(String termText) throws IOException {
- return getIndexReader().terms(new Term(getFieldName(), termText));
+ public TermsEnum getTermsEnum(String termText) throws IOException {
+ Terms terms = MultiFields.getTerms(getIndexReader(), getFieldName());
+ if (terms != null) {
+ TermsEnum termsEnum = terms.iterator();
+ termsEnum.seek(new BytesRef(termText));
+ return termsEnum;
+ } else {
+ return null;
+ }
}
public int size() {return weightBySpanQuery.size();}
Index: lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java
===================================================================
--- lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java (revision 954967)
+++ lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java (working copy)
@@ -23,10 +23,13 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
import java.io.File;
import java.util.Date;
@@ -39,7 +42,7 @@
private static boolean deleting = false; // true during deletion pass
private static IndexReader reader; // existing index
private static IndexWriter writer; // new index being built
- private static TermEnum uidIter; // document id iterator
+ private static TermsEnum uidIter; // document id iterator
/** Indexer for HTML files.*/
public static void main(String[] argv) {
@@ -110,21 +113,24 @@
if (!create) { // incrementally update
reader = IndexReader.open(FSDirectory.open(index), false); // open existing index
- uidIter = reader.terms(new Term("uid", "")); // init uid iterator
+ Terms terms = MultiFields.getTerms(reader, "uid");
+ if (terms != null) {
+ uidIter = terms.iterator();
- indexDocs(file);
+ indexDocs(file);
- if (deleting) { // delete rest of stale docs
- while (uidIter.term() != null && uidIter.term().field() == "uid") {
- System.out.println("deleting " +
- HTMLDocument.uid2url(uidIter.term().text()));
- reader.deleteDocuments(uidIter.term());
- uidIter.next();
+ if (deleting) { // delete rest of stale docs
+ BytesRef text;
+ while ((text=uidIter.next()) != null) {
+ String termText = text.utf8ToString();
+ System.out.println("deleting " +
+ HTMLDocument.uid2url(termText));
+ reader.deleteDocuments(new Term("uid", termText));
+ }
+ deleting = false;
}
- deleting = false;
}
- uidIter.close(); // close uid iterator
reader.close(); // close existing index
} else // don't have exisiting
@@ -145,17 +151,21 @@
if (uidIter != null) {
String uid = HTMLDocument.uid(file); // construct uid for doc
- while (uidIter.term() != null && uidIter.term().field() == "uid" &&
- uidIter.term().text().compareTo(uid) < 0) {
- if (deleting) { // delete stale docs
- System.out.println("deleting " +
- HTMLDocument.uid2url(uidIter.term().text()));
- reader.deleteDocuments(uidIter.term());
+ BytesRef text;
+ while((text = uidIter.next()) != null) {
+ String termText = text.utf8ToString();
+ if (termText.compareTo(uid) < 0) {
+ if (deleting) { // delete stale docs
+ System.out.println("deleting " +
+ HTMLDocument.uid2url(termText));
+ reader.deleteDocuments(new Term("uid", termText));
+ }
+ } else {
+ break;
}
- uidIter.next();
}
- if (uidIter.term() != null && uidIter.term().field() == "uid" &&
- uidIter.term().text().compareTo(uid) == 0) {
+ if (text != null &&
+ text.utf8ToString().compareTo(uid) == 0) {
uidIter.next(); // keep matching docs
} else if (!deleting) { // add new docs
Document doc = HTMLDocument.Document(file);
Index: lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java
===================================================================
--- lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java (revision 954967)
+++ lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java (working copy)
@@ -23,8 +23,8 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
@@ -84,11 +84,11 @@
}
}
- public void testTermEnum() throws Exception {
+ public void testTermsEnum() throws Exception {
InstantiatedIndex ii = new InstantiatedIndex();
IndexReader r = new InstantiatedIndexReader(ii);
- termEnumTest(r);
+ termsEnumTest(r);
r.close();
ii.close();
@@ -97,17 +97,13 @@
Directory d = new RAMDirectory();
new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())).close();
r = IndexReader.open(d, false);
- termEnumTest(r);
+ termsEnumTest(r);
r.close();
d.close();
}
- public void termEnumTest(IndexReader r) throws Exception {
- TermEnum terms = r.terms();
-
- assertNull(terms.term());
- assertFalse(terms.next());
-
+ public void termsEnumTest(IndexReader r) throws Exception {
+ assertNull(MultiFields.getFields(r));
}
}
Index: lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
===================================================================
--- lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (revision 954967)
+++ lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (working copy)
@@ -33,15 +33,18 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
-import org.apache.lucene.index.TermPositions;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
/**
* Asserts equality of content and behaviour of two index readers.
@@ -120,132 +123,70 @@
// test seek
Term t = new Term("c", "danny");
- TermEnum aprioriTermEnum = aprioriReader.terms(t);
- TermEnum testTermEnum = testReader.terms(t);
-
+ TermsEnum aprioriTermEnum = MultiFields.getTerms(aprioriReader, t.field()).iterator();
+ aprioriTermEnum.seek(new BytesRef(t.text()));
+ TermsEnum testTermEnum = MultiFields.getTerms(testReader, t.field()).iterator();
+ testTermEnum.seek(new BytesRef(t.text()));
assertEquals(aprioriTermEnum.term(), testTermEnum.term());
- t = aprioriTermEnum.term();
+ DocsEnum aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), null);
+ DocsEnum testTermDocs = testTermEnum.docs(MultiFields.getDeletedDocs(testReader), null);
- aprioriTermEnum.close();
- testTermEnum.close();
-
- TermDocs aprioriTermDocs = aprioriReader.termDocs(t);
- TermDocs testTermDocs = testReader.termDocs(t);
-
- assertEquals(aprioriTermDocs.next(), testTermDocs.next());
+ assertEquals(aprioriTermDocs.nextDoc(), testTermDocs.nextDoc());
assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- if (aprioriTermDocs.skipTo(4)) {
- assertTrue(testTermDocs.skipTo(4));
+ if (aprioriTermDocs.advance(4) != DocsEnum.NO_MORE_DOCS) {
+ assertTrue(testTermDocs.advance(4) != DocsEnum.NO_MORE_DOCS);
assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
+ assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
} else {
- assertFalse(testTermDocs.skipTo(4));
+ assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.advance(4));
}
- if (aprioriTermDocs.next()) {
- assertTrue(testTermDocs.next());
+ if (aprioriTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ assertTrue(testTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
+ assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
} else {
- assertFalse(testTermDocs.next());
+ assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.nextDoc());
}
// beyond this point all next and skipto will return false
- if (aprioriTermDocs.skipTo(100)) {
- assertTrue(testTermDocs.skipTo(100));
+ if (aprioriTermDocs.advance(100) != DocsEnum.NO_MORE_DOCS) {
+ assertTrue(testTermDocs.advance(100) != DocsEnum.NO_MORE_DOCS);
assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
+ assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
} else {
- assertFalse(testTermDocs.skipTo(100));
+ assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.advance(100));
}
+ // start using the API the way one is supposed to use it
- if (aprioriTermDocs.next()) {
- assertTrue(testTermDocs.next());
- assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- } else {
- assertFalse(testTermDocs.next());
- }
+ t = new Term("", "");
+ FieldsEnum apFieldsEnum = MultiFields.getFields(aprioriReader).iterator();
+ String apFirstField = apFieldsEnum.next();
- if (aprioriTermDocs.skipTo(110)) {
- assertTrue(testTermDocs.skipTo(110));
- assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- } else {
- assertFalse(testTermDocs.skipTo(110));
- }
+ FieldsEnum testFieldsEnum = MultiFields.getFields(testReader).iterator();
+ String testFirstField = testFieldsEnum.next();
+ assertEquals(apFirstField, testFirstField);
- if (aprioriTermDocs.skipTo(10)) {
- assertTrue(testTermDocs.skipTo(10));
- assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- } else {
- assertFalse(testTermDocs.skipTo(10));
- }
+ aprioriTermEnum = apFieldsEnum.terms();
+ testTermEnum = testFieldsEnum.terms();
+
+ assertEquals(aprioriTermEnum.next(), testTermEnum.next());
+
+ aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), aprioriTermDocs);
+ testTermDocs = testTermEnum.docs(MultiFields.getDeletedDocs(testReader), testTermDocs);
-
- if (aprioriTermDocs.skipTo(210)) {
- assertTrue(testTermDocs.skipTo(210));
+ while (aprioriTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ assertTrue(testTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- } else {
- assertFalse(testTermDocs.skipTo(210));
+ assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
}
+ assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.nextDoc());
- aprioriTermDocs.close();
- testTermDocs.close();
-
-
-
- // test seek null (AllTermDocs)
- aprioriTermDocs = aprioriReader.termDocs(null);
- testTermDocs = testReader.termDocs(null);
-
- while (aprioriTermDocs.next()) {
- assertTrue(testTermDocs.next());
- assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- }
- assertFalse(testTermDocs.next());
-
-
- aprioriTermDocs.close();
- testTermDocs.close();
-
-
- // test seek default
- aprioriTermDocs = aprioriReader.termDocs();
- testTermDocs = testReader.termDocs();
-
- // this is invalid use of the API,
- // but if the response differs then it's an indication that something might have changed.
- // in 2.9 and 3.0 the two TermDocs-implementations returned different values at this point.
-// assertEquals("Descripency during invalid use of the TermDocs API, see comments in test code for details.",
-// aprioriTermDocs.next(), testTermDocs.next());
-
- // start using the API the way one is supposed to use it
-
- t = new Term("", "");
- aprioriTermDocs.seek(t);
- testTermDocs.seek(t);
-
- while (aprioriTermDocs.next()) {
- assertTrue(testTermDocs.next());
- assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- }
- assertFalse(testTermDocs.next());
-
- aprioriTermDocs.close();
- testTermDocs.close();
-
-
// clean up
aprioriReader.close();
testReader.close();
@@ -443,98 +384,85 @@
// compare term enumeration stepping
- TermEnum aprioriTermEnum = aprioriReader.terms();
- TermEnum testTermEnum = testReader.terms();
+ FieldsEnum aprioriFieldsEnum = MultiFields.getFields(aprioriReader).iterator();
+ FieldsEnum testFieldsEnum = MultiFields.getFields(testReader).iterator();
+ String aprioriField;
+ while((aprioriField = aprioriFieldsEnum.next()) != null) {
+ String testField = testFieldsEnum.next();
+ assertEquals(aprioriField, testField);
- while (true) {
+ TermsEnum aprioriTermEnum = aprioriFieldsEnum.terms();
+ TermsEnum testTermEnum = testFieldsEnum.terms();
- if (!aprioriTermEnum.next()) {
- assertFalse(testTermEnum.next());
- break;
- }
- assertTrue(testTermEnum.next());
+ BytesRef aprioriText;
+ while((aprioriText = aprioriTermEnum.next()) != null) {
+ assertEquals(aprioriText, testTermEnum.next());
- assertEquals(aprioriTermEnum.term(), testTermEnum.term());
- assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());
+ assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());
- // compare termDocs seeking
+ // compare termDocs seeking
- TermDocs aprioriTermDocsSeeker = aprioriReader.termDocs(aprioriTermEnum.term());
- TermDocs testTermDocsSeeker = testReader.termDocs(testTermEnum.term());
+ DocsEnum aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), null);
+ DocsEnum testTermDocs = testTermEnum.docs(MultiFields.getDeletedDocs(testReader), null);
+
+ while (aprioriTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ assertTrue(testTermDocs.advance(aprioriTermDocs.docID()) != DocsEnum.NO_MORE_DOCS);
+ assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
+ }
+
+ // compare documents per term
+
+ assertEquals(aprioriReader.docFreq(aprioriField, aprioriTermEnum.term()), testReader.docFreq(aprioriField, testTermEnum.term()));
- while (aprioriTermDocsSeeker.next()) {
- assertTrue(testTermDocsSeeker.skipTo(aprioriTermDocsSeeker.doc()));
- assertEquals(aprioriTermDocsSeeker.doc(), testTermDocsSeeker.doc());
- }
+ aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), aprioriTermDocs);
+ testTermDocs = testTermEnum.docs(MultiFields.getDeletedDocs(testReader), testTermDocs);
- aprioriTermDocsSeeker.close();
- testTermDocsSeeker.close();
+ while (true) {
+ if (aprioriTermDocs.nextDoc() == DocsEnum.NO_MORE_DOCS) {
+ assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.nextDoc());
+ break;
+ }
+ assertTrue(testTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
- // compare documents per term
-
- assertEquals(aprioriReader.docFreq(aprioriTermEnum.term()), testReader.docFreq(testTermEnum.term()));
-
- TermDocs aprioriTermDocs = aprioriReader.termDocs(aprioriTermEnum.term());
- TermDocs testTermDocs = testReader.termDocs(testTermEnum.term());
-
- while (true) {
- if (!aprioriTermDocs.next()) {
- assertFalse(testTermDocs.next());
- break;
+ assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
+ assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
}
- assertTrue(testTermDocs.next());
- assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
- assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
- }
+ // compare term positions
- aprioriTermDocs.close();
- testTermDocs.close();
+ DocsAndPositionsEnum aprioriTermPositions = aprioriTermEnum.docsAndPositions(MultiFields.getDeletedDocs(aprioriReader), null);
+ DocsAndPositionsEnum testTermPositions = testTermEnum.docsAndPositions(MultiFields.getDeletedDocs(testReader), null);
- // compare term positions
+ if (aprioriTermPositions != null) {
- TermPositions testTermPositions = testReader.termPositions(testTermEnum.term());
- TermPositions aprioriTermPositions = aprioriReader.termPositions(aprioriTermEnum.term());
+ for (int docIndex = 0; docIndex < aprioriReader.maxDoc(); docIndex++) {
+ boolean hasNext = aprioriTermPositions.nextDoc() != DocsEnum.NO_MORE_DOCS;
+ if (hasNext) {
+ assertTrue(testTermPositions.nextDoc() != DocsEnum.NO_MORE_DOCS);
+
+ assertEquals(aprioriTermPositions.freq(), testTermPositions.freq());
- if (aprioriTermPositions != null) {
+ for (int termPositionIndex = 0; termPositionIndex < aprioriTermPositions.freq(); termPositionIndex++) {
+ int aprioriPos = aprioriTermPositions.nextPosition();
+ int testPos = testTermPositions.nextPosition();
- for (int docIndex = 0; docIndex < aprioriReader.maxDoc(); docIndex++) {
- boolean hasNext = aprioriTermPositions.next();
- if (hasNext) {
- assertTrue(testTermPositions.next());
-
- assertEquals(aprioriTermPositions.freq(), testTermPositions.freq());
-
-
- for (int termPositionIndex = 0; termPositionIndex < aprioriTermPositions.freq(); termPositionIndex++) {
- int aprioriPos = aprioriTermPositions.nextPosition();
- int testPos = testTermPositions.nextPosition();
-
- if (aprioriPos != testPos) {
assertEquals(aprioriPos, testPos);
- }
-
- assertEquals(aprioriTermPositions.isPayloadAvailable(), testTermPositions.isPayloadAvailable());
- if (aprioriTermPositions.isPayloadAvailable()) {
- assertEquals(aprioriTermPositions.getPayloadLength(), testTermPositions.getPayloadLength());
- byte[] aprioriPayloads = aprioriTermPositions.getPayload(new byte[aprioriTermPositions.getPayloadLength()], 0);
- byte[] testPayloads = testTermPositions.getPayload(new byte[testTermPositions.getPayloadLength()], 0);
- for (int i = 0; i < aprioriPayloads.length; i++) {
- assertEquals(aprioriPayloads[i], testPayloads[i]);
+ assertEquals(aprioriTermPositions.hasPayload(), testTermPositions.hasPayload());
+ if (aprioriTermPositions.hasPayload()) {
+ BytesRef apPayload = aprioriTermPositions.getPayload();
+ BytesRef testPayload = testTermPositions.getPayload();
+ assertEquals(apPayload, testPayload);
}
}
-
}
}
}
-
- aprioriTermPositions.close();
- testTermPositions.close();
-
}
+ assertNull(testTermEnum.next());
}
+ assertNull(testFieldsEnum.next());
// compare term vectors and position vectors
@@ -589,12 +517,8 @@
}
}
-
}
- aprioriTermEnum.close();
- testTermEnum.close();
-
aprioriReader.close();
testReader.close();
}
Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
===================================================================
--- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (revision 954967)
+++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (working copy)
@@ -371,49 +371,11 @@
}
@Override
- public TermEnum terms() throws IOException {
- return new InstantiatedTermEnum(this);
- }
-
- @Override
- public TermEnum terms(Term t) throws IOException {
- InstantiatedTerm it = getIndex().findTerm(t);
- if (it != null) {
- return new InstantiatedTermEnum(this, it.getTermIndex());
- } else {
- int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator);
- if (startPos < 0) {
- startPos = -1 - startPos;
- }
- return new InstantiatedTermEnum(this, startPos);
+ public Fields fields() {
+ if (getIndex().getOrderedTerms().length == 0) {
+ return null;
}
- }
- @Override
- public TermDocs termDocs() throws IOException {
- return new InstantiatedTermDocs(this);
- }
-
-
- @Override
- public TermDocs termDocs(Term term) throws IOException {
- if (term == null) {
- return new InstantiatedAllTermDocs(this);
- } else {
- InstantiatedTermDocs termDocs = new InstantiatedTermDocs(this);
- termDocs.seek(term);
- return termDocs;
- }
- }
-
- @Override
- public TermPositions termPositions() throws IOException {
- return new InstantiatedTermPositions(this);
- }
-
- @Override
- public Fields fields() {
-
return new Fields() {
@Override
public FieldsEnum iterator() {
Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java
===================================================================
--- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java (revision 954967)
+++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java (working copy)
@@ -1,83 +0,0 @@
-package org.apache.lucene.store.instantiated;
-
-/**
- * Copyright 2006 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
-
-/**
- * A {@link org.apache.lucene.index.TermEnum} navigating an {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader}.
- */
-public class InstantiatedTermEnum extends TermEnum {
-
- private final InstantiatedIndexReader reader;
-
- public InstantiatedTermEnum(InstantiatedIndexReader reader) {
- this.nextTermIndex = 0;
- this.reader = reader;
- }
-
- public InstantiatedTermEnum(InstantiatedIndexReader reader, int startPosition) {
- this.reader = reader;
- this.nextTermIndex = startPosition;
- next();
- }
-
- private int nextTermIndex;
- private InstantiatedTerm term;
-
- /**
- * Increments the enumeration to the next element. True if one exists.
- */
- @Override
- public boolean next() {
- if (reader.getIndex().getOrderedTerms().length <= nextTermIndex) {
- return false;
- } else {
- term = reader.getIndex().getOrderedTerms()[nextTermIndex];
- nextTermIndex++;
- return true;
- }
- }
-
- /**
- * Returns the current Term in the enumeration.
- */
- @Override
- public Term term() {
- return term == null ? null : term.getTerm();
- }
-
- /**
- * Returns the docFreq of the current Term in the enumeration.
- */
- @Override
- public int docFreq() {
- return term.getAssociatedDocuments().length;
- }
-
- /**
- * Closes the enumeration to further activity, freeing resources.
- */
- @Override
- public void close() {
- }
-
-}
-
-
-
Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
===================================================================
--- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (revision 954967)
+++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (working copy)
@@ -31,10 +31,15 @@
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.TermPositionVector;
-import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.util.BitVector;
+import org.apache.lucene.util.BytesRef;
/**
* Represented as a coupled graph of class instances, this
@@ -220,34 +225,46 @@
}
}
List terms = new ArrayList(5000 * getTermsByFieldAndText().size());
- TermEnum termEnum = sourceIndexReader.terms();
- while (termEnum.next()) {
- if (fields == null || fields.contains(termEnum.term().field())) { // todo skipto if not using field
- InstantiatedTerm instantiatedTerm = new InstantiatedTerm(termEnum.term().field(), termEnum.term().text());
- getTermsByFieldAndText().get(termEnum.term().field()).put(termEnum.term().text(), instantiatedTerm);
- instantiatedTerm.setTermIndex(terms.size());
- terms.add(instantiatedTerm);
- instantiatedTerm.setAssociatedDocuments(new InstantiatedTermDocumentInformation[termEnum.docFreq()]);
+ Fields fieldsC = MultiFields.getFields(sourceIndexReader);
+ if (fieldsC != null) {
+ FieldsEnum fieldsEnum = fieldsC.iterator();
+ String field;
+ while((field = fieldsEnum.next()) != null) {
+ if (fields == null || fields.contains(field)) {
+ TermsEnum termsEnum = fieldsEnum.terms();
+ BytesRef text;
+ while((text = termsEnum.next()) != null) {
+ String termText = text.utf8ToString();
+ InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText);
+ getTermsByFieldAndText().get(field).put(termText, instantiatedTerm);
+ instantiatedTerm.setTermIndex(terms.size());
+ terms.add(instantiatedTerm);
+ instantiatedTerm.setAssociatedDocuments(new InstantiatedTermDocumentInformation[termsEnum.docFreq()]);
+ }
+ }
}
}
- termEnum.close();
orderedTerms = terms.toArray(new InstantiatedTerm[terms.size()]);
// create term-document informations
for (InstantiatedTerm term : orderedTerms) {
- TermPositions termPositions = sourceIndexReader.termPositions(term.getTerm());
+ DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(sourceIndexReader,
+ MultiFields.getDeletedDocs(sourceIndexReader),
+ term.getTerm().field(),
+ new BytesRef(term.getTerm().text()));
int position = 0;
- while (termPositions.next()) {
- InstantiatedDocument document = documentsByNumber[termPositions.doc()];
+ while (termPositions.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ InstantiatedDocument document = documentsByNumber[termPositions.docID()];
byte[][] payloads = new byte[termPositions.freq()][];
int[] positions = new int[termPositions.freq()];
for (int i = 0; i < termPositions.freq(); i++) {
positions[i] = termPositions.nextPosition();
- if (termPositions.isPayloadAvailable()) {
- payloads[i] = new byte[termPositions.getPayloadLength()];
- termPositions.getPayload(payloads[i], 0);
+ if (termPositions.hasPayload()) {
+ BytesRef br = termPositions.getPayload();
+ payloads[i] = new byte[br.length];
+ System.arraycopy(br.bytes, br.offset, payloads[i], 0, br.length);
}
}
Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java
===================================================================
--- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java (revision 954967)
+++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java (working copy)
@@ -1,35 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.store.instantiated;
-
-import org.apache.lucene.index.AbstractAllTermDocs;
-
-class InstantiatedAllTermDocs extends AbstractAllTermDocs {
-
- private InstantiatedIndexReader reader;
-
- InstantiatedAllTermDocs(InstantiatedIndexReader reader) {
- super(reader.maxDoc());
- this.reader = reader;
- }
-
- @Override
- public boolean isDeleted(int doc) {
- return reader.isDeleted(doc);
- }
-}
Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsEnum.java
===================================================================
--- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsEnum.java (revision 954967)
+++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsEnum.java (working copy)
@@ -54,7 +54,7 @@
@Override
public int advance(int target) {
- if (currentDoc.getDocument().getDocumentNumber() >= target) {
+ if (currentDoc != null && currentDoc.getDocument().getDocumentNumber() >= target) {
return nextDoc();
}
Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermPositions.java
===================================================================
--- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermPositions.java (revision 954967)
+++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermPositions.java (working copy)
@@ -1,100 +0,0 @@
-package org.apache.lucene.store.instantiated;
-
-/**
- * Copyright 2006 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.TermPositions;
-
-import java.io.IOException;
-
-/**
- * A {@link org.apache.lucene.index.TermPositions} navigating an {@link InstantiatedIndexReader}.
- */
-public class InstantiatedTermPositions
- extends InstantiatedTermDocs
- implements TermPositions {
-
- public int getPayloadLength() {
- return currentDocumentInformation.getPayloads()[currentTermPositionIndex].length;
- }
-
- public byte[] getPayload(byte[] data, int offset) throws IOException {
- byte[] payloads = currentDocumentInformation.getPayloads()[currentTermPositionIndex];
-
- // read payloads lazily
- if (data == null || data.length - offset < getPayloadLength()) {
- // the array is too small to store the payload data,
- return payloads;
- } else {
- System.arraycopy(payloads, 0, data, offset, payloads.length);
- return data;
- }
- }
-
- public boolean isPayloadAvailable() {
- return currentDocumentInformation.getPayloads()[currentTermPositionIndex] != null;
- }
-
- public InstantiatedTermPositions(InstantiatedIndexReader reader) {
- super(reader);
- }
-
- /**
- * Returns next position in the current document. It is an error to call
- * this more than {@link #freq()} times
- * without calling {@link #next()} This is
- * invalid until {@link #next()} is called for
- * the first time.
- */
- public int nextPosition() {
- currentTermPositionIndex++;
- // if you get an array out of index exception here,
- // it might be due to currentDocumentInformation.getIndexFromTerm not being set!!
- return currentDocumentInformation.getTermPositions()[currentTermPositionIndex];
- }
-
- private int currentTermPositionIndex;
-
- /**
- * Moves to the next pair in the enumeration.
- *
- * <br>Returns true if there is such a next pair in the enumeration.
- */
- @Override
- public boolean next() {
- currentTermPositionIndex = -1;
- return super.next();
- }
-
- /**
- * Skips entries to the first beyond the current whose document number is
- * greater than or equal to <i>target</i>. <br>Returns true iff there is such
- * an entry. <br>Behaves as if written:
- * boolean skipTo(int target) {
- * do {
- * if (!next())
- * return false;
- * } while (target > doc());
- * return true;
- * }
- *
- * Some implementations are considerably more efficient than that.
- */
- @Override
- public boolean skipTo(int target) {
- currentTermPositionIndex = -1;
- return super.skipTo(target);
- }
-}
Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java
===================================================================
--- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java (revision 954967)
+++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java (working copy)
@@ -1,136 +0,0 @@
-package org.apache.lucene.store.instantiated;
-
-/**
- * Copyright 2006 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-
-/**
- * A {@link org.apache.lucene.index.TermDocs} navigating an {@link InstantiatedIndexReader}.
- */
-public class InstantiatedTermDocs
- implements TermDocs {
-
- private final InstantiatedIndexReader reader;
-
- public InstantiatedTermDocs(InstantiatedIndexReader reader) {
- this.reader = reader;
- }
-
- private int currentDocumentIndex;
- protected InstantiatedTermDocumentInformation currentDocumentInformation;
- protected InstantiatedTerm currentTerm;
-
-
- public void seek(Term term) {
- currentTerm = reader.getIndex().findTerm(term);
- currentDocumentIndex = -1;
- }
-
- public void seek(org.apache.lucene.index.TermEnum termEnum) {
- seek(termEnum.term());
- }
-
-
- public int doc() {
- return currentDocumentInformation.getDocument().getDocumentNumber();
- }
-
- public int freq() {
- return currentDocumentInformation.getTermPositions().length;
- }
-
-
- public boolean next() {
- if (currentTerm != null) {
- currentDocumentIndex++;
- if (currentDocumentIndex < currentTerm.getAssociatedDocuments().length) {
- currentDocumentInformation = currentTerm.getAssociatedDocuments()[currentDocumentIndex];
- if (reader.isDeleted(currentDocumentInformation.getDocument().getDocumentNumber())) {
- return next();
- } else {
- return true;
- }
- } else {
- // mimic SegmentTermDocs
- currentDocumentIndex = currentTerm.getAssociatedDocuments().length -1;
- }
- }
- return false;
- }
-
-
- public int read(int[] docs, int[] freqs) {
- int i;
- for (i = 0; i < docs.length; i++) {
- if (!next()) {
- break;
- }
- docs[i] = doc();
- freqs[i] = freq();
- }
- return i;
- }
-
- /**
- * Skips entries to the first beyond the current whose document number is
- * greater than or equal to target. Returns true if there is such
- * an entry. <br>Behaves as if written:
- * boolean skipTo(int target) {
- * do {
- * if (!next())
- * return false;
- * } while (target > doc());
- * return true;
- * }
- *
- * This implementation is considerably more efficient than that.
- *
- */
- public boolean skipTo(int target) {
- if (currentTerm == null) {
- return false;
- }
-
- if (currentDocumentIndex >= target) {
- return next();
- }
-
- int startOffset = currentDocumentIndex >= 0 ? currentDocumentIndex : 0;
- int pos = currentTerm.seekCeilingDocumentInformationIndex(target, startOffset);
-
- if (pos == -1) {
- // mimic SegmentTermDocs that positions at the last index
- currentDocumentIndex = currentTerm.getAssociatedDocuments().length -1;
- return false;
- }
-
- currentDocumentInformation = currentTerm.getAssociatedDocuments()[pos];
- currentDocumentIndex = pos;
- if (reader.hasDeletions() && reader.isDeleted(currentDocumentInformation.getDocument().getDocumentNumber())) {
- return next();
- } else {
- return true;
- }
- }
-
- /**
- * Does nothing
- */
- public void close() {
- }
-}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java (revision 954967)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java (working copy)
@@ -20,8 +20,9 @@
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.PriorityQueue;
@@ -88,16 +89,16 @@
IndexReader ir = IndexReader.open(dir, true);
try {
int threshold = ir.maxDoc() / 10; // ignore words too common.
- TermEnum terms = ir.terms(new Term(field,""));
- while (terms.next()) {
- if (!field.equals(terms.term().field())) {
- break;
+ Terms terms = MultiFields.getTerms(ir, field);
+ if (terms != null) {
+ TermsEnum termsEnum = terms.iterator();
+ while (termsEnum.next() != null) {
+ int df = termsEnum.docFreq();
+ if (df processed (delete) " + numDeleted + " docs");
r.decRef();
Index: lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
===================================================================
--- lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java (revision 954967)
+++ lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java (working copy)
@@ -22,6 +22,7 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
public class TestMultiPassIndexSplitter extends LuceneTestCase {
IndexReader input;
@@ -62,30 +63,30 @@
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1); // rounding error
Document doc = ir.document(0);
assertEquals("0", doc.get("id"));
- Term t;
- TermEnum te;
- t = new Term("id", "1");
- te = ir.terms(t);
- assertNotSame(t, te.term());
+ TermsEnum te = MultiFields.getTerms(ir, "id").iterator();
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("1")));
+ assertNotSame("1", te.term().utf8ToString());
ir.close();
ir = IndexReader.open(dirs[1], true);
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1);
doc = ir.document(0);
assertEquals("1", doc.get("id"));
- t = new Term("id", "0");
- te = ir.terms(t);
- assertNotSame(t, te.term());
+ te = MultiFields.getTerms(ir, "id").iterator();
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("0")));
+
+ assertNotSame("0", te.term().utf8ToString());
ir.close();
ir = IndexReader.open(dirs[2], true);
assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1);
doc = ir.document(0);
assertEquals("2", doc.get("id"));
- t = new Term("id", "1");
- te = ir.terms(t);
- assertNotSame(t, te.term());
- t = new Term("id", "0");
- te = ir.terms(t);
- assertNotSame(t, te.term());
+
+ te = MultiFields.getTerms(ir, "id").iterator();
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("1")));
+ assertNotSame("1", te.term());
+
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("0")));
+ assertNotSame("0", te.term().utf8ToString());
}
/**
@@ -117,10 +118,9 @@
doc = ir.document(0);
assertEquals(start + "", doc.get("id"));
// make sure the deleted doc is not here
- Term t;
- TermEnum te;
- t = new Term("id", (NUM_DOCS - 1) + "");
- te = ir.terms(t);
- assertNotSame(t, te.term());
+ TermsEnum te = MultiFields.getTerms(ir, "id").iterator();
+ Term t = new Term("id", (NUM_DOCS - 1) + "");
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef(t.text())));
+ assertNotSame(t.text(), te.term().utf8ToString());
}
}
Index: lucene/contrib/misc/src/java/org/apache/lucene/misc/LengthNormModifier.java
===================================================================
--- lucene/contrib/misc/src/java/org/apache/lucene/misc/LengthNormModifier.java (revision 954967)
+++ lucene/contrib/misc/src/java/org/apache/lucene/misc/LengthNormModifier.java (working copy)
@@ -16,14 +16,17 @@
* limitations under the License.
*/
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
import java.io.File;
import java.io.IOException;
@@ -109,37 +112,25 @@
String fieldName = StringHelper.intern(field);
int[] termCounts = new int[0];
- IndexReader reader = null;
- TermEnum termEnum = null;
- TermDocs termDocs = null;
+ IndexReader reader = IndexReader.open(dir, false);
try {
- reader = IndexReader.open(dir, false);
+
termCounts = new int[reader.maxDoc()];
- try {
- termEnum = reader.terms(new Term(field));
- try {
- termDocs = reader.termDocs();
- do {
- Term term = termEnum.term();
- if (term != null && term.field().equals(fieldName)) {
- termDocs.seek(termEnum.term());
- while (termDocs.next()) {
- termCounts[termDocs.doc()] += termDocs.freq();
- }
- }
- } while (termEnum.next());
- } finally {
- if (null != termDocs) termDocs.close();
+ Bits delDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docs = null;
+
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ TermsEnum termsEnum = terms.iterator();
+ while(termsEnum.next() != null) {
+ docs = termsEnum.docs(delDocs, docs);
+ int doc;
+ while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+ termCounts[doc] += docs.freq();
+ }
}
- } finally {
- if (null != termEnum) termEnum.close();
}
- } finally {
- if (null != reader) reader.close();
- }
-
- try {
- reader = IndexReader.open(dir, false);
+
for (int d = 0; d < termCounts.length; d++) {
if (! reader.isDeleted(d)) {
byte norm = Similarity.encodeNorm(sim.lengthNorm(fieldName, termCounts[d]));
@@ -147,7 +138,7 @@
}
}
} finally {
- if (null != reader) reader.close();
+ reader.close();
}
}
Index: lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
===================================================================
--- lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 954967)
+++ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy)
@@ -44,11 +44,8 @@
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
-import org.apache.lucene.index.TermPositions;
import org.apache.lucene.index.TermVectorMapper;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.Collector;
@@ -732,7 +729,6 @@
///////////////////////////////////////////////////////////////////////////////
// Nested classes:
///////////////////////////////////////////////////////////////////////////////
- private static final Term MATCH_ALL_TERM = new Term("");
/**
* Search support for Lucene framework integration; implements all methods
@@ -769,12 +765,6 @@
}
@Override
- public TermEnum terms() {
- if (DEBUG) System.err.println("MemoryIndexReader.terms()");
- return terms(MATCH_ALL_TERM);
- }
-
- @Override
public Fields fields() {
sortFields();
@@ -1011,202 +1001,6 @@
}
@Override
- public TermEnum terms(Term term) {
- if (DEBUG) System.err.println("MemoryIndexReader.terms: " + term);
-
- int i; // index into info.sortedTerms
- int j; // index into sortedFields
-
- sortFields();
- if (sortedFields.length == 1 && sortedFields[0].getKey() == term.field()) {
- j = 0; // fast path
- } else {
- j = Arrays.binarySearch(sortedFields, term.field(), termComparator);
- }
-
- if (j < 0) { // not found; choose successor
- j = -j -1;
- i = 0;
- if (j < sortedFields.length) getInfo(j).sortTerms();
- } else { // found
- Info info = getInfo(j);
- info.sortTerms();
- i = Arrays.binarySearch(info.sortedTerms, term.text(), termComparator);
- if (i < 0) { // not found; choose successor
- i = -i -1;
- if (i >= info.sortedTerms.length) { // move to next successor
- j++;
- i = 0;
- if (j < sortedFields.length) getInfo(j).sortTerms();
- }
- }
- }
- final int ix = i;
- final int jx = j;
-
- return new TermEnum() {
-
- private int srtTermsIdx = ix; // index into info.sortedTerms
- private int srtFldsIdx = jx; // index into sortedFields
-
- @Override
- public boolean next() {
- if (DEBUG) System.err.println("TermEnum.next");
- if (srtFldsIdx >= sortedFields.length) return false;
- Info info = getInfo(srtFldsIdx);
- if (++srtTermsIdx < info.sortedTerms.length) return true;
-
- // move to successor
- srtFldsIdx++;
- srtTermsIdx = 0;
- if (srtFldsIdx >= sortedFields.length) return false;
- getInfo(srtFldsIdx).sortTerms();
- return true;
- }
-
- @Override
- public Term term() {
- if (DEBUG) System.err.println("TermEnum.term: " + srtTermsIdx);
- if (srtFldsIdx >= sortedFields.length) return null;
- Info info = getInfo(srtFldsIdx);
- if (srtTermsIdx >= info.sortedTerms.length) return null;
-// if (DEBUG) System.err.println("TermEnum.term: " + i + ", " + info.sortedTerms[i].getKey());
- return createTerm(info, srtFldsIdx, info.sortedTerms[srtTermsIdx].getKey());
- }
-
- @Override
- public int docFreq() {
- if (DEBUG) System.err.println("TermEnum.docFreq");
- if (srtFldsIdx >= sortedFields.length) return 0;
- Info info = getInfo(srtFldsIdx);
- if (srtTermsIdx >= info.sortedTerms.length) return 0;
- return numPositions(info.getPositions(srtTermsIdx));
- }
-
- @Override
- public void close() {
- if (DEBUG) System.err.println("TermEnum.close");
- }
-
- /** Returns a new Term object, minimizing String.intern() overheads. */
- private Term createTerm(Info info, int pos, String text) {
- // Assertion: sortFields has already been called before
- Term template = info.template;
- if (template == null) { // not yet cached?
- String fieldName = sortedFields[pos].getKey();
- template = new Term(fieldName);
- info.template = template;
- }
-
- return template.createTerm(text);
- }
-
- };
- }
-
- @Override
- public TermPositions termPositions() {
- if (DEBUG) System.err.println("MemoryIndexReader.termPositions");
-
- return new TermPositions() {
-
- private boolean hasNext;
- private int cursor = 0;
- private ArrayIntList current;
- private Term term;
-
- public void seek(Term term) {
- this.term = term;
- if (DEBUG) System.err.println(".seek: " + term);
- if (term == null) {
- hasNext = true; // term==null means match all docs
- } else {
- Info info = getInfo(term.field());
- current = info == null ? null : info.getPositions(term.text());
- hasNext = (current != null);
- cursor = 0;
- }
- }
-
- public void seek(TermEnum termEnum) {
- if (DEBUG) System.err.println(".seekEnum");
- seek(termEnum.term());
- }
-
- public int doc() {
- if (DEBUG) System.err.println(".doc");
- return 0;
- }
-
- public int freq() {
- int freq = current != null ? numPositions(current) : (term == null ? 1 : 0);
- if (DEBUG) System.err.println(".freq: " + freq);
- return freq;
- }
-
- public boolean next() {
- if (DEBUG) System.err.println(".next: " + current + ", oldHasNext=" + hasNext);
- boolean next = hasNext;
- hasNext = false;
- return next;
- }
-
- public int read(int[] docs, int[] freqs) {
- if (DEBUG) System.err.println(".read: " + docs.length);
- if (!hasNext) return 0;
- hasNext = false;
- docs[0] = 0;
- freqs[0] = freq();
- return 1;
- }
-
- public boolean skipTo(int target) {
- if (DEBUG) System.err.println(".skipTo: " + target);
- return next();
- }
-
- public void close() {
- if (DEBUG) System.err.println(".close");
- }
-
- public int nextPosition() { // implements TermPositions
- int pos = current.get(cursor);
- cursor += stride;
- if (DEBUG) System.err.println(".nextPosition: " + pos);
- return pos;
- }
-
- /**
- * Not implemented.
- * @throws UnsupportedOperationException
- */
- public int getPayloadLength() {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Not implemented.
- * @throws UnsupportedOperationException
- */
- public byte[] getPayload(byte[] data, int offset) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- public boolean isPayloadAvailable() {
- // unsuported
- return false;
- }
-
- };
- }
-
- @Override
- public TermDocs termDocs() {
- if (DEBUG) System.err.println("MemoryIndexReader.termDocs");
- return termPositions();
- }
-
- @Override
public TermFreqVector[] getTermFreqVectors(int docNumber) {
if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVectors");
TermFreqVector[] vectors = new TermFreqVector[fields.size()];
Index: lucene/contrib/lucli/src/java/lucli/LuceneMethods.java
===================================================================
--- lucene/contrib/lucli/src/java/lucli/LuceneMethods.java (revision 954967)
+++ lucene/contrib/lucli/src/java/lucli/LuceneMethods.java (working copy)
@@ -43,8 +43,10 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
@@ -58,6 +60,7 @@
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
/**
* Various methods that interact with Lucene and provide info about the
@@ -342,13 +345,21 @@
public void terms(String field) throws IOException {
TreeMap termMap = new TreeMap();
IndexReader indexReader = IndexReader.open(indexName, true);
- TermEnum terms = indexReader.terms();
- while (terms.next()) {
- Term term = terms.term();
- //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
- //if we're either not looking by field or we're matching the specific field
- if ((field == null) || field.equals(term.field()))
- termMap.put(term.field() + ":" + term.text(), Integer.valueOf((terms.docFreq())));
+ Fields fields = MultiFields.getFields(indexReader);
+ if (fields != null) {
+ FieldsEnum fieldsEnum = fields.iterator();
+ String curField;
+ while((curField = fieldsEnum.next()) != null) {
+ TermsEnum terms = fieldsEnum.terms();
+ BytesRef text;
+ while ((text = terms.next()) != null) {
+ //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
+ //if we're either not looking by field or we're matching the specific field
+ if ((field == null) || field.equals(curField)) {
+ termMap.put(curField + ":" + text.utf8ToString(), Integer.valueOf((terms.docFreq())));
+ }
+ }
+ }
}
Iterator termIterator = termMap.keySet().iterator();
Index: lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java
===================================================================
--- lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java (revision 954967)
+++ lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java (working copy)
@@ -27,9 +27,11 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
public class DuplicateFilterTest extends LuceneTestCase {
private static final String KEY_FIELD = "url";
@@ -134,11 +136,14 @@
{
Document d=searcher.doc(hits[i].doc);
String url=d.get(KEY_FIELD);
- TermDocs td = reader.termDocs(new Term(KEY_FIELD,url));
+ DocsEnum td = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ KEY_FIELD,
+ new BytesRef(url));
int lastDoc=0;
- while(td.next())
+ while(td.nextDoc() != DocsEnum.NO_MORE_DOCS)
{
- lastDoc=td.doc();
+ lastDoc=td.docID();
}
assertEquals("Duplicate urls should return last doc",lastDoc, hits[i].doc);
}
@@ -155,10 +160,13 @@
{
Document d=searcher.doc(hits[i].doc);
String url=d.get(KEY_FIELD);
- TermDocs td = reader.termDocs(new Term(KEY_FIELD,url));
+ DocsEnum td = MultiFields.getTermDocsEnum(reader,
+ MultiFields.getDeletedDocs(reader),
+ KEY_FIELD,
+ new BytesRef(url));
int lastDoc=0;
- td.next();
- lastDoc=td.doc();
+ td.nextDoc();
+ lastDoc=td.docID();
assertEquals("Duplicate urls should return first doc",lastDoc, hits[i].doc);
}
}
Index: lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java
===================================================================
--- lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java (revision 954967)
+++ lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java (working copy)
@@ -25,7 +25,7 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
@@ -80,10 +80,9 @@
}
public void testMatchAll() throws Exception {
- TermEnum terms = new RegexQuery(new Term(FN, "jum.")).getEnum(searcher.getIndexReader());
+ TermsEnum terms = new RegexQuery(new Term(FN, "jum.")).getTermsEnum(searcher.getIndexReader());
// no term should match
- assertNull(terms.term());
- assertFalse(terms.next());
+ assertNull(terms.next());
}
public void testRegex1() throws Exception {
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java (revision 954967)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java (working copy)
@@ -24,8 +24,14 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Fields;
import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Bits;
/**
* Constructs a filter for docs matching any of the terms added to this class.
@@ -52,28 +58,37 @@
* @see org.apache.lucene.search.Filter#getDocIdSet(org.apache.lucene.index.IndexReader)
*/
@Override
- public DocIdSet getDocIdSet(IndexReader reader) throws IOException
- {
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
OpenBitSet result=new OpenBitSet(reader.maxDoc());
- TermDocs td = reader.termDocs();
- try
- {
- for (Iterator iter = terms.iterator(); iter.hasNext();)
- {
- Term term = iter.next();
- td.seek(term);
- while (td.next())
- {
- result.set(td.doc());
- }
+ Fields fields = MultiFields.getFields(reader);
+ BytesRef br = new BytesRef();
+ Bits delDocs = MultiFields.getDeletedDocs(reader);
+ if (fields != null) {
+ String lastField = null;
+ Terms termsC = null;
+ TermsEnum termsEnum = null;
+ DocsEnum docs = null;
+ for (Iterator iter = terms.iterator(); iter.hasNext();) {
+ Term term = iter.next();
+ if (term.field() != lastField) {
+ termsC = fields.terms(term.field());
+ termsEnum = termsC.iterator();
+ lastField = term.field();
+ }
+
+ if (termsC != null) {
+ br.copy(term.text());
+ if (termsEnum.seek(br) == TermsEnum.SeekStatus.FOUND) {
+ docs = termsEnum.docs(delDocs, docs);
+ while(docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ result.set(docs.docID());
}
+ }
}
- finally
- {
- td.close();
- }
- return result;
- }
+ }
+ }
+ return result;
+ }
@Override
public boolean equals(Object obj)
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java (revision 954967)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java (working copy)
@@ -18,7 +18,7 @@
*/
import org.apache.lucene.search.MultiTermQuery;
-import org.apache.lucene.search.FilteredTermEnum;
+import org.apache.lucene.search.FilteredTermsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.ToStringUtils;
@@ -60,8 +60,8 @@
}
@Override
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
- return new RegexTermEnum(reader, term, regexImpl);
+ protected FilteredTermsEnum getTermsEnum(IndexReader reader) throws IOException {
+ return new RegexTermsEnum(reader, term, regexImpl);
}
@Override
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermEnum.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermEnum.java (revision 954967)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermEnum.java (working copy)
@@ -1,83 +0,0 @@
-package org.apache.lucene.search.regex;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.search.FilteredTermEnum;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-
-import java.io.IOException;
-
-/**
- * Subclass of FilteredTermEnum for enumerating all terms that match the
- * specified regular expression term using the specified regular expression
- * implementation.
- *
- * Term enumerations are always ordered by Term.compareTo(). Each term in
- * the enumeration is greater than all that precede it.
- */
-
-public class RegexTermEnum extends FilteredTermEnum {
- private String field = "";
- private String pre = "";
- private boolean endEnum = false;
- private RegexCapabilities regexImpl;
-
- public RegexTermEnum(IndexReader reader, Term term, RegexCapabilities regexImpl) throws IOException {
- super();
- field = term.field();
- String text = term.text();
- this.regexImpl = regexImpl;
-
- regexImpl.compile(text);
-
- pre = regexImpl.prefix();
- if (pre == null) pre = "";
-
- setEnum(reader.terms(new Term(term.field(), pre)));
- }
-
- @Override
- protected final boolean termCompare(Term term) {
- if (field == term.field()) {
- String searchText = term.text();
- if (searchText.startsWith(pre)) {
- return regexImpl.match(searchText);
- }
- }
- endEnum = true;
- return false;
- }
-
- @Override
- public final float difference() {
-// TODO: adjust difference based on distance of searchTerm.text() and term().text()
- return 1.0f;
- }
-
- @Override
- public final boolean endEnum() {
- return endEnum;
- }
-
- @Override
- public void close() throws IOException {
- super.close();
- field = null;
- }
-}
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java (revision 0)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java (revision 0)
@@ -0,0 +1,67 @@
+package org.apache.lucene.search.regex;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.FilteredTermsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/**
+ * Subclass of FilteredTermsEnum for enumerating all terms that match the
+ * specified regular expression term using the specified regular expression
+ * implementation.
+ *
+ * Term enumerations are always ordered by Term.compareTo(). Each term in
+ * the enumeration is greater than all that precede it.
+ */
+
+public class RegexTermsEnum extends FilteredTermsEnum {
+ private String field = "";
+ private String pre = "";
+ private boolean endEnum = false;
+ private RegexCapabilities regexImpl;
+ private final BytesRef prefixRef;
+
+ public RegexTermsEnum(IndexReader reader, Term term, RegexCapabilities regexImpl) throws IOException {
+ super(reader, term.field());
+ String text = term.text();
+ this.regexImpl = regexImpl;
+
+ regexImpl.compile(text);
+
+ pre = regexImpl.prefix();
+ if (pre == null) pre = "";
+
+ setInitialSeekTerm(prefixRef = new BytesRef(pre));
+ }
+
+ @Override
+ protected AcceptStatus accept(BytesRef term) {
+ if (term.startsWith(prefixRef)) {
+ // TODO: set BoostAttr based on distance of
+ // searchTerm.text() and term().text()
+ String text = term.utf8ToString();
+ return regexImpl.match(text) ? AcceptStatus.YES : AcceptStatus.NO;
+ } else {
+ return AcceptStatus.NO;
+ }
+ }
+}
Property changes on: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/MIGRATE.txt
===================================================================
--- lucene/MIGRATE.txt (revision 954967)
+++ lucene/MIGRATE.txt (working copy)
@@ -1,5 +1,5 @@
-LUCENE-2380
+LUCENE-2380: FieldCache.getStrings/Index --> FieldCache.getDocTerms/Index
* The field values returned when sorting by SortField.STRING are now
BytesRef. You can call value.utf8ToString() to convert back to
@@ -43,3 +43,171 @@
String. You can call the .utf8ToString() method on the BytesRef
instances, if necessary.
+
+
+LUCENE-1458, LUCENE-2111: Flexible Indexing
+
+ Flexible indexing changed the low level fields/terms/docs/positions
+ enumeration APIs. Here are the major changes:
+
+ * Terms are now binary in nature (arbitrary byte[]), represented
+ by the BytesRef class (which provides an offset + length "slice"
+ into an existing byte[]).
+
+ * Fields are separately enumerated (FieldsEnum) from the terms
+ within each field (TermsEnum). So instead of this:
+
+ TermEnum termsEnum = ...;
+ while(termsEnum.next()) {
+ Term t = termsEnum.term();
+ System.out.println("field=" + t.field() + "; text=" + t.text());
+ }
+
+ Do this:
+
+ FieldsEnum fieldsEnum = ...;
+ String field;
+ while((field = fieldsEnum.next()) != null) {
+ TermsEnum termsEnum = fieldsEnum.terms();
+ BytesRef text;
+ while((text = termsEnum.next()) != null) {
+ System.out.println("field=" + field + "; text=" + text.utf8ToString());
+ }
+ }
+
+ * TermDocs is renamed to DocsEnum. Instead of this:
+
+ while(td.next()) {
+ int doc = td.doc();
+ ...
+ }
+
+ do this:
+
+ int doc;
+ while((doc = td.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+ ...
+ }
+
+ Instead of this:
+
+ if (td.skipTo(target)) {
+ int doc = td.doc();
+ ...
+ }
+
+ do this:
+
+ if ((doc=td.advance(target)) != DocsEnum.NO_MORE_DOCS) {
+ ...
+ }
+
+ * TermPositions is renamed to DocsAndPositionsEnum, and no longer
+ extends the docs only enumerator (DocsEnum).
+
+ * Deleted docs are no longer implicitly filtered from
+ docs/positions enums. Instead, you pass a Bits
+ skipDocs (set bits are skipped) when obtaining the enums. Also,
+ you can now ask a reader for its deleted docs.
+
+ * The docs/positions enums cannot seek to a term. Instead,
+ TermsEnum is able to seek, and then you request the
+ docs/positions enum from that TermsEnum.
+
+ * TermsEnum's seek method returns more information. So instead of
+ this:
+
+ Term t;
+ TermEnum termEnum = reader.terms(t);
+ if (t.equals(termEnum.term())) {
+ ...
+ }
+
+ do this:
+
+ TermsEnum termsEnum = ...;
+ BytesRef text;
+ if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) {
+ ...
+ }
+
+ SeekStatus also contains END (enumerator is done) and NOT_FOUND
+ (term was not found but enumerator is now positioned to the next
+ term).
+
+ * TermsEnum has an ord() method, returning the long numeric
+ ordinal (ie, first term is 0, next is 1, and so on) for the term
+ it's positioned to. There is also a corresponding seek(long
+ ord) method. Note that these methods are optional; in
+ particular the MultiFields TermsEnum does not implement them.
+
+
+ How you obtain the enums has changed. The primary entry point is
+ the Fields class. If you know your reader is a single segment
+ reader, do this:
+
+ Fields fields = reader.fields();
+ if (fields != null) {
+ ...
+ }
+
+ If the reader might be multi-segment, you must do this:
+
+ Fields fields = MultiFields.getFields(reader);
+ if (fields != null) {
+ ...
+ }
+
+ The fields may be null (eg if the reader has no fields).
+
+ Note that the MultiFields approach entails a performance hit on
+ MultiReaders, as it must merge terms/docs/positions on the fly. It's
+ generally better to instead get the sequential readers (use
+ oal.util.ReaderUtil) and then step through those readers yourself,
+ if you can (this is how Lucene drives searches).
+
+ If you pass a SegmentReader to MultiFields.getFields it will simply
+ return reader.fields(), so there is no performance hit in that
+ case.
+
+ Once you have a non-null Fields you can do this:
+
+ Terms terms = fields.terms("field");
+ if (terms != null) {
+ ...
+ }
+
+ The terms may be null (eg if the field does not exist).
+
+ Once you have a non-null terms you can get an enum like this:
+
+ TermsEnum termsEnum = terms.iterator();
+
+ The returned TermsEnum will not be null.
+
+ You can then .next() through the TermsEnum, or seek. If you want a
+ DocsEnum, do this:
+
+ Bits skipDocs = MultiFields.getDeletedDocs(reader);
+ DocsEnum docsEnum = null;
+
+ docsEnum = termsEnum.docs(skipDocs, docsEnum);
+
+ You can pass in a prior DocsEnum and it will be reused if possible.
+
+ Likewise for DocsAndPositionsEnum.
+
+ IndexReader has several sugar methods (which just go through the
+ above steps, under the hood). Instead of:
+
+ Term t;
+ TermDocs termDocs = reader.termDocs();
+ termDocs.seek(t);
+
+ do this:
+
+ String field;
+ BytesRef text;
+ DocsEnum docsEnum = reader.termDocsEnum(reader.getDeletedDocs(), field, text);
+
+ Likewise for DocsAndPositionsEnum.
+