Index: solr/src/test/org/apache/solr/update/DirectUpdateHandlerTest.java =================================================================== --- solr/src/test/org/apache/solr/update/DirectUpdateHandlerTest.java (revision 954967) +++ solr/src/test/org/apache/solr/update/DirectUpdateHandlerTest.java (working copy) @@ -17,22 +17,13 @@ package org.apache.solr.update; -import java.io.IOException; -import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; -import java.util.List; import java.util.Map; -import java.util.Set; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.SegmentReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.MapSolrParams; @@ -40,9 +31,7 @@ import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.SolrIndexReader; -import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.util.AbstractSolrTestCase; -import org.apache.solr.util.RefCounted; /** * Index: solr/src/test/org/apache/solr/request/TestFaceting.java =================================================================== --- solr/src/test/org/apache/solr/request/TestFaceting.java (revision 954967) +++ solr/src/test/org/apache/solr/request/TestFaceting.java (working copy) @@ -18,6 +18,7 @@ package org.apache.solr.request; import org.apache.lucene.index.Term; +import org.apache.lucene.util.BytesRef; import org.apache.solr.SolrTestCaseJ4; import org.junit.After; import org.junit.BeforeClass; @@ -68,7 +69,7 @@ req = lrf.makeRequest("q","*:*"); TermIndex ti = new TermIndex(proto.field()); - NumberedTermEnum te = 
ti.getEnumerator(req.getSearcher().getReader()); + NumberedTermsEnum te = ti.getEnumerator(req.getSearcher().getReader()); // iterate through first while(te.term() != null) te.next(); @@ -82,11 +83,11 @@ for (int i=0; i0, te.skipTo(proto.createTerm("000"))); + assertEquals(size>0, te.skipTo(new BytesRef("000")) != null); assertEquals(0, te.getTermNumber()); if (size>0) { - assertEquals(t(0), te.term().text()); + assertEquals(t(0), te.term().utf8ToString()); } else { assertEquals(null, te.term()); } @@ -107,10 +108,10 @@ for (int i=0; i numTimesNext) { - termEnum = reader.terms(protoTerm.createTerm(internalKey)); - t = termEnum.term(); + spare.copy(internalKey); + termsEnum.seek(spare); + t = termsEnum.term(); } else { - hasNext = termEnum.next(); - t = hasNext ? termEnum.term() : null; + t = termsEnum.next(); } - if (t != null && t.field() == idName) { // intern'd comparison - termVal = t.text(); + if (t != null) { + termVal = t.utf8ToString(); } else { termVal = lastVal; } @@ -319,14 +319,14 @@ } if (!sorted) { - termEnum = reader.terms(protoTerm.createTerm(internalKey)); - t = termEnum.term(); - if (t != null && t.field() == idName // intern'd comparison - && internalKey.equals(t.text())) - { - termDocs.seek (termEnum); - while (termDocs.next()) { - vals[termDocs.doc()] = fval; + spare.copy(internalKey); + TermsEnum.SeekStatus result = termsEnum.seek(spare); + t = termsEnum.term(); + if (result == TermsEnum.SeekStatus.FOUND) { + docsEnum = termsEnum.docs(delDocs, docsEnum); + int doc; + while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) { + vals[doc] = fval; } } else { if (notFoundCount<10) { // collect first 10 not found for logging @@ -342,8 +342,6 @@ } finally { // swallow exceptions on close so we don't override any // exceptions that happened in the loop - if (termDocs!=null) try{termDocs.close();}catch(Exception e){} - if (termEnum!=null) try{termEnum.close();}catch(Exception e){} try{r.close();}catch(Exception e){} } Index: 
solr/src/java/org/apache/solr/search/SolrIndexReader.java =================================================================== --- solr/src/java/org/apache/solr/search/SolrIndexReader.java (revision 954967) +++ solr/src/java/org/apache/solr/search/SolrIndexReader.java (working copy) @@ -485,11 +485,6 @@ } @Override - public TermPositions termPositions(Term term) throws IOException { - return in.termPositions(term); - } - - @Override public void undeleteAll() throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { in.undeleteAll(); } Index: solr/src/java/org/apache/solr/request/UnInvertedField.java =================================================================== --- solr/src/java/org/apache/solr/request/UnInvertedField.java (revision 954967) +++ solr/src/java/org/apache/solr/request/UnInvertedField.java (working copy) @@ -20,8 +20,11 @@ import org.apache.lucene.search.FieldCache; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.solr.common.params.FacetParams; @@ -36,6 +39,8 @@ import org.apache.solr.handler.component.StatsValues; import org.apache.solr.handler.component.FieldFacetStats; import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Bits; import java.io.IOException; import java.util.ArrayList; @@ -43,6 +48,7 @@ import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; +import java.util.Comparator; import java.util.concurrent.atomic.AtomicLong; @@ -80,12 +86,12 @@ private static int TNUM_OFFSET=2; 
static class TopTerm { - Term term; + BytesRef term; int termNum; long memSize() { return 8 + // obj header - 8 + 8 +(term.text().length()<<1) + //term + 8 + 8 +term.length + //term 4; // int } } @@ -191,7 +197,7 @@ final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts) maxTermCounts = new int[1024]; - NumberedTermEnum te = ti.getEnumerator(reader); + NumberedTermsEnum te = ti.getEnumerator(reader); // threshold, over which we use set intersections instead of counting // to (1) save memory, and (2) speed up faceting. @@ -199,8 +205,6 @@ // the threshold even when the index is very small. int threshold = maxDoc / 20 + 2; // threshold = 2000000000; //////////////////////////////// USE FOR TESTING - int[] docs = new int[1000]; - int[] freqs = new int[1000]; // we need a minimum of 9 bytes, but round up to 12 since the space would // be wasted with most allocators anyway. @@ -223,7 +227,7 @@ // frequent terms ahead of time. for (;;) { - Term t = te.term(); + BytesRef t = te.term(); if (t==null) break; int termNum = te.getTermNumber(); @@ -239,11 +243,11 @@ int df = te.docFreq(); if (df >= threshold) { TopTerm topTerm = new TopTerm(); - topTerm.term = t; + topTerm.term = new BytesRef(t); topTerm.termNum = termNum; bigTerms.put(topTerm.termNum, topTerm); - DocSet set = searcher.getDocSet(new TermQuery(topTerm.term)); + DocSet set = searcher.getDocSet(new TermQuery(new Term(ti.field, topTerm.term.utf8ToString()))); maxTermCounts[termNum] = set.size(); te.next(); @@ -252,17 +256,19 @@ termsInverted++; - TermDocs td = te.getTermDocs(); - td.seek(te); + DocsEnum td = te.getDocsEnum(); + + DocsEnum.BulkReadResult bulkResult = td.getBulkResult(); + for(;;) { - int n = td.read(docs,freqs); + int n = td.read(); if (n <= 0) break; maxTermCounts[termNum] += n; for (int i=0; i 0) { - te.skipTo(prefix); + te.skipTo(new BytesRef(prefix)); startTerm = te.getTermNumber(); - te.skipTo(prefix + "\uffff\uffff\uffff\uffff"); + te.skipTo(new 
BytesRef(prefix + "\uffff\uffff\uffff\uffff")); endTerm = te.getTermNumber(); } @@ -497,7 +503,7 @@ for (TopTerm tt : bigTerms.values()) { // TODO: counts could be deferred if sorted==false if (tt.termNum >= startTerm && tt.termNum < endTerm) { - counts[tt.termNum] = searcher.numDocs(new TermQuery(tt.term), docs); + counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(ti.field, tt.term.utf8ToString())), docs); } } @@ -669,7 +675,7 @@ final int[] index = this.index; final int[] counts = new int[numTermsInField];//keep track of the number of times we see each word in the field for all the documents in the docset - NumberedTermEnum te = ti.getEnumerator(searcher.getReader()); + NumberedTermsEnum te = ti.getEnumerator(searcher.getReader()); boolean doNegative = false; @@ -693,12 +699,13 @@ for (TopTerm tt : bigTerms.values()) { // TODO: counts could be deferred if sorted==false if (tt.termNum >= 0 && tt.termNum < numTermsInField) { + final Term t = new Term(ti.field, tt.term.utf8ToString()); if (finfo.length == 0) { - counts[tt.termNum] = searcher.numDocs(new TermQuery(tt.term), docs); + counts[tt.termNum] = searcher.numDocs(new TermQuery(t), docs); } else { //COULD BE VERY SLOW //if we're collecting stats for facet fields, we need to iterate on all matching documents - DocSet bigTermDocSet = searcher.getDocSet(new TermQuery(tt.term)).intersection(docs); + DocSet bigTermDocSet = searcher.getDocSet(new TermQuery(t)).intersection(docs); DocIterator iter = bigTermDocSet.iterator(); while (iter.hasNext()) { int doc = iter.nextDoc(); @@ -795,17 +802,16 @@ - String getTermText(NumberedTermEnum te, int termNum) throws IOException { + String getTermText(NumberedTermsEnum te, int termNum) throws IOException { if (bigTerms.size() > 0) { // see if the term is one of our big terms. 
TopTerm tt = bigTerms.get(termNum); if (tt != null) { - return tt.term.text(); + return tt.term.utf8ToString(); } } - te.skipTo(termNum); - return te.term().text(); + return te.skipTo(termNum).utf8ToString(); } public String toString() { @@ -860,95 +866,97 @@ ***/ -class NumberedTermEnum extends TermEnum { +class NumberedTermsEnum extends TermsEnum { protected final IndexReader reader; protected final TermIndex tindex; - protected TermEnum tenum; + protected TermsEnum tenum; protected int pos=-1; - protected Term t; - protected TermDocs termDocs; + protected BytesRef termText; + protected DocsEnum docsEnum; - NumberedTermEnum(IndexReader reader, TermIndex tindex) throws IOException { + NumberedTermsEnum(IndexReader reader, TermIndex tindex) throws IOException { this.reader = reader; this.tindex = tindex; } - NumberedTermEnum(IndexReader reader, TermIndex tindex, String termValue, int pos) throws IOException { + NumberedTermsEnum(IndexReader reader, TermIndex tindex, BytesRef termValue, int pos) throws IOException { this.reader = reader; this.tindex = tindex; this.pos = pos; - tenum = reader.terms(tindex.createTerm(termValue)); - setTerm(); + Terms terms = MultiFields.getTerms(reader, tindex.field); + if (terms != null) { + tenum = terms.iterator(); + tenum.seek(termValue); + setTerm(); + } } - public TermDocs getTermDocs() throws IOException { - if (termDocs==null) termDocs = reader.termDocs(t); - else termDocs.seek(t); - return termDocs; + @Override + public Comparator getComparator() throws IOException { + return tenum.getComparator(); } - protected boolean setTerm() { - t = tenum.term(); - if (t==null - || t.field() != tindex.fterm.field() // intern'd compare - || (tindex.prefix != null && !t.text().startsWith(tindex.prefix,0)) ) - { - t = null; - return false; + public DocsEnum getDocsEnum() throws IOException { + docsEnum = tenum.docs(MultiFields.getDeletedDocs(reader), docsEnum); + return docsEnum; + } + + protected BytesRef setTerm() throws IOException { + 
termText = tenum.term(); + if (tindex.prefix != null && !termText.startsWith(tindex.prefix)) { + termText = null; } - return true; + return termText; } - - public boolean next() throws IOException { + @Override + public BytesRef next() throws IOException { pos++; - boolean b = tenum.next(); - if (!b) { - t = null; - return false; + if (tenum.next() == null) { + termText = null; + return null; } return setTerm(); // this is extra work if we know we are in bounds... } - public Term term() { - return t; + @Override + public BytesRef term() { + return termText; } + @Override public int docFreq() { return tenum.docFreq(); } - public void close() throws IOException { - if (tenum!=null) tenum.close(); - } + public BytesRef skipTo(BytesRef target) throws IOException { - public boolean skipTo(String target) throws IOException { - return skipTo(tindex.fterm.createTerm(target)); - } - - public boolean skipTo(Term target) throws IOException { // already here - if (t != null && t.equals(target)) return true; + if (termText != null && termText.equals(target)) return termText; - int startIdx = Arrays.binarySearch(tindex.index,target.text()); + if (tenum == null) { + return null; + } + int startIdx = Arrays.binarySearch(tindex.index,target); + if (startIdx >= 0) { // we hit the term exactly... lucky us! 
- if (tenum != null) tenum.close(); - tenum = reader.terms(target); + TermsEnum.SeekStatus seekStatus = tenum.seek(target); + assert seekStatus == TermsEnum.SeekStatus.FOUND; pos = startIdx << tindex.intervalBits; return setTerm(); } // we didn't hit the term exactly startIdx=-startIdx-1; - + if (startIdx == 0) { // our target occurs *before* the first term - if (tenum != null) tenum.close(); - tenum = reader.terms(target); + TermsEnum.SeekStatus seekStatus = tenum.seek(target); + assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND; pos = 0; return setTerm(); } @@ -956,53 +964,81 @@ // back up to the start of the block startIdx--; - if ((pos >> tindex.intervalBits) == startIdx && t != null && t.text().compareTo(target.text())<=0) { + if ((pos >> tindex.intervalBits) == startIdx && termText != null && termText.compareTo(target)<=0) { // we are already in the right block and the current term is before the term we want, // so we don't need to seek. } else { // seek to the right block - if (tenum != null) tenum.close(); - tenum = reader.terms(target.createTerm(tindex.index[startIdx])); + TermsEnum.SeekStatus seekStatus = tenum.seek(tindex.index[startIdx]); + assert seekStatus == TermsEnum.SeekStatus.FOUND; pos = startIdx << tindex.intervalBits; - setTerm(); // should be true since it's in the index + setTerm(); // should be non-null since it's in the index } - - while (t != null && t.text().compareTo(target.text()) < 0) { + while (termText != null && termText.compareTo(target) < 0) { next(); } - return t != null; + return termText; } - - public boolean skipTo(int termNumber) throws IOException { + public BytesRef skipTo(int termNumber) throws IOException { int delta = termNumber - pos; if (delta < 0 || delta > tindex.interval || tenum==null) { int idx = termNumber >>> tindex.intervalBits; - String base = tindex.index[idx]; + BytesRef base = tindex.index[idx]; pos = idx << tindex.intervalBits; delta = termNumber - pos; - if (tenum != null) tenum.close(); - tenum = 
reader.terms(tindex.createTerm(base)); + TermsEnum.SeekStatus seekStatus = tenum.seek(base); + assert seekStatus == TermsEnum.SeekStatus.FOUND; } while (--delta >= 0) { - boolean b = tenum.next(); - if (b==false) { - t = null; - return false; + BytesRef br = tenum.next(); + if (br == null) { + termText = null; + return null; } ++pos; } return setTerm(); } + protected void close() throws IOException { + // no-op, needed so the anon subclass that does indexing + // can build its index + } + /** The current term number, starting at 0. * Only valid if the previous call to next() or skipTo() returned true. */ public int getTermNumber() { return pos; } + + @Override + public long ord() { + throw new UnsupportedOperationException(); + } + + @Override + public SeekStatus seek(long ord) { + throw new UnsupportedOperationException(); + } + + @Override + public DocsEnum docs(Bits skipDocs, DocsEnum reuse) { + throw new UnsupportedOperationException(); + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) { + throw new UnsupportedOperationException(); + } + + @Override + public SeekStatus seek(BytesRef target, boolean useCache) { + throw new UnsupportedOperationException(); + } } @@ -1018,9 +1054,9 @@ final static int intervalMask = 0xffffffff >>> (32-intervalBits); final static int interval = 1 << intervalBits; - final Term fterm; // prototype to be used in term construction w/o String.intern overhead - final String prefix; - String[] index; + final String field; + final BytesRef prefix; + BytesRef[] index; int nTerms; long sizeOfStrings; @@ -1029,16 +1065,12 @@ } TermIndex(String field, String prefix) { - this.fterm = new Term(field, ""); - this.prefix = prefix; + this.field = field; + this.prefix = prefix == null ? 
null : new BytesRef(prefix); } - Term createTerm(String termVal) { - return fterm.createTerm(termVal); - } - - NumberedTermEnum getEnumerator(IndexReader reader, int termNumber) throws IOException { - NumberedTermEnum te = new NumberedTermEnum(reader, this); + NumberedTermsEnum getEnumerator(IndexReader reader, int termNumber) throws IOException { + NumberedTermsEnum te = new NumberedTermsEnum(reader, this); te.skipTo(termNumber); return te; } @@ -1047,38 +1079,37 @@ with next() to fully traverse all of the terms so the index will be built. */ - NumberedTermEnum getEnumerator(IndexReader reader) throws IOException { - if (index==null) return new NumberedTermEnum(reader,this, prefix==null?"":prefix, 0) { - ArrayList lst; + NumberedTermsEnum getEnumerator(IndexReader reader) throws IOException { + if (index==null) return new NumberedTermsEnum(reader,this, prefix==null?new BytesRef():prefix, 0) { + ArrayList lst; - protected boolean setTerm() { - boolean b = super.setTerm(); - if (b && (pos & intervalMask)==0) { - String text = term().text(); - sizeOfStrings += text.length() << 1; + protected BytesRef setTerm() throws IOException { + BytesRef br = super.setTerm(); + if (br != null && (pos & intervalMask)==0) { + sizeOfStrings += br.length; if (lst==null) { - lst = new ArrayList(); + lst = new ArrayList(); } - lst.add(text); + lst.add(new BytesRef(br)); } - return b; + return br; } - public boolean skipTo(Term target) throws IOException { + public BytesRef skipTo(Term target) throws IOException { throw new UnsupportedOperationException(); } - public boolean skipTo(int termNumber) throws IOException { + public BytesRef skipTo(int termNumber) throws IOException { throw new UnsupportedOperationException(); } public void close() throws IOException { nTerms=pos; super.close(); - index = lst!=null ? lst.toArray(new String[lst.size()]) : new String[0]; + index = lst!=null ? 
lst.toArray(new BytesRef[lst.size()]) : new BytesRef[0]; } }; - else return new NumberedTermEnum(reader,this,"",0); + else return new NumberedTermsEnum(reader,this,new BytesRef(),0); } Index: solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java =================================================================== --- solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (revision 954967) +++ solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (working copy) @@ -37,13 +37,17 @@ import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.FieldsEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.BytesRef; import org.apache.solr.analysis.CharFilterFactory; import org.apache.solr.analysis.TokenFilterFactory; import org.apache.solr.analysis.TokenizerChain; @@ -468,18 +472,18 @@ indexInfo.add("maxDoc", reader.maxDoc()); if( countTerms ) { - TermEnum te = null; - try{ - te = reader.terms(); - int numTerms = 0; - while (te.next()) { - numTerms++; + Fields fields = MultiFields.getFields(reader); + int numTerms = 0; + if (fields != null) { + FieldsEnum fieldsEnum = fields.iterator(); + while(fieldsEnum.next() != null) { + TermsEnum termsEnum = fieldsEnum.terms(); + while(termsEnum.next() != null) { + numTerms++; + } } - indexInfo.add("numTerms", numTerms ); } - finally{ - if( te != null ) te.close(); - } + indexInfo.add("numTerms", numTerms ); } indexInfo.add("version", reader.getVersion()); // TODO? 
Is this different then: IndexReader.getCurrentVersion( dir )? @@ -528,7 +532,6 @@ int maxBucket = -1; public Map hist = new HashMap(); - private static final double LOG2 = Math.log( 2 ); public static int getPowerOfTwoBucket( int num ) { return Math.max(1, Integer.highestOneBit(num-1) << 1); @@ -621,45 +624,48 @@ private static Map getTopTerms( IndexReader reader, Set fields, int numTerms, Set junkWords ) throws Exception { Map info = new HashMap(); - - TermEnum terms = null; - try{ - terms = reader.terms(); - while (terms.next()) { - String field = terms.term().field(); - String t = terms.term().text(); + + Fields fieldsC = MultiFields.getFields(reader); + if (fieldsC != null) { + FieldsEnum fieldsEnum = fieldsC.iterator(); + String field; + while((field = fieldsEnum.next()) != null) { + + TermsEnum termsEnum = fieldsEnum.terms(); + BytesRef text; + while((text = termsEnum.next()) != null) { + String t = text.utf8ToString(); - // Compute distinct terms for every field - TopTermQueue tiq = info.get( field ); - if( tiq == null ) { - tiq = new TopTermQueue( numTerms+1 ); - info.put( field, tiq ); - } - tiq.distinctTerms++; - tiq.histogram.add( terms.docFreq() ); // add the term to the histogram + // Compute distinct terms for every field + TopTermQueue tiq = info.get( field ); + if( tiq == null ) { + tiq = new TopTermQueue( numTerms+1 ); + info.put( field, tiq ); + } + + tiq.distinctTerms++; + tiq.histogram.add( termsEnum.docFreq() ); // add the term to the histogram - // Only save the distinct terms for fields we worry about - if (fields != null && fields.size() > 0) { - if( !fields.contains( field ) ) { + // Only save the distinct terms for fields we worry about + if (fields != null && fields.size() > 0) { + if( !fields.contains( field ) ) { + continue; + } + } + if( junkWords != null && junkWords.contains( t ) ) { continue; } - } - if( junkWords != null && junkWords.contains( t ) ) { - continue; - } - if( terms.docFreq() > tiq.minFreq ) { - tiq.add(new 
TopTermQueue.TermInfo(terms.term(), terms.docFreq())); + if( termsEnum.docFreq() > tiq.minFreq ) { + tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq())); if (tiq.size() > numTerms) { // if tiq full - tiq.pop(); // remove lowest in tiq - tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq + tiq.pop(); // remove lowest in tiq + tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq + } } } } } - finally { - if( terms != null ) terms.close(); - } return info; } } Index: solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java =================================================================== --- solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java (revision 954967) +++ solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java (working copy) @@ -4,9 +4,12 @@ import org.apache.lucene.document.SetBasedFieldSelector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.TermVectorMapper; import org.apache.lucene.index.TermVectorOffsetInfo; +import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; @@ -31,7 +34,7 @@ import java.util.Iterator; import java.util.List; import java.util.Set; -import java.util.logging.Logger; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -252,9 +255,12 @@ int result = 1; currentTerm = currentTerm.createTerm(term); try { - TermEnum termEnum = reader.terms(currentTerm); - if (termEnum != null && termEnum.term().equals(currentTerm)) { - result = termEnum.docFreq(); + Terms terms = MultiFields.getTerms(reader, currentTerm.field()); + if (terms != null) { + TermsEnum termsEnum = terms.iterator(); + if (termsEnum.seek(new BytesRef(term)) == TermsEnum.SeekStatus.FOUND) { + result = termsEnum.docFreq(); + } } } catch (IOException e) { throw new RuntimeException(e); Index: solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java =================================================================== --- solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java (revision 954967) +++ solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java (working copy) @@ -21,19 +21,18 @@ import java.util.Iterator; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.BytesRef; /** * HighFrequencyDictionary: terms taken from the given field * of a Lucene index, which appear in a number of documents * above a given threshold. * - * When using IndexReader.terms(Term) the code must not call next() on TermEnum - * as the first call to TermEnum, see: http://issues.apache.org/jira/browse/LUCENE-6 - * * Threshold is a value in [0..1] representing the minimum * number of documents (of the total) where a term should appear. 
* @@ -55,41 +54,34 @@ } final class HighFrequencyIterator implements Iterator { - private TermEnum termEnum; - private Term actualTerm; + private TermsEnum termsEnum; + private BytesRef actualTerm; private boolean hasNextCalled; private int minNumDocs; HighFrequencyIterator() { try { - termEnum = reader.terms(new Term(field, "")); + Terms terms = MultiFields.getTerms(reader, field); + if (terms != null) { + termsEnum = terms.iterator(); + } minNumDocs = (int)(thresh * (float)reader.numDocs()); } catch (IOException e) { throw new RuntimeException(e); } } - private boolean isFrequent(Term term) { - try { - return reader.docFreq(term) >= minNumDocs; - } catch (IOException e) { - throw new RuntimeException(e); - } + private boolean isFrequent(int freq) { + return freq >= minNumDocs; } public Object next() { - if (!hasNextCalled) { - hasNext(); + if (!hasNextCalled && !hasNext()) { + return null; } hasNextCalled = false; - try { - termEnum.next(); - } catch (IOException e) { - throw new RuntimeException(e); - } - - return (actualTerm != null) ? actualTerm.text() : null; + return (actualTerm != null) ? actualTerm.utf8ToString() : null; } public boolean hasNext() { @@ -98,35 +90,28 @@ } hasNextCalled = true; - do { - actualTerm = termEnum.term(); + if (termsEnum == null) { + return false; + } + while(true) { + + try { + actualTerm = termsEnum.next(); + } catch (IOException e) { + throw new RuntimeException(e); + } + // if there are no words return false if (actualTerm == null) { return false; } - String currentField = actualTerm.field(); - - // if the next word doesn't have the same field return false - if (currentField != field) { // intern'd comparison - actualTerm = null; - return false; - } - // got a valid term, does it pass the threshold? 
- if (isFrequent(actualTerm)) { + if (isFrequent(termsEnum.docFreq())) { return true; } - - // term not up to threshold - try { - termEnum.next(); - } catch (IOException e) { - throw new RuntimeException(e); - } - - } while (true); + } } public void remove() { Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (revision 954967) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (working copy) @@ -12,10 +12,13 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermPositions; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; +import org.apache.lucene.util.BytesRef; /** @@ -279,8 +282,11 @@ // Make sure position is still incremented when // massive term is skipped: - TermPositions tps = reader.termPositions(new Term("content", "another")); - assertTrue(tps.next()); + DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, + MultiFields.getDeletedDocs(reader), + "content", + new BytesRef("another")); + assertTrue(tps.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(1, tps.freq()); assertEquals(3, tps.nextPosition()); Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (revision 954967) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (working copy) @@ -29,9 +29,11 @@ import 
org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; public class TestKeywordAnalyzer extends BaseTokenStreamTestCase { @@ -82,10 +84,16 @@ writer.close(); IndexReader reader = IndexReader.open(dir, true); - TermDocs td = reader.termDocs(new Term("partnum", "Q36")); - assertTrue(td.next()); - td = reader.termDocs(new Term("partnum", "Q37")); - assertTrue(td.next()); + DocsEnum td = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + "partnum", + new BytesRef("Q36")); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + td = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + "partnum", + new BytesRef("Q37")); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); } // LUCENE-1441 Index: modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (revision 954967) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (working copy) @@ -18,12 +18,15 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.Version; +import 
org.apache.lucene.util.BytesRef; import java.io.IOException; import java.io.Reader; @@ -141,20 +144,15 @@ */ public int addStopWords(IndexReader reader, String fieldName, int maxDocFreq) throws IOException { HashSet stopWords = new HashSet(); - String internedFieldName = StringHelper.intern(fieldName); - TermEnum te = reader.terms(new Term(fieldName)); - Term term = te.term(); - while (term != null) { - if (term.field() != internedFieldName) { - break; + Terms terms = MultiFields.getTerms(reader, fieldName); + if (terms != null) { + TermsEnum te = terms.iterator(); + BytesRef text; + while ((text = te.next()) != null) { + if (te.docFreq() > maxDocFreq) { + stopWords.add(text.utf8ToString()); + } } - if (te.docFreq() > maxDocFreq) { - stopWords.add(term.text()); - } - if (!te.next()) { - break; - } - term = te.term(); } stopWordsPerField.put(fieldName, stopWords); Index: lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java (revision 954967) +++ lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java (working copy) @@ -27,11 +27,12 @@ import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermPositions; +import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; public class TestCachingTokenFilter extends BaseTokenStreamTestCase { private String[] tokens = new String[] {"term1", "term2", "term3", "term2"}; @@ -75,19 +76,28 @@ writer.close(); IndexReader reader = IndexReader.open(dir, true); - TermPositions termPositions = 
reader.termPositions(new Term("preanalyzed", "term1")); - assertTrue(termPositions.next()); + DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, + MultiFields.getDeletedDocs(reader), + "preanalyzed", + new BytesRef("term1")); + assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertEquals(1, termPositions.freq()); assertEquals(0, termPositions.nextPosition()); - termPositions.seek(new Term("preanalyzed", "term2")); - assertTrue(termPositions.next()); + termPositions = MultiFields.getTermPositionsEnum(reader, + MultiFields.getDeletedDocs(reader), + "preanalyzed", + new BytesRef("term2")); + assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertEquals(2, termPositions.freq()); assertEquals(1, termPositions.nextPosition()); assertEquals(3, termPositions.nextPosition()); - termPositions.seek(new Term("preanalyzed", "term3")); - assertTrue(termPositions.next()); + termPositions = MultiFields.getTermPositionsEnum(reader, + MultiFields.getDeletedDocs(reader), + "preanalyzed", + new BytesRef("term3")); + assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertEquals(1, termPositions.freq()); assertEquals(2, termPositions.nextPosition()); reader.close(); Index: lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java (revision 954967) +++ lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java (working copy) @@ -32,11 +32,12 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import 
org.apache.lucene.index.Term; -import org.apache.lucene.index.TermPositions; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.store.MockRAMDirectory; import org.apache.lucene.store.Directory; @@ -50,6 +51,7 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.RegExp; +import org.apache.lucene.util.BytesRef; /** * Term position unit test. @@ -99,13 +101,19 @@ IndexSearcher searcher = new IndexSearcher(store, true); - TermPositions pos = searcher.getIndexReader().termPositions(new Term("field", "1")); - pos.next(); + DocsAndPositionsEnum pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(), + MultiFields.getDeletedDocs(searcher.getIndexReader()), + "field", + new BytesRef("1")); + pos.nextDoc(); // first token should be at position 0 assertEquals(0, pos.nextPosition()); - pos = searcher.getIndexReader().termPositions(new Term("field", "2")); - pos.next(); + pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(), + MultiFields.getDeletedDocs(searcher.getIndexReader()), + "field", + new BytesRef("2")); + pos.nextDoc(); // second token should be at position 2 assertEquals(2, pos.nextPosition()); @@ -238,9 +246,13 @@ IndexReader r = writer.getReader(); - TermPositions tp = r.termPositions(new Term("content", "a")); + DocsAndPositionsEnum tp = MultiFields.getTermPositionsEnum(r, + MultiFields.getDeletedDocs(r), + "content", + new BytesRef("a")); + int count = 0; - assertTrue(tp.next()); + assertTrue(tp.nextDoc() != tp.NO_MORE_DOCS); // "a" occurs 4 times assertEquals(4, tp.freq()); int expected = 0; @@ -250,7 +262,7 @@ assertEquals(6, tp.nextPosition()); // only one doc has "a" - assertFalse(tp.next()); + assertEquals(tp.NO_MORE_DOCS, tp.nextDoc()); IndexSearcher is = new IndexSearcher(r); Index: lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java =================================================================== 
--- lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (revision 954967) +++ lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (working copy) @@ -420,9 +420,4 @@ //assertEquals("C added => A,B,,C in range", 3, hits.length()); searcher.close(); } - - @Deprecated - public void testBackwardsLayer() { - assertTrue(new TermRangeQuery("dummy", null, null, true, true).hasNewAPI); - } } Index: lucene/src/test/org/apache/lucene/search/TestTermVectors.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTermVectors.java (revision 954967) +++ lucene/src/test/org/apache/lucene/search/TestTermVectors.java (working copy) @@ -244,40 +244,38 @@ writer.addDocument(testDoc4); writer.close(); IndexSearcher knownSearcher = new IndexSearcher(dir, true); - TermEnum termEnum = knownSearcher.reader.terms(); - TermDocs termDocs = knownSearcher.reader.termDocs(); - //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length); - - //Similarity sim = knownSearcher.getSimilarity(); - while (termEnum.next() == true) - { - Term term = termEnum.term(); - //System.out.println("Term: " + term); - termDocs.seek(term); - while (termDocs.next()) - { - int docId = termDocs.doc(); - int freq = termDocs.freq(); - //System.out.println("Doc Id: " + docId + " freq " + freq); - TermFreqVector vector = knownSearcher.reader.getTermFreqVector(docId, "field"); - //float tf = sim.tf(freq); - //float idf = sim.idf(knownSearcher.docFreq(term), knownSearcher.maxDoc()); - //float qNorm = sim.queryNorm() - //This is fine since we don't have stop words - //float lNorm = sim.lengthNorm("field", vector.getTerms().length); - //float coord = sim.coord() - //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm); - assertTrue(vector != null); - String[] vTerms = vector.getTerms(); - int [] freqs = vector.getTermFrequencies(); - for (int i = 0; i < vTerms.length; i++) - { - if 
(term.text().equals(vTerms[i])) - { - assertTrue(freqs[i] == freq); - } + FieldsEnum fields = MultiFields.getFields(knownSearcher.reader).iterator(); + + DocsEnum docs = null; + while(fields.next() != null) { + TermsEnum terms = fields.terms(); + while(terms.next() != null) { + String text = terms.term().utf8ToString(); + docs = terms.docs(MultiFields.getDeletedDocs(knownSearcher.reader), docs); + + while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) { + int docId = docs.docID(); + int freq = docs.freq(); + //System.out.println("Doc Id: " + docId + " freq " + freq); + TermFreqVector vector = knownSearcher.reader.getTermFreqVector(docId, "field"); + //float tf = sim.tf(freq); + //float idf = sim.idf(knownSearcher.docFreq(term), knownSearcher.maxDoc()); + //float qNorm = sim.queryNorm() + //This is fine since we don't have stop words + //float lNorm = sim.lengthNorm("field", vector.getTerms().length); + //float coord = sim.coord() + //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm); + assertTrue(vector != null); + String[] vTerms = vector.getTerms(); + int [] freqs = vector.getTermFrequencies(); + for (int i = 0; i < vTerms.length; i++) + { + if (text.equals(vTerms[i])) + { + assertTrue(freqs[i] == freq); + } + } } - } //System.out.println("--------"); } Index: lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java (revision 954967) +++ lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java (working copy) @@ -21,11 +21,13 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; -import 
org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.store.RAMDirectory; import java.io.IOException; @@ -79,13 +81,16 @@ // this TermEnum gives "piccadilly", "pie" and "pizza". String prefix = "pi"; - TermEnum te = ir.terms(new Term("body", prefix + "*")); + TermsEnum te = MultiFields.getFields(ir).terms("body").iterator(); + te.seek(new BytesRef(prefix)); do { - if (te.term().text().startsWith(prefix)) - { - termsWithPrefix.add(te.term()); + String s = te.term().utf8ToString(); + if (s.startsWith(prefix)) { + termsWithPrefix.add(new Term("body", s)); + } else { + break; } - } while (te.next()); + } while (te.next() != null); query1.add(termsWithPrefix.toArray(new Term[0])); query2.add(termsWithPrefix.toArray(new Term[0])); Index: lucene/src/test/org/apache/lucene/search/TestWildcard.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestWildcard.java (revision 954967) +++ lucene/src/test/org/apache/lucene/search/TestWildcard.java (working copy) @@ -19,12 +19,10 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.Index; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; @@ -309,62 +307,4 @@ searcher.close(); } - @Deprecated - private static final class OldWildcardQuery extends MultiTermQuery { - final Term term; - - OldWildcardQuery(Term term) { - this.term = term; - } - - @Override - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new WildcardTermEnum(reader, term); - } - - @Override - 
public String toString(String field) { - return "OldWildcard(" + term.toString()+ ")"; - } - } - - @Deprecated - public void testDeprecatedTermEnum() throws Exception { - RAMDirectory indexStore = getIndexStore("body", new String[] - {"metal", "metals"}); - IndexSearcher searcher = new IndexSearcher(indexStore, true); - Query query1 = new TermQuery(new Term("body", "metal")); - Query query2 = new OldWildcardQuery(new Term("body", "metal*")); - Query query3 = new OldWildcardQuery(new Term("body", "m*tal")); - Query query4 = new OldWildcardQuery(new Term("body", "m*tal*")); - Query query5 = new OldWildcardQuery(new Term("body", "m*tals")); - - BooleanQuery query6 = new BooleanQuery(); - query6.add(query5, BooleanClause.Occur.SHOULD); - - BooleanQuery query7 = new BooleanQuery(); - query7.add(query3, BooleanClause.Occur.SHOULD); - query7.add(query5, BooleanClause.Occur.SHOULD); - - // Queries do not automatically lower-case search terms: - Query query8 = new OldWildcardQuery(new Term("body", "M*tal*")); - - assertMatches(searcher, query1, 1); - assertMatches(searcher, query2, 2); - assertMatches(searcher, query3, 1); - assertMatches(searcher, query4, 2); - assertMatches(searcher, query5, 1); - assertMatches(searcher, query6, 1); - assertMatches(searcher, query7, 2); - assertMatches(searcher, query8, 0); - assertMatches(searcher, new OldWildcardQuery(new Term("body", "*tall")), 0); - assertMatches(searcher, new OldWildcardQuery(new Term("body", "*tal")), 1); - assertMatches(searcher, new OldWildcardQuery(new Term("body", "*tal*")), 2); - } - - @Deprecated - public void testBackwardsLayer() { - assertTrue(new WildcardQuery(new Term("body", "metal*")).hasNewAPI); - assertFalse(new OldWildcardQuery(new Term("body", "metal*")).hasNewAPI); - } } Index: lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java (revision 954967) +++ 
lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java (working copy) @@ -196,10 +196,4 @@ assertSame(TermsEnum.EMPTY, aq.getTermsEnum(searcher.getIndexReader())); assertEquals(0, automatonQueryNrHits(aq)); } - - @Deprecated - public void testBackwardsLayer() { - assertTrue(new AutomatonQuery(newTerm("bogus"), BasicAutomata - .makeString("piece")).hasNewAPI); - } } Index: lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java (revision 954967) +++ lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java (working copy) @@ -377,10 +377,4 @@ doc.add(new Field("field", text, Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); } - - @Deprecated - public void testBackwardsLayer() { - assertTrue(new FuzzyQuery(new Term("dummy", "dummy")).hasNewAPI); - } - } Index: lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java (revision 954967) +++ lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java (working copy) @@ -25,7 +25,6 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.automaton.Automaton; @@ -120,9 +119,4 @@ public void testBacktracking() throws IOException { assertEquals(1, regexQueryNrHits("4934[314]")); } - - @Deprecated - public void testBackwardsLayer() { - assertTrue(new RegexpQuery(newTerm(".*")).hasNewAPI); - } } Index: lucene/src/test/org/apache/lucene/search/JustCompileSearch.java =================================================================== --- 
lucene/src/test/org/apache/lucene/search/JustCompileSearch.java (revision 954967) +++ lucene/src/test/org/apache/lucene/search/JustCompileSearch.java (working copy) @@ -298,25 +298,6 @@ } - static final class JustCompileFilteredTermEnum extends FilteredTermEnum { - - @Override - public float difference() { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - @Override - protected boolean endEnum() { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - @Override - protected boolean termCompare(Term term) { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - } - static final class JustCompilePhraseScorer extends PhraseScorer { JustCompilePhraseScorer(Weight weight, DocsAndPositionsEnum[] docs, int[] offsets, Index: lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (revision 954967) +++ lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (working copy) @@ -590,10 +590,4 @@ ); // difference to int range is tested in TestNumericRangeQuery32 } - - @Test @Deprecated - public void testBackwardsLayer() { - assertTrue(NumericRangeQuery.newLongRange("dummy", null, null, true, true).hasNewAPI); - } - } Index: lucene/src/test/org/apache/lucene/search/TestMultiTermQueryBWComp.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestMultiTermQueryBWComp.java (revision 954967) +++ lucene/src/test/org/apache/lucene/search/TestMultiTermQueryBWComp.java (working copy) @@ -1,239 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.util.LuceneTestCaseJ4; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; -import static org.junit.Assert.*; - -/** - * Test MultiTermQuery api backwards compat - * @deprecated Remove test when old API is no longer supported - */ -@Deprecated -public class TestMultiTermQueryBWComp extends LuceneTestCaseJ4 { - private static RAMDirectory dir; - private static Searcher searcher; - private static final String FIELD = "test"; - - /** - * Test that the correct method (getTermsEnum/getEnum) is called. 
- */ - @Test - public void testEnumMethod() throws IOException { - assertAPI("old", new OldAPI(FIELD)); - assertAPI("new", new NewAPI(FIELD)); - assertAPI("new", new BothAPI(FIELD)); - - assertAPI("old2", new OldExtendsOldAPI(FIELD)); - assertAPI("old2", new OldExtendsNewAPI(FIELD)); - assertAPI("old2", new OldExtendsBothAPI(FIELD)); - - assertAPI("new2", new NewExtendsOldAPI(FIELD)); - assertAPI("new2", new NewExtendsNewAPI(FIELD)); - assertAPI("new2", new NewExtendsBothAPI(FIELD)); - - assertAPI("new2", new BothExtendsOldAPI(FIELD)); - assertAPI("new2", new BothExtendsNewAPI(FIELD)); - assertAPI("new2", new BothExtendsBothAPI(FIELD)); - } - - private static void assertAPI(String expected, Query query) throws IOException { - TopDocs td = searcher.search(query, 25); - assertEquals(1, td.totalHits); - Document doc = searcher.doc(td.scoreDocs[0].doc); - assertEquals(expected, doc.get(FIELD)); - } - - private class OldAPI extends MultiTermQuery { - OldAPI(String field) { super(field); } - - @Override - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new SingleTermEnum(reader, new Term(FIELD, "old")); - } - - @Override - public String toString(String field) { return null; } - } - - private class NewAPI extends MultiTermQuery { - NewAPI(String field) { super(field); } - - @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { - return new SingleTermsEnum(reader, new Term(FIELD, "new")); - } - - @Override - public String toString(String field) { return null; } - } - - private class BothAPI extends MultiTermQuery { - BothAPI(String field) { super(field); } - - @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { - return new SingleTermsEnum(reader, new Term(FIELD, "new")); - } - - @Override - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new SingleTermEnum(reader, new Term(FIELD, "old")); - } - - @Override - public String 
toString(String field) { return null; } - } - - private class OldExtendsOldAPI extends OldAPI { - OldExtendsOldAPI(String field) { super(field); } - - @Override - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new SingleTermEnum(reader, new Term(FIELD, "old2")); - } - } - - private class OldExtendsNewAPI extends NewAPI { - OldExtendsNewAPI(String field) { super(field); } - - @Override - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new SingleTermEnum(reader, new Term(FIELD, "old2")); - } - } - - private class OldExtendsBothAPI extends BothAPI { - OldExtendsBothAPI(String field) { super(field); } - - @Override - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new SingleTermEnum(reader, new Term(FIELD, "old2")); - } - } - - private class NewExtendsOldAPI extends OldAPI { - NewExtendsOldAPI(String field) { super(field); } - - @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { - return new SingleTermsEnum(reader, new Term(FIELD, "new2")); - } - } - - private class NewExtendsNewAPI extends NewAPI { - NewExtendsNewAPI(String field) { super(field); } - - @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { - return new SingleTermsEnum(reader, new Term(FIELD, "new2")); - } - } - - private class NewExtendsBothAPI extends BothAPI { - NewExtendsBothAPI(String field) { super(field); } - - @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { - return new SingleTermsEnum(reader, new Term(FIELD, "new2")); - } - } - - private class BothExtendsOldAPI extends OldAPI { - BothExtendsOldAPI(String field) { super(field); } - - @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { - return new SingleTermsEnum(reader, new Term(FIELD, "new2")); - } - - @Override - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return 
new SingleTermEnum(reader, new Term(FIELD, "old2")); - } - } - - private class BothExtendsNewAPI extends NewAPI { - BothExtendsNewAPI(String field) { super(field); } - - @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { - return new SingleTermsEnum(reader, new Term(FIELD, "new2")); - } - - @Override - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new SingleTermEnum(reader, new Term(FIELD, "old2")); - } - } - - private class BothExtendsBothAPI extends BothAPI { - BothExtendsBothAPI(String field) { super(field); } - - @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { - return new SingleTermsEnum(reader, new Term(FIELD, "new2")); - } - - @Override - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new SingleTermEnum(reader, new Term(FIELD, "old2")); - } - } - - @BeforeClass - public static void beforeClass() throws Exception { - dir = new RAMDirectory(); - IndexWriter writer = new IndexWriter(dir, - new MockAnalyzer(), true, - IndexWriter.MaxFieldLength.LIMITED); - - String values[] = { "old", "old2", "new", "new2" }; - for (String value : values) { - Document doc = new Document(); - doc.add(new Field(FIELD, value, - Field.Store.YES, Field.Index.ANALYZED)); - writer.addDocument(doc); - } - - writer.optimize(); - writer.close(); - searcher = new IndexSearcher(dir, true); - } - - @AfterClass - public static void afterClass() throws Exception { - searcher.close(); - searcher = null; - dir.close(); - dir = null; - } -} Index: lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java (revision 954967) +++ lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java (working copy) @@ -59,9 +59,4 @@ hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("everything", 3, hits.length); 
} - - @Deprecated - public void testBackwardsLayer() { - assertTrue(new PrefixQuery(new Term("dummy", "dummy")).hasNewAPI); - } } Index: lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java (revision 954967) +++ lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java (working copy) @@ -19,21 +19,20 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.store.MockRAMDirectory; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.Version; import java.io.IOException; -import java.util.HashSet; import java.util.LinkedList; -import java.util.Collections; /** * This class tests the MultiPhraseQuery class. @@ -73,13 +72,16 @@ // this TermEnum gives "piccadilly", "pie" and "pizza". 
String prefix = "pi"; - TermEnum te = ir.terms(new Term("body", prefix)); + TermsEnum te = MultiFields.getFields(ir).terms("body").iterator(); + te.seek(new BytesRef(prefix)); do { - if (te.term().text().startsWith(prefix)) - { - termsWithPrefix.add(te.term()); + String s = te.term().utf8ToString(); + if (s.startsWith(prefix)) { + termsWithPrefix.add(new Term("body", s)); + } else { + break; } - } while (te.next()); + } while (te.next() != null); query1.add(termsWithPrefix.toArray(new Term[0])); assertEquals("body:\"blueberry (piccadilly pie pizza)\"", query1.toString()); @@ -96,13 +98,14 @@ MultiPhraseQuery query3 = new MultiPhraseQuery(); termsWithPrefix.clear(); prefix = "blue"; - te = ir.terms(new Term("body", prefix)); + te.seek(new BytesRef(prefix)); + do { - if (te.term().text().startsWith(prefix)) + if (te.term().utf8ToString().startsWith(prefix)) { - termsWithPrefix.add(te.term()); + termsWithPrefix.add(new Term("body", te.term().utf8ToString())); } - } while (te.next()); + } while (te.next() != null); ir.close(); query3.add(termsWithPrefix.toArray(new Term[0])); query3.add(new Term("body", "pizza")); Index: lucene/src/test/org/apache/lucene/index/TestDoc.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestDoc.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestDoc.java (working copy) @@ -213,15 +213,19 @@ for (int i = 0; i < reader.numDocs(); i++) out.println(reader.document(i)); - TermEnum tis = reader.terms(); - while (tis.next()) { - out.print(tis.term()); - out.println(" DF=" + tis.docFreq()); + FieldsEnum fis = reader.fields().iterator(); + String field = fis.next(); + while(field != null) { + TermsEnum tis = fis.terms(); + while(tis.next() != null) { - TermPositions positions = reader.termPositions(tis.term()); - try { - while (positions.next()) { - out.print(" doc=" + positions.doc()); + out.print(" term=" + field + ":" + tis.term()); + out.println(" DF=" + 
tis.docFreq()); + + DocsAndPositionsEnum positions = tis.docsAndPositions(reader.getDeletedDocs(), null); + + while (positions.nextDoc() != positions.NO_MORE_DOCS) { + out.print(" doc=" + positions.docID()); out.print(" TF=" + positions.freq()); out.print(" pos="); out.print(positions.nextPosition()); @@ -229,11 +233,9 @@ out.print("," + positions.nextPosition()); out.println(""); } - } finally { - positions.close(); } + field = fis.next(); } - tis.close(); reader.close(); } } Index: lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.Bits; public class TestParallelTermEnum extends LuceneTestCase { private IndexReader ir1; @@ -77,105 +78,123 @@ pr.add(ir1); pr.add(ir2); - TermDocs td = pr.termDocs(); + Bits delDocs = pr.getDeletedDocs(); - TermEnum te = pr.terms(); - assertTrue(te.next()); - assertEquals("field1:brown", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field1:fox", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field1:jumps", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field1:quick", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field1:the", 
te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field2:brown", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field2:fox", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field2:jumps", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field2:quick", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field2:the", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field3:dog", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field3:fox", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field3:jumps", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field3:lazy", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field3:over", te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertTrue(te.next()); - assertEquals("field3:the", 
te.term().toString()); - td.seek(te.term()); - assertTrue(td.next()); - assertEquals(0, td.doc()); - assertFalse(td.next()); - assertFalse(te.next()); + FieldsEnum fe = pr.fields().iterator(); + + String f = fe.next(); + assertEquals("field0", f); + f = fe.next(); + assertEquals("field1", f); + + TermsEnum te = fe.terms(); + + assertEquals("brown", te.next().utf8ToString()); + DocsEnum td = te.docs(delDocs, null); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("fox", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("jumps", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("quick", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("the", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertNull(te.next()); + f = fe.next(); + assertEquals("field2", f); + te = fe.terms(); + + assertEquals("brown", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("fox", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("jumps", 
te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("quick", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("the", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertNull(te.next()); + f = fe.next(); + assertEquals("field3", f); + te = fe.terms(); + + assertEquals("dog", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("fox", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("jumps", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("lazy", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("over", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertEquals("the", te.next().utf8ToString()); + td = te.docs(delDocs, td); + assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, td.docID()); + 
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); + + assertNull(te.next()); } } Index: lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -83,36 +84,32 @@ throws IOException { IndexReader reader = IndexReader.open(dir, true); - TermEnum termEnum = null; + TermsEnum termEnum = MultiFields.getTerms(reader, "content").iterator(); // create enumeration of all terms - termEnum = reader.terms(); // go to the first term (aaa) termEnum.next(); // assert that term is 'aaa' - assertEquals("aaa", termEnum.term().text()); + assertEquals("aaa", termEnum.term().utf8ToString()); assertEquals(200, termEnum.docFreq()); // go to the second term (bbb) termEnum.next(); // assert that term is 'bbb' - assertEquals("bbb", termEnum.term().text()); + assertEquals("bbb", termEnum.term().utf8ToString()); assertEquals(100, termEnum.docFreq()); - termEnum.close(); - - // create enumeration of terms after term 'aaa', including 'aaa' - termEnum = reader.terms(new Term("content", "aaa")); + // create enumeration of terms after term 'aaa', + // including 'aaa' + termEnum.seek(new BytesRef("aaa")); // assert that term is 'aaa' - assertEquals("aaa", termEnum.term().text()); + assertEquals("aaa", termEnum.term().utf8ToString()); assertEquals(200, termEnum.docFreq()); // go to term 'bbb' termEnum.next(); // assert that term is 'bbb' - assertEquals("bbb", termEnum.term().text()); + assertEquals("bbb", termEnum.term().utf8ToString()); assertEquals(100, termEnum.docFreq()); - - 
termEnum.close(); } private void addDoc(IndexWriter writer, String value) throws IOException Index: lucene/src/test/org/apache/lucene/index/TestIndexReader.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexReader.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestIndexReader.java (working copy) @@ -55,6 +55,8 @@ import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Bits; public class TestIndexReader extends LuceneTestCase { @@ -287,22 +289,17 @@ int expected) throws IOException { - TermDocs tdocs = null; - - try { - tdocs = reader.termDocs(term); - assertNotNull(msg + ", null TermDocs", tdocs); - int count = 0; - while(tdocs.next()) { - count++; - } - assertEquals(msg + ", count mismatch", expected, count); - - } finally { - if (tdocs != null) - tdocs.close(); + DocsEnum tdocs = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + term.field(), + new BytesRef(term.text())); + int count = 0; + if (tdocs != null) { + while(tdocs.nextDoc()!= tdocs.NO_MORE_DOCS) { + count++; + } } - + assertEquals(msg + ", count mismatch", expected, count); } public void testBasicDelete() throws IOException { @@ -1348,21 +1345,26 @@ } // check dictionary and posting lists - TermEnum enum1 = index1.terms(); - TermEnum enum2 = index2.terms(); - TermPositions tp1 = index1.termPositions(); - TermPositions tp2 = index2.termPositions(); - while(enum1.next()) { - assertTrue(enum2.next()); - assertEquals("Different term in dictionary.", enum1.term(), enum2.term()); - tp1.seek(enum1.term()); - tp2.seek(enum1.term()); - while(tp1.next()) { - assertTrue(tp2.next()); - assertEquals("Different doc id in postinglist of term " + enum1.term() + ".", tp1.doc(), tp2.doc()); - assertEquals("Different term frequence in postinglist of term " 
+ enum1.term() + ".", tp1.freq(), tp2.freq()); - for (int i = 0; i < tp1.freq(); i++) { - assertEquals("Different positions in postinglist of term " + enum1.term() + ".", tp1.nextPosition(), tp2.nextPosition()); + FieldsEnum fenum1 = MultiFields.getFields(index1).iterator(); + FieldsEnum fenum2 = MultiFields.getFields(index1).iterator(); + String field1 = null; + Bits delDocs = MultiFields.getDeletedDocs(index1); + while((field1=fenum1.next()) != null) { + assertEquals("Different fields", field1, fenum2.next()); + TermsEnum enum1 = fenum1.terms(); + TermsEnum enum2 = fenum2.terms(); + while(enum1.next() != null) { + assertEquals("Different terms", enum1.term(), enum2.next()); + DocsAndPositionsEnum tp1 = enum1.docsAndPositions(delDocs, null); + DocsAndPositionsEnum tp2 = enum2.docsAndPositions(delDocs, null); + + while(tp1.nextDoc() != DocsEnum.NO_MORE_DOCS) { + assertTrue(tp2.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals("Different doc id in postinglist of term " + enum1.term() + ".", tp1.docID(), tp2.docID()); + assertEquals("Different term frequence in postinglist of term " + enum1.term() + ".", tp1.freq(), tp2.freq()); + for (int i = 0; i < tp1.freq(); i++) { + assertEquals("Different positions in postinglist of term " + enum1.term() + ".", tp1.nextPosition(), tp2.nextPosition()); + } } } } Index: lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (working copy) @@ -71,8 +71,6 @@ IndexReader r = dw.writer.getReader(); dw.writer.commit(); verifyEquals(r, dir, "id"); - FlexTestUtil.verifyFlexVsPreFlex(this.r, r); - FlexTestUtil.verifyFlexVsPreFlex(this.r, dir); r.close(); dw.writer.close(); dir.close(); @@ -94,8 +92,6 @@ // verifyEquals(dir2, dir2, "id"); verifyEquals(dir1, dir2, "id"); - FlexTestUtil.verifyFlexVsPreFlex(r, 
dir1); - FlexTestUtil.verifyFlexVsPreFlex(r, dir2); } public void testMultiConfig() throws Throwable { @@ -121,9 +117,6 @@ indexSerial(docs, dir2); //System.out.println("TEST: verify"); verifyEquals(dir1, dir2, "id"); - - FlexTestUtil.verifyFlexVsPreFlex(r, dir1); - FlexTestUtil.verifyFlexVsPreFlex(r, dir2); } } Index: lucene/src/test/org/apache/lucene/index/TestFlex.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestFlex.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestFlex.java (working copy) @@ -52,13 +52,9 @@ } IndexReader r = w.getReader(); - TermEnum terms = r.terms(new Term("field3", "bbb")); - // pre-flex API should seek to the next field - assertNotNull(terms.term()); - assertEquals("field4", terms.term().field()); - terms = r.terms(new Term("field5", "abc")); - assertNull(terms.term()); + TermsEnum terms = MultiFields.getTerms(r, "field3").iterator(); + assertEquals(TermsEnum.SeekStatus.END, terms.seek(new BytesRef("abc"))); r.close(); } Index: lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.util.BytesRef; import java.io.IOException; @@ -56,11 +57,12 @@ SegmentReader reader = SegmentReader.get(true, info, indexDivisor); assertTrue(reader != null); assertEquals(indexDivisor, reader.getTermInfosIndexDivisor()); - TermDocs termDocs = reader.termDocs(); - assertTrue(termDocs != null); - termDocs.seek(new Term(DocHelper.TEXT_FIELD_2_KEY, "field")); - if (termDocs.next() == true) { - int docId = termDocs.doc(); + + TermsEnum terms = 
reader.fields().terms(DocHelper.TEXT_FIELD_2_KEY).iterator(); + terms.seek(new BytesRef("field")); + DocsEnum termDocs = terms.docs(reader.getDeletedDocs(), null); + if (termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) { + int docId = termDocs.docID(); assertTrue(docId == 0); int freq = termDocs.freq(); assertTrue(freq == 3); @@ -77,20 +79,21 @@ //After adding the document, we should be able to read it back in SegmentReader reader = SegmentReader.get(true, info, indexDivisor); assertTrue(reader != null); - TermDocs termDocs = reader.termDocs(); - assertTrue(termDocs != null); - termDocs.seek(new Term("textField2", "bad")); - assertTrue(termDocs.next() == false); + DocsEnum termDocs = reader.termDocsEnum(reader.getDeletedDocs(), + "textField2", + new BytesRef("bad")); + + assertNull(termDocs); reader.close(); } { //After adding the document, we should be able to read it back in SegmentReader reader = SegmentReader.get(true, info, indexDivisor); assertTrue(reader != null); - TermDocs termDocs = reader.termDocs(); - assertTrue(termDocs != null); - termDocs.seek(new Term("junk", "bad")); - assertTrue(termDocs.next() == false); + DocsEnum termDocs = reader.termDocsEnum(reader.getDeletedDocs(), + "junk", + new BytesRef("bad")); + assertNull(termDocs); reader.close(); } } @@ -121,105 +124,125 @@ IndexReader reader = IndexReader.open(dir, null, true, indexDivisor); - TermDocs tdocs = reader.termDocs(); + DocsEnum tdocs = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + ta.field(), + new BytesRef(ta.text())); // without optimization (assumption skipInterval == 16) // with next - tdocs.seek(ta); - assertTrue(tdocs.next()); - assertEquals(0, tdocs.doc()); + assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(0, tdocs.docID()); assertEquals(4, tdocs.freq()); - assertTrue(tdocs.next()); - assertEquals(1, tdocs.doc()); + assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(1, tdocs.docID()); assertEquals(4, tdocs.freq()); - 
assertTrue(tdocs.skipTo(0)); - assertEquals(2, tdocs.doc()); - assertTrue(tdocs.skipTo(4)); - assertEquals(4, tdocs.doc()); - assertTrue(tdocs.skipTo(9)); - assertEquals(9, tdocs.doc()); - assertFalse(tdocs.skipTo(10)); + assertTrue(tdocs.advance(0) != DocsEnum.NO_MORE_DOCS); + assertEquals(2, tdocs.docID()); + assertTrue(tdocs.advance(4) != DocsEnum.NO_MORE_DOCS); + assertEquals(4, tdocs.docID()); + assertTrue(tdocs.advance(9) != DocsEnum.NO_MORE_DOCS); + assertEquals(9, tdocs.docID()); + assertFalse(tdocs.advance(10) != DocsEnum.NO_MORE_DOCS); // without next - tdocs.seek(ta); - assertTrue(tdocs.skipTo(0)); - assertEquals(0, tdocs.doc()); - assertTrue(tdocs.skipTo(4)); - assertEquals(4, tdocs.doc()); - assertTrue(tdocs.skipTo(9)); - assertEquals(9, tdocs.doc()); - assertFalse(tdocs.skipTo(10)); + tdocs = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + ta.field(), + new BytesRef(ta.text())); + assertTrue(tdocs.advance(0) != DocsEnum.NO_MORE_DOCS); + assertEquals(0, tdocs.docID()); + assertTrue(tdocs.advance(4) != DocsEnum.NO_MORE_DOCS); + assertEquals(4, tdocs.docID()); + assertTrue(tdocs.advance(9) != DocsEnum.NO_MORE_DOCS); + assertEquals(9, tdocs.docID()); + assertFalse(tdocs.advance(10) != DocsEnum.NO_MORE_DOCS); + // exactly skipInterval documents and therefore with optimization // with next - tdocs.seek(tb); - assertTrue(tdocs.next()); - assertEquals(10, tdocs.doc()); + tdocs = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + tb.field(), + new BytesRef(tb.text())); + + assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(10, tdocs.docID()); assertEquals(4, tdocs.freq()); - assertTrue(tdocs.next()); - assertEquals(11, tdocs.doc()); + assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(11, tdocs.docID()); assertEquals(4, tdocs.freq()); - assertTrue(tdocs.skipTo(5)); - assertEquals(12, tdocs.doc()); - assertTrue(tdocs.skipTo(15)); - assertEquals(15, tdocs.doc()); - 
assertTrue(tdocs.skipTo(24)); - assertEquals(24, tdocs.doc()); - assertTrue(tdocs.skipTo(25)); - assertEquals(25, tdocs.doc()); - assertFalse(tdocs.skipTo(26)); + assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS); + assertEquals(12, tdocs.docID()); + assertTrue(tdocs.advance(15) != DocsEnum.NO_MORE_DOCS); + assertEquals(15, tdocs.docID()); + assertTrue(tdocs.advance(24) != DocsEnum.NO_MORE_DOCS); + assertEquals(24, tdocs.docID()); + assertTrue(tdocs.advance(25) != DocsEnum.NO_MORE_DOCS); + assertEquals(25, tdocs.docID()); + assertFalse(tdocs.advance(26) != DocsEnum.NO_MORE_DOCS); // without next - tdocs.seek(tb); - assertTrue(tdocs.skipTo(5)); - assertEquals(10, tdocs.doc()); - assertTrue(tdocs.skipTo(15)); - assertEquals(15, tdocs.doc()); - assertTrue(tdocs.skipTo(24)); - assertEquals(24, tdocs.doc()); - assertTrue(tdocs.skipTo(25)); - assertEquals(25, tdocs.doc()); - assertFalse(tdocs.skipTo(26)); + tdocs = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + tb.field(), + new BytesRef(tb.text())); + assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS); + assertEquals(10, tdocs.docID()); + assertTrue(tdocs.advance(15) != DocsEnum.NO_MORE_DOCS); + assertEquals(15, tdocs.docID()); + assertTrue(tdocs.advance(24) != DocsEnum.NO_MORE_DOCS); + assertEquals(24, tdocs.docID()); + assertTrue(tdocs.advance(25) != DocsEnum.NO_MORE_DOCS); + assertEquals(25, tdocs.docID()); + assertFalse(tdocs.advance(26) != DocsEnum.NO_MORE_DOCS); + // much more than skipInterval documents and therefore with optimization // with next - tdocs.seek(tc); - assertTrue(tdocs.next()); - assertEquals(26, tdocs.doc()); + tdocs = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + tc.field(), + new BytesRef(tc.text())); + + assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS); + assertEquals(26, tdocs.docID()); assertEquals(4, tdocs.freq()); - assertTrue(tdocs.next()); - assertEquals(27, tdocs.doc()); + assertTrue(tdocs.nextDoc() != 
DocsEnum.NO_MORE_DOCS); + assertEquals(27, tdocs.docID()); assertEquals(4, tdocs.freq()); - assertTrue(tdocs.skipTo(5)); - assertEquals(28, tdocs.doc()); - assertTrue(tdocs.skipTo(40)); - assertEquals(40, tdocs.doc()); - assertTrue(tdocs.skipTo(57)); - assertEquals(57, tdocs.doc()); - assertTrue(tdocs.skipTo(74)); - assertEquals(74, tdocs.doc()); - assertTrue(tdocs.skipTo(75)); - assertEquals(75, tdocs.doc()); - assertFalse(tdocs.skipTo(76)); + assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS); + assertEquals(28, tdocs.docID()); + assertTrue(tdocs.advance(40) != DocsEnum.NO_MORE_DOCS); + assertEquals(40, tdocs.docID()); + assertTrue(tdocs.advance(57) != DocsEnum.NO_MORE_DOCS); + assertEquals(57, tdocs.docID()); + assertTrue(tdocs.advance(74) != DocsEnum.NO_MORE_DOCS); + assertEquals(74, tdocs.docID()); + assertTrue(tdocs.advance(75) != DocsEnum.NO_MORE_DOCS); + assertEquals(75, tdocs.docID()); + assertFalse(tdocs.advance(76) != DocsEnum.NO_MORE_DOCS); //without next - tdocs.seek(tc); - assertTrue(tdocs.skipTo(5)); - assertEquals(26, tdocs.doc()); - assertTrue(tdocs.skipTo(40)); - assertEquals(40, tdocs.doc()); - assertTrue(tdocs.skipTo(57)); - assertEquals(57, tdocs.doc()); - assertTrue(tdocs.skipTo(74)); - assertEquals(74, tdocs.doc()); - assertTrue(tdocs.skipTo(75)); - assertEquals(75, tdocs.doc()); - assertFalse(tdocs.skipTo(76)); + tdocs = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + tc.field(), + new BytesRef(tc.text())); + assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS); + assertEquals(26, tdocs.docID()); + assertTrue(tdocs.advance(40) != DocsEnum.NO_MORE_DOCS); + assertEquals(40, tdocs.docID()); + assertTrue(tdocs.advance(57) != DocsEnum.NO_MORE_DOCS); + assertEquals(57, tdocs.docID()); + assertTrue(tdocs.advance(74) != DocsEnum.NO_MORE_DOCS); + assertEquals(74, tdocs.docID()); + assertTrue(tdocs.advance(75) != DocsEnum.NO_MORE_DOCS); + assertEquals(75, tdocs.docID()); + assertFalse(tdocs.advance(76) != 
DocsEnum.NO_MORE_DOCS); - tdocs.close(); reader.close(); dir.close(); } Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -1379,8 +1379,11 @@ assertEquals(1, reader.numDocs()); Term t = new Term("field", "a"); assertEquals(1, reader.docFreq(t)); - TermDocs td = reader.termDocs(t); - td.next(); + DocsEnum td = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + "field", + new BytesRef("a")); + td.nextDoc(); assertEquals(128*1024, td.freq()); reader.close(); dir.close(); @@ -1701,9 +1704,13 @@ // Make sure the doc that hit the exception was marked // as deleted: - TermDocs tdocs = reader.termDocs(t); + DocsEnum tdocs = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + t.field(), + new BytesRef(t.text())); + int count = 0; - while(tdocs.next()) { + while(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS) { count++; } assertEquals(2, count); @@ -2244,9 +2251,12 @@ // Quick test to make sure index is not corrupt: IndexReader reader = IndexReader.open(dir, true); - TermDocs tdocs = reader.termDocs(new Term("field", "aaa")); + DocsEnum tdocs = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + "field", + new BytesRef("aaa")); int count = 0; - while(tdocs.next()) { + while(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS) { count++; } assertTrue(count > 0); @@ -3454,8 +3464,13 @@ Query q = new SpanTermQuery(new Term("field", "a")); hits = s.search(q, null, 1000).scoreDocs; assertEquals(1, hits.length); - TermPositions tps = s.getIndexReader().termPositions(new Term("field", "a")); - assertTrue(tps.next()); + + DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(s.getIndexReader(), + MultiFields.getDeletedDocs(s.getIndexReader()), + "field", + new 
BytesRef("a")); + + assertTrue(tps.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(1, tps.freq()); assertEquals(0, tps.nextPosition()); w.close(); @@ -4465,12 +4480,12 @@ // test that the terms were indexed. - assertTrue(ir.termDocs(new Term("binary","doc1field1")).next()); - assertTrue(ir.termDocs(new Term("binary","doc2field1")).next()); - assertTrue(ir.termDocs(new Term("binary","doc3field1")).next()); - assertTrue(ir.termDocs(new Term("string","doc1field2")).next()); - assertTrue(ir.termDocs(new Term("string","doc2field2")).next()); - assertTrue(ir.termDocs(new Term("string","doc3field2")).next()); + assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc1field1")).nextDoc() != DocsEnum.NO_MORE_DOCS); + assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc2field1")).nextDoc() != DocsEnum.NO_MORE_DOCS); + assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc3field1")).nextDoc() != DocsEnum.NO_MORE_DOCS); + assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc1field2")).nextDoc() != DocsEnum.NO_MORE_DOCS); + assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc2field2")).nextDoc() != DocsEnum.NO_MORE_DOCS); + assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc3field2")).nextDoc() != DocsEnum.NO_MORE_DOCS); ir.close(); dir.close(); Index: lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java (working copy) @@ -21,7 +21,6 @@ import java.io.Reader; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -34,6 
+33,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.MockRAMDirectory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.BytesRef; /** * This testcase tests whether multi-level skipping is being used @@ -68,11 +68,12 @@ writer.close(); IndexReader reader = SegmentReader.getOnlySegmentReader(dir); - TermPositions tp = reader.termPositions(); for (int i = 0; i < 2; i++) { counter = 0; - tp.seek(term); + DocsAndPositionsEnum tp = reader.termPositionsEnum(reader.getDeletedDocs(), + term.field(), + new BytesRef(term.text())); checkSkipTo(tp, 14, 185); // no skips checkSkipTo(tp, 17, 190); // one skip on level 0 @@ -84,18 +85,18 @@ } } - public void checkSkipTo(TermPositions tp, int target, int maxCounter) throws IOException { - tp.skipTo(target); + public void checkSkipTo(DocsAndPositionsEnum tp, int target, int maxCounter) throws IOException { + tp.advance(target); if (maxCounter < counter) { fail("Too many bytes read: " + counter); } - assertEquals("Wrong document " + tp.doc() + " after skipTo target " + target, target, tp.doc()); + assertEquals("Wrong document " + tp.docID() + " after skipTo target " + target, target, tp.docID()); assertEquals("Frequency is not 1: " + tp.freq(), 1,tp.freq()); tp.nextPosition(); - byte[] b = new byte[1]; - tp.getPayload(b, 0); - assertEquals("Wrong payload for the target " + target + ": " + b[0], (byte) target, b[0]); + BytesRef b = tp.getPayload(); + assertEquals(1, b.length); + assertEquals("Wrong payload for the target " + target + ": " + b.bytes[b.offset], (byte) target, b.bytes[b.offset]); } private static class PayloadAnalyzer extends Analyzer { Index: lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy) @@ -38,6 +38,7 @@ import 
org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.BytesRef; public class TestDocumentWriter extends LuceneTestCase { private RAMDirectory dir; @@ -128,8 +129,9 @@ writer.close(); SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); - TermPositions termPositions = reader.termPositions(new Term("repeated", "repeated")); - assertTrue(termPositions.next()); + DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, MultiFields.getDeletedDocs(reader), + "repeated", new BytesRef("repeated")); + assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); int freq = termPositions.freq(); assertEquals(2, freq); assertEquals(0, termPositions.nextPosition()); @@ -190,16 +192,16 @@ writer.close(); SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); - TermPositions termPositions = reader.termPositions(new Term("f1", "a")); - assertTrue(termPositions.next()); + DocsAndPositionsEnum termPositions = reader.fields().terms("f1").docsAndPositions(reader.getDeletedDocs(), new BytesRef("a"), null); + assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); int freq = termPositions.freq(); assertEquals(3, freq); assertEquals(0, termPositions.nextPosition()); - assertEquals(true, termPositions.isPayloadAvailable()); + assertEquals(true, termPositions.hasPayload()); assertEquals(6, termPositions.nextPosition()); - assertEquals(false, termPositions.isPayloadAvailable()); + assertEquals(false, termPositions.hasPayload()); assertEquals(7, termPositions.nextPosition()); - assertEquals(false, termPositions.isPayloadAvailable()); + assertEquals(false, termPositions.hasPayload()); } @@ -233,19 +235,19 @@ writer.close(); SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); - TermPositions termPositions = reader.termPositions(new 
Term("preanalyzed", "term1")); - assertTrue(termPositions.next()); + DocsAndPositionsEnum termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getDeletedDocs(), new BytesRef("term1"), null); + assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertEquals(1, termPositions.freq()); assertEquals(0, termPositions.nextPosition()); - termPositions.seek(new Term("preanalyzed", "term2")); - assertTrue(termPositions.next()); + termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getDeletedDocs(), new BytesRef("term2"), null); + assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertEquals(2, termPositions.freq()); assertEquals(1, termPositions.nextPosition()); assertEquals(3, termPositions.nextPosition()); - termPositions.seek(new Term("preanalyzed", "term3")); - assertTrue(termPositions.next()); + termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getDeletedDocs(), new BytesRef("term3"), null); + assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertEquals(1, termPositions.freq()); assertEquals(2, termPositions.nextPosition()); Index: lucene/src/test/org/apache/lucene/index/TestOmitTf.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestOmitTf.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestOmitTf.java (working copy) @@ -87,10 +87,7 @@ f2.setOmitTermFreqAndPositions(false); d.add(f2); - Random rnd = newRandom(); - writer.addDocument(d); - FlexTestUtil.verifyFlexVsPreFlex(rnd, writer); // force merge writer.optimize(); @@ -98,8 +95,6 @@ writer.close(); _TestUtil.checkIndex(ram); - FlexTestUtil.verifyFlexVsPreFlex(rnd, ram); - SegmentReader reader = SegmentReader.getOnlySegmentReader(ram); FieldInfos fi = reader.fieldInfos(); assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f1").omitTermFreqAndPositions); @@ -145,12 +140,8 @@ 
for(int i=0;i<30;i++) writer.addDocument(d); - Random rnd = newRandom(); - FlexTestUtil.verifyFlexVsPreFlex(rnd, writer); - // force merge writer.optimize(); - FlexTestUtil.verifyFlexVsPreFlex(rnd, writer); // flush writer.close(); Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java (working copy) @@ -38,6 +38,7 @@ import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ThreadInterruptedException; public class TestIndexWriterReader extends LuceneTestCase { @@ -63,12 +64,16 @@ public static int count(Term t, IndexReader r) throws IOException { int count = 0; - TermDocs td = r.termDocs(t); - while (td.next()) { - td.doc(); - count++; + DocsEnum td = MultiFields.getTermDocsEnum(r, + MultiFields.getDeletedDocs(r), + t.field(), new BytesRef(t.text())); + + if (td != null) { + while (td.nextDoc() != DocsEnum.NO_MORE_DOCS) { + td.docID(); + count++; + } } - td.close(); return count; } Index: lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java (working copy) @@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; import java.io.IOException; @@ -158,35 +159,29 @@ MultiReader mr3 = new MultiReader(readers2); // test mixing up TermDocs and TermEnums from different readers. 
- TermDocs td2 = mr2.termDocs(); - TermEnum te3 = mr3.terms(new Term("body","wow")); - td2.seek(te3); + TermsEnum te2 = MultiFields.getTerms(mr2, "body").iterator(); + te2.seek(new BytesRef("wow")); + DocsEnum td = MultiFields.getTermDocsEnum(mr2, + MultiFields.getDeletedDocs(mr2), + "body", + te2.term()); + + TermsEnum te3 = MultiFields.getTerms(mr3, "body").iterator(); + te3.seek(new BytesRef("wow")); + td = te3.docs(MultiFields.getDeletedDocs(mr3), + td); + int ret = 0; // This should blow up if we forget to check that the TermEnum is from the same // reader as the TermDocs. - while (td2.next()) ret += td2.doc(); - td2.close(); - te3.close(); + while (td.nextDoc() != td.NO_MORE_DOCS) ret += td.docID(); // really a dummy assert to ensure that we got some docs and to ensure that // nothing is optimized out. assertTrue(ret > 0); } - public void testAllTermDocs() throws IOException { - IndexReader reader = openReader(); - int NUM_DOCS = 2; - TermDocs td = reader.termDocs(null); - for(int i=0;i stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR); assertTrue(stored != null); Index: lucene/src/test/org/apache/lucene/index/TestStressIndexing.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestStressIndexing.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestStressIndexing.java (working copy) @@ -153,8 +153,6 @@ modifier.close(); - FlexTestUtil.verifyFlexVsPreFlex(RANDOM, directory); - for(int i=0;i= 0); } Index: lucene/src/test/org/apache/lucene/index/TestTermEnumSurrogate.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestTermEnumSurrogate.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestTermEnumSurrogate.java (working copy) @@ -1,53 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * 
contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.util.LuceneTestCase; - -/** - * Back-compat test that you can seek to a lead surrogate in the term - * dictionary. With the old lucene API, this worked, due to the fact that the - * Term itself did not need to be converted into proper UTF-8 bytes. - * - * With the new API the provided Term text must be encodeable into UTF-8. - * - * @deprecated Remove this when the old API is no longer supported. 
- */ -@Deprecated -public class TestTermEnumSurrogate extends LuceneTestCase { - public void testSeekSurrogate() throws Exception { - RAMDirectory dir = new RAMDirectory(); - IndexWriter writer = new IndexWriter(dir, new MockAnalyzer(), - IndexWriter.MaxFieldLength.UNLIMITED); - Document d = new Document(); - Field f = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED); - d.add(f); - f.setValue("abacadaba"); - writer.addDocument(d); - f.setValue("𩬅"); - writer.addDocument(d); - writer.close(); - IndexReader reader = IndexReader.open(dir, true); - TermEnum te = reader.terms(new Term("field", "𩬅".substring(0, 1))); - assertEquals(new Term("field", "𩬅"), te.term()); - } -} Index: lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 954967) +++ lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy) @@ -129,20 +129,15 @@ }; public void testOptimizeOldIndex() throws Exception { - Random rand = newRandom(); - for(int i=0;i allTerms = new ArrayList(); - //System.out.println("TEST: now verify!!"); - testStraightEnum(r); - testRandomSkips(rand, r); - testRandomSeeks(rand, r); - testBogusFieldTerms(rand, r); - } - - private static void testBogusFieldTerms(Random rand, IndexReader r) throws Exception { - final Fields fields = MultiFields.getFields(r); - if (fields == null) { - return; - } - for(int i=0;i<10;i++) { - final String f = "bogus" + rand.nextInt() + "reallybogus"; - Terms terms = fields.terms(f); - assertTrue(terms == null || terms.iterator().next() == null); - } - } - - private static void testStraightEnum(IndexReader r) throws Exception { - - // straight enum of fields/terms/docs/positions - TermEnum termEnum = r.terms(); - final Fields fields = MultiFields.getFields(r); - if (fields == null) { - return; - } - FieldsEnum fieldsEnum = fields.iterator(); - 
while(true) { - final String field = fieldsEnum.next(); - if (field == null) { - boolean result = termEnum.next(); - if (result) { - System.out.println("got unexpected term=" + termEnum.term() + " termEnum=" + termEnum); - } - assertFalse(result); - break; - } - TermsEnum terms = fieldsEnum.terms(); - DocsAndPositionsEnum postings = null; - DocsEnum docsEnum = null; - final TermPositions termPos = r.termPositions(); - while(true) { - final BytesRef termRef = terms.next(); - if (termRef == null) { - break; - } else { - assertTrue(termEnum.next()); - Term t = termEnum.term(); - assertEquals(t.field(), field); - assertEquals(t.text(), termRef.utf8ToString()); - assertEquals(termEnum.docFreq(), terms.docFreq()); - //allTerms.add(t); - - postings = terms.docsAndPositions(MultiFields.getDeletedDocs(r), postings); - docsEnum = terms.docs(MultiFields.getDeletedDocs(r), docsEnum); - - final DocsEnum docs; - if (postings != null) { - docs = postings; - } else { - docs = docsEnum; - } - - termPos.seek(t); - while(true) { - final int doc = docs.nextDoc(); - if (doc == DocsEnum.NO_MORE_DOCS) { - assertFalse(termPos.next()); - break; - } else { - assertTrue(termPos.next()); - assertEquals(termPos.doc(), doc); - assertEquals(termPos.freq(), docs.freq()); - final int freq = docs.freq(); - if (postings == null) { - assertEquals(1, freq); - // Old API did not always do this, - // specifically in the MultiTermPositions - // case when some segs omit positions and - // some don't - //assertEquals(0, termPos.nextPosition()); - assertEquals(false, termPos.isPayloadAvailable()); - } else { - for(int i=0;i A Token can optionally have metadata (a.k.a. Payload) in the form of a variable - length byte array. Use {@link TermPositions#getPayloadLength()} and - {@link TermPositions#getPayload(byte[], int)} to retrieve the payloads from the index. + length byte array. 
Use {@link DocsAndPositionsEnum#getPayloadLength()} and + {@link DocsAndPositionsEnum#getPayload(byte[], int)} to retrieve the payloads from the index.

Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -32,7 +32,6 @@ import org.apache.lucene.queryParser.QueryParser; // for javadoc import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; -import org.apache.lucene.util.VirtualMethod; /** * An abstract {@link Query} that matches documents @@ -72,17 +71,6 @@ protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; transient int numberOfTerms = 0; - /** @deprecated remove when getEnum is removed */ - private static final VirtualMethod getEnumMethod = - new VirtualMethod(MultiTermQuery.class, "getEnum", IndexReader.class); - /** @deprecated remove when getEnum is removed */ - private static final VirtualMethod getTermsEnumMethod = - new VirtualMethod(MultiTermQuery.class, "getTermsEnum", IndexReader.class); - /** @deprecated remove when getEnum is removed */ - final boolean hasNewAPI = - VirtualMethod.compareImplementationDistance(getClass(), - getTermsEnumMethod, getEnumMethod) >= 0; // its ok for both to be overridden - /** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link #getTermsEnum} * and update the boost on each returned term. 
This enables to control the boost factor * for each matching term in {@link #SCORING_BOOLEAN_QUERY_REWRITE} or @@ -190,64 +178,42 @@ protected final int collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException { - if (query.hasNewAPI) { + if (query.field == null) { + throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery."); + } - if (query.field == null) { - throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery."); - } + final Fields fields = MultiFields.getFields(reader); + if (fields == null) { + // reader has no fields + return 0; + } - final Fields fields = MultiFields.getFields(reader); - if (fields == null) { - // reader has no fields - return 0; - } + final Terms terms = fields.terms(query.field); + if (terms == null) { + // field does not exist + return 0; + } - final Terms terms = fields.terms(query.field); - if (terms == null) { - // field does not exist - return 0; - } + final TermsEnum termsEnum = query.getTermsEnum(reader); + assert termsEnum != null; - final TermsEnum termsEnum = query.getTermsEnum(reader); - assert termsEnum != null; - - if (termsEnum == TermsEnum.EMPTY) - return 0; - final BoostAttribute boostAtt = - termsEnum.attributes().addAttribute(BoostAttribute.class); - collector.boostAtt = boostAtt; - int count = 0; - BytesRef term; - final Term placeholderTerm = new Term(query.field); - while ((term = termsEnum.next()) != null) { - if (collector.collect(placeholderTerm.createTerm(term.utf8ToString()), boostAtt.getBoost())) { - count++; - } else { - break; - } + if (termsEnum == TermsEnum.EMPTY) + return 0; + final BoostAttribute boostAtt = + termsEnum.attributes().addAttribute(BoostAttribute.class); + collector.boostAtt = boostAtt; + int count = 0; + BytesRef term; + final Term placeholderTerm = new Term(query.field); + while ((term = 
termsEnum.next()) != null) { + if (collector.collect(placeholderTerm.createTerm(term.utf8ToString()), boostAtt.getBoost())) { + count++; + } else { + break; } - collector.boostAtt = null; - return count; - } else { - // deprecated case - final FilteredTermEnum enumerator = query.getEnum(reader); - int count = 0; - try { - do { - Term t = enumerator.term(); - if (t != null) { - if (collector.collect(t, enumerator.difference())) { - count++; - } else { - break; - } - } - } while (enumerator.next()); - } finally { - enumerator.close(); - } - return count; } + collector.boostAtt = null; + return count; } protected static abstract class TermCollector { @@ -699,24 +665,13 @@ public final String getField() { return field; } /** Construct the enumeration to be used, expanding the - * pattern term. - * @deprecated Please override {@link #getTermsEnum} instead */ - @Deprecated - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - throw new UnsupportedOperationException(); - } - - /** Construct the enumeration to be used, expanding the * pattern term. This method should only be called if * the field exists (ie, implementations can assume the * field does exist). This method should not return null * (should instead return {@link TermsEnum#EMPTY} if no * terms match). The TermsEnum must already be * positioned to the first matching term. */ - // TODO 4.0: make this method abstract - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { - throw new UnsupportedOperationException(); - } + protected abstract TermsEnum getTermsEnum(IndexReader reader) throws IOException; /** * Expert: Return the number of unique terms visited during execution of the query. 
Index: lucene/src/java/org/apache/lucene/search/PrefixTermEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/PrefixTermEnum.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/PrefixTermEnum.java (working copy) @@ -1,68 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; - -/** - * Subclass of FilteredTermEnum for enumerating all terms that match the - * specified prefix filter term. - *

- * Term enumerations are always ordered by Term.compareTo(). Each term in - * the enumeration is greater than all that precede it. - * - * @deprecated Use {@link PrefixTermsEnum} instead. - */ -@Deprecated -public class PrefixTermEnum extends FilteredTermEnum { - - private final Term prefix; - private boolean endEnum = false; - - public PrefixTermEnum(IndexReader reader, Term prefix) throws IOException { - this.prefix = prefix; - - setEnum(reader.terms(new Term(prefix.field(), prefix.text()))); - } - - @Override - public float difference() { - return 1.0f; - } - - @Override - protected boolean endEnum() { - return endEnum; - } - - protected Term getPrefixTerm() { - return prefix; - } - - @Override - protected boolean termCompare(Term term) { - if (term.field() == prefix.field() && term.text().startsWith(prefix.text())) { - return true; - } - endEnum = true; - return false; - } -} Index: lucene/src/java/org/apache/lucene/search/FuzzyQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FuzzyQuery.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/FuzzyQuery.java (working copy) @@ -130,14 +130,6 @@ return prefixLength; } - @Override @Deprecated - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - if (!termLongEnough) { // can only match if it's exact - return new SingleTermEnum(reader, term); - } - return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength); - } - @Override protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { if (!termLongEnough) { // can only match if it's exact Index: lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (working copy) @@ 
-20,11 +20,8 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; import org.apache.lucene.index.Fields; import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.MultiFields; @@ -109,97 +106,54 @@ */ @Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - if (query.hasNewAPI) { - if (query.field == null) { - throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery."); - } + if (query.field == null) { + throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery."); + } - final Fields fields = MultiFields.getFields(reader); - if (fields == null) { - // reader has no fields - return DocIdSet.EMPTY_DOCIDSET; - } + final Fields fields = MultiFields.getFields(reader); + if (fields == null) { + // reader has no fields + return DocIdSet.EMPTY_DOCIDSET; + } - final Terms terms = fields.terms(query.field); - if (terms == null) { - // field does not exist - return DocIdSet.EMPTY_DOCIDSET; - } + final Terms terms = fields.terms(query.field); + if (terms == null) { + // field does not exist + return DocIdSet.EMPTY_DOCIDSET; + } - final TermsEnum termsEnum = query.getTermsEnum(reader); - assert termsEnum != null; - if (termsEnum.next() != null) { - // fill into a OpenBitSet - final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); - int termCount = 0; - final Bits delDocs = MultiFields.getDeletedDocs(reader); - DocsEnum docsEnum = null; - do { - termCount++; - // System.out.println(" iter termCount=" + termCount + " term=" + - // enumerator.term().toBytesString()); - docsEnum = termsEnum.docs(delDocs, docsEnum); - final DocsEnum.BulkReadResult result = 
docsEnum.getBulkResult(); - while (true) { - final int count = docsEnum.read(); - if (count != 0) { - final int[] docs = result.docs.ints; - for (int i = 0; i < count; i++) { - bitSet.set(docs[i]); - } - } else { - break; + final TermsEnum termsEnum = query.getTermsEnum(reader); + assert termsEnum != null; + if (termsEnum.next() != null) { + // fill into a OpenBitSet + final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); + int termCount = 0; + final Bits delDocs = MultiFields.getDeletedDocs(reader); + DocsEnum docsEnum = null; + do { + termCount++; + // System.out.println(" iter termCount=" + termCount + " term=" + + // enumerator.term().toBytesString()); + docsEnum = termsEnum.docs(delDocs, docsEnum); + final DocsEnum.BulkReadResult result = docsEnum.getBulkResult(); + while (true) { + final int count = docsEnum.read(); + if (count != 0) { + final int[] docs = result.docs.ints; + for (int i = 0; i < count; i++) { + bitSet.set(docs[i]); } + } else { + break; } - } while (termsEnum.next() != null); - // System.out.println(" done termCount=" + termCount); + } + } while (termsEnum.next() != null); + // System.out.println(" done termCount=" + termCount); - query.incTotalNumberOfTerms(termCount); - return bitSet; - } else { - return DocIdSet.EMPTY_DOCIDSET; - } + query.incTotalNumberOfTerms(termCount); + return bitSet; } else { - final TermEnum enumerator = query.getEnum(reader); - try { - // if current term in enum is null, the enum is empty -> shortcut - if (enumerator.term() == null) - return DocIdSet.EMPTY_DOCIDSET; - // else fill into a OpenBitSet - final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); - final int[] docs = new int[32]; - final int[] freqs = new int[32]; - TermDocs termDocs = reader.termDocs(); - try { - int termCount = 0; - do { - Term term = enumerator.term(); - if (term == null) - break; - termCount++; - termDocs.seek(term); - while (true) { - final int count = termDocs.read(docs, freqs); - if (count != 0) { - for (int i = 0; i < 
count; i++) { - bitSet.set(docs[i]); - } - } else { - break; - } - } - } while (enumerator.next()); - - query.incTotalNumberOfTerms(termCount); - - } finally { - termDocs.close(); - } - return bitSet; - } finally { - enumerator.close(); - } + return DocIdSet.EMPTY_DOCIDSET; } } - } Index: lucene/src/java/org/apache/lucene/search/FuzzyTermEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FuzzyTermEnum.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/FuzzyTermEnum.java (working copy) @@ -1,288 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; - -/** Subclass of FilteredTermEnum for enumerating all terms that are similar - * to the specified filter term. - * - *

Term enumerations are always ordered by Term.compareTo(). Each term in - * the enumeration is greater than all that precede it. - * - * @deprecated Please use {@link FuzzyTermsEnum} instead. - */ -@Deprecated -public final class FuzzyTermEnum extends FilteredTermEnum { - - /* Allows us save time required to create a new array - * every time similarity is called. - */ - private int[] p; - private int[] d; - - private float similarity; - private boolean endEnum = false; - - private Term searchTerm = null; - private final String field; - private final String text; - private final String prefix; - - private final float minimumSimilarity; - private final float scale_factor; - - /** - * Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f. - *

- * After calling the constructor the enumeration is already pointing to the first - * valid term if such a term exists. - * - * @param reader - * @param term - * @throws IOException - * @see #FuzzyTermEnum(IndexReader, Term, float, int) - */ - public FuzzyTermEnum(IndexReader reader, Term term) throws IOException { - this(reader, term, FuzzyQuery.defaultMinSimilarity, FuzzyQuery.defaultPrefixLength); - } - - /** - * Creates a FuzzyTermEnum with an empty prefix. - *

- * After calling the constructor the enumeration is already pointing to the first - * valid term if such a term exists. - * - * @param reader - * @param term - * @param minSimilarity - * @throws IOException - * @see #FuzzyTermEnum(IndexReader, Term, float, int) - */ - public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) throws IOException { - this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength); - } - - /** - * Constructor for enumeration of all terms from specified reader which share a prefix of - * length prefixLength with term and which have a fuzzy similarity > - * minSimilarity. - *

- * After calling the constructor the enumeration is already pointing to the first - * valid term if such a term exists. - * - * @param reader Delivers terms. - * @param term Pattern term. - * @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f. - * @param prefixLength Length of required common prefix. Default value is 0. - * @throws IOException - */ - public FuzzyTermEnum(IndexReader reader, Term term, final float minSimilarity, final int prefixLength) throws IOException { - super(); - - if (minSimilarity >= 1.0f) - throw new IllegalArgumentException("minimumSimilarity cannot be greater than or equal to 1"); - else if (minSimilarity < 0.0f) - throw new IllegalArgumentException("minimumSimilarity cannot be less than 0"); - if(prefixLength < 0) - throw new IllegalArgumentException("prefixLength cannot be less than 0"); - - this.minimumSimilarity = minSimilarity; - this.scale_factor = 1.0f / (1.0f - minimumSimilarity); - this.searchTerm = term; - this.field = searchTerm.field(); - - //The prefix could be longer than the word. - //It's kind of silly though. It means we must match the entire word. - final int fullSearchTermLength = searchTerm.text().length(); - final int realPrefixLength = prefixLength > fullSearchTermLength ? fullSearchTermLength : prefixLength; - - this.text = searchTerm.text().substring(realPrefixLength); - this.prefix = searchTerm.text().substring(0, realPrefixLength); - - this.p = new int[this.text.length()+1]; - this.d = new int[this.text.length()+1]; - - setEnum(reader.terms(new Term(searchTerm.field(), prefix))); - } - - /** - * The termCompare method in FuzzyTermEnum uses Levenshtein distance to - * calculate the distance between the given term and the comparing term. 
- */ - @Override - protected final boolean termCompare(Term term) { - if (field == term.field() && term.text().startsWith(prefix)) { - final String target = term.text().substring(prefix.length()); - this.similarity = similarity(target); - return (similarity > minimumSimilarity); - } - endEnum = true; - return false; - } - - /** @deprecated Use {@link MultiTermQuery.BoostAttribute} together with {@link FuzzyTermsEnum} */ - @Deprecated - @Override - public final float difference() { - return (similarity - minimumSimilarity) * scale_factor; - } - - /** {@inheritDoc} */ - @Override - public final boolean endEnum() { - return endEnum; - } - - /****************************** - * Compute Levenshtein distance - ******************************/ - - /** - *

Similarity returns a number that is 1.0f or less (including negative numbers) - * based on how similar the Term is compared to a target term. It returns - * exactly 0.0f when - *

-   *    editDistance > maximumEditDistance
- * Otherwise it returns: - *
-   *    1 - (editDistance / length)
- * where length is the length of the shortest term (text or target) including a - * prefix that are identical and editDistance is the Levenshtein distance for - * the two words.

- * - *

Embedded within this algorithm is a fail-fast Levenshtein distance - * algorithm. The fail-fast algorithm differs from the standard Levenshtein - * distance algorithm in that it is aborted if it is discovered that the - * minimum distance between the words is greater than some threshold. - * - *

To calculate the maximum distance threshold we use the following formula: - *

-   *     (1 - minimumSimilarity) * length
- * where length is the shortest term including any prefix that is not part of the - * similarity comparison. This formula was derived by solving for what maximum value - * of distance returns false for the following statements: - *
-   *   similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
-   *   return (similarity > minimumSimilarity);
- * where distance is the Levenshtein distance for the two words. - *

- *

Levenshtein distance (also known as edit distance) is a measure of similarity - * between two strings where the distance is measured as the number of character - * deletions, insertions or substitutions required to transform one string to - * the other string. - * @param target the target word or phrase - * @return the similarity, 0.0 or less indicates that it matches less than the required - * threshold and 1.0 indicates that the text and target are identical - */ - private float similarity(final String target) { - final int m = target.length(); - final int n = text.length(); - if (n == 0) { - //we don't have anything to compare. That means if we just add - //the letters for m we get the new word - return prefix.length() == 0 ? 0.0f : 1.0f - ((float) m / prefix.length()); - } - if (m == 0) { - return prefix.length() == 0 ? 0.0f : 1.0f - ((float) n / prefix.length()); - } - - final int maxDistance = calculateMaxDistance(m); - - if (maxDistance < Math.abs(m-n)) { - //just adding the characters of m to n or vice-versa results in - //too many edits - //for example "pre" length is 3 and "prefixes" length is 8. We can see that - //given this optimal circumstance, the edit distance cannot be less than 5. - //which is 8-3 or more precisely Math.abs(3-8). - //if our maximum edit distance is 4, then we can discard this word - //without looking at it. 
- return 0.0f; - } - - // init matrix d - for (int i = 0; i<=n; ++i) { - p[i] = i; - } - - // start computing edit distance - for (int j = 1; j<=m; ++j) { // iterates through target - int bestPossibleEditDistance = m; - final char t_j = target.charAt(j-1); // jth character of t - d[0] = j; - - for (int i=1; i<=n; ++i) { // iterates through text - // minimum of cell to the left+1, to the top+1, diagonally left and up +(0|1) - if (t_j != text.charAt(i-1)) { - d[i] = Math.min(Math.min(d[i-1], p[i]), p[i-1]) + 1; - } else { - d[i] = Math.min(Math.min(d[i-1]+1, p[i]+1), p[i-1]); - } - bestPossibleEditDistance = Math.min(bestPossibleEditDistance, d[i]); - } - - //After calculating row i, the best possible edit distance - //can be found by found by finding the smallest value in a given column. - //If the bestPossibleEditDistance is greater than the max distance, abort. - - if (j > maxDistance && bestPossibleEditDistance > maxDistance) { //equal is okay, but not greater - //the closest the target can be to the text is just too far away. - //this target is leaving the party early. - return 0.0f; - } - - // copy current distance counts to 'previous row' distance counts: swap p and d - int _d[] = p; - p = d; - d = _d; - } - - // our last action in the above loop was to switch d and p, so p now - // actually has the most recent cost counts - - // this will return less than 0.0 when the edit distance is - // greater than the number of characters in the shorter word. - // but this was the formula that was previously used in FuzzyTermEnum, - // so it has not been changed (even though minimumSimilarity must be - // greater than 0.0) - return 1.0f - ((float)p[n] / (float) (prefix.length() + Math.min(n, m))); - } - - /** - * The max Distance is the maximum Levenshtein distance for the text - * compared to some other value that results in score that is - * better than the minimum similarity. 
- * @param m the length of the "other value" - * @return the maximum levenshtein distance that we care about - */ - private int calculateMaxDistance(int m) { - return (int) ((1-minimumSimilarity) * (Math.min(text.length(), m) + prefix.length())); - } - - /** {@inheritDoc} */ - @Override - public void close() throws IOException { - p = d = null; - searchTerm = null; - super.close(); //call super.close() and let the garbage collector do its work. - } - -} Index: lucene/src/java/org/apache/lucene/search/PrefixQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/PrefixQuery.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/PrefixQuery.java (working copy) @@ -44,11 +44,6 @@ /** Returns the prefix of this query. */ public Term getPrefix() { return prefix; } - @Override @Deprecated - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new PrefixTermEnum(reader, prefix); - } - @Override protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { if (prefix.text().length() == 0) { Index: lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (working copy) @@ -22,7 +22,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.index.TermDocs; // for javadocs /** * A {@link Filter} that only accepts documents whose single @@ -70,7 +69,7 @@ * * In contrast, TermsFilter builds up an {@link OpenBitSet}, * keyed by docID, every time it's created, by enumerating - * through all matching docs using {@link TermDocs} to seek + * through all matching docs using {@link DocsEnum} to seek * and scan through each term's 
docID list. While there is * no linear scan of all docIDs, besides the allocation of * the underlying array in the {@link OpenBitSet}, this Index: lucene/src/java/org/apache/lucene/search/TermRangeTermEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermRangeTermEnum.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/TermRangeTermEnum.java (working copy) @@ -1,156 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.text.Collator; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.util.StringHelper; - -/** - * Subclass of FilteredTermEnum for enumerating all terms that match the - * specified range parameters. - *

- * Term enumerations are always ordered by Term.compareTo(). Each term in - * the enumeration is greater than all that precede it. - * @since 2.9 - * @deprecated Please switch to {@link TermRangeTermsEnum} - */ -@Deprecated -public class TermRangeTermEnum extends FilteredTermEnum { - - private Collator collator = null; - private boolean endEnum = false; - private String field; - private String upperTermText; - private String lowerTermText; - private boolean includeLower; - private boolean includeUpper; - - /** - * Enumerates all terms greater/equal than lowerTerm - * but less/equal than upperTerm. - * - * If an endpoint is null, it is said to be "open". Either or both - * endpoints may be open. Open endpoints may not be exclusive - * (you can't select all but the first or last term without - * explicitly specifying the term to exclude.) - * - * @param reader - * @param field - * An interned field that holds both lower and upper terms. - * @param lowerTermText - * The term text at the lower end of the range - * @param upperTermText - * The term text at the upper end of the range - * @param includeLower - * If true, the lowerTerm is included in the range. - * @param includeUpper - * If true, the upperTerm is included in the range. - * @param collator - * The collator to use to collate index Terms, to determine their - * membership in the range bounded by lowerTerm and - * upperTerm. - * - * @throws IOException - */ - public TermRangeTermEnum(IndexReader reader, String field, String lowerTermText, String upperTermText, - boolean includeLower, boolean includeUpper, Collator collator) throws IOException { - this.collator = collator; - this.upperTermText = upperTermText; - this.lowerTermText = lowerTermText; - this.includeLower = includeLower; - this.includeUpper = includeUpper; - this.field = StringHelper.intern(field); - - // do a little bit of normalization... - // open ended range queries should always be inclusive. 
- if (this.lowerTermText == null) { - this.lowerTermText = ""; - this.includeLower = true; - } - - if (this.upperTermText == null) { - this.includeUpper = true; - } - - String startTermText = collator == null ? this.lowerTermText : ""; - setEnum(reader.terms(new Term(this.field, startTermText))); - } - - @Override - public float difference() { - return 1.0f; - } - - @Override - protected boolean endEnum() { - return endEnum; - } - - @Override - protected boolean termCompare(Term term) { - if (collator == null) { - // Use Unicode code point ordering - boolean checkLower = false; - if (!includeLower) // make adjustments to set to exclusive - checkLower = true; - if (term != null && term.field() == field) { // interned comparison - if (!checkLower || null==lowerTermText || term.text().compareTo(lowerTermText) > 0) { - checkLower = false; - if (upperTermText != null) { - int compare = upperTermText.compareTo(term.text()); - /* - * if beyond the upper term, or is exclusive and this is equal to - * the upper term, break out - */ - if ((compare < 0) || - (!includeUpper && compare==0)) { - endEnum = true; - return false; - } - } - return true; - } - } else { - // break - endEnum = true; - return false; - } - return false; - } else { - if (term != null && term.field() == field) { // interned comparison - if ((lowerTermText == null - || (includeLower - ? collator.compare(term.text(), lowerTermText) >= 0 - : collator.compare(term.text(), lowerTermText) > 0)) - && (upperTermText == null - || (includeUpper - ? 
collator.compare(term.text(), upperTermText) <= 0 - : collator.compare(term.text(), upperTermText) < 0))) { - return true; - } - return false; - } - endEnum = true; - return false; - } - } -} Index: lucene/src/java/org/apache/lucene/search/WildcardTermEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/WildcardTermEnum.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/WildcardTermEnum.java (working copy) @@ -1,192 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; - -/** - * Subclass of FilteredTermEnum for enumerating all terms that match the - * specified wildcard filter term. - *

- * Term enumerations are always ordered by Term.compareTo(). Each term in - * the enumeration is greater than all that precede it. - * @deprecated Please use {@link AutomatonTermsEnum} instead. - */ -@Deprecated -public class WildcardTermEnum extends FilteredTermEnum { - final Term searchTerm; - final String field; - final String text; - final String pre; - final int preLen; - boolean endEnum = false; - - /** - * Creates a new WildcardTermEnum. - *

- * After calling the constructor the enumeration is already pointing to the first - * valid term if such a term exists. - */ - public WildcardTermEnum(IndexReader reader, Term term) throws IOException { - super(); - searchTerm = term; - field = searchTerm.field(); - final String searchTermText = searchTerm.text(); - - final int sidx = searchTermText.indexOf(WILDCARD_STRING); - final int cidx = searchTermText.indexOf(WILDCARD_CHAR); - int idx = sidx; - if (idx == -1) { - idx = cidx; - } - else if (cidx >= 0) { - idx = Math.min(idx, cidx); - } - pre = idx != -1?searchTerm.text().substring(0,idx): ""; - - preLen = pre.length(); - text = searchTermText.substring(preLen); - setEnum(reader.terms(new Term(searchTerm.field(), pre))); - } - - @Override - protected final boolean termCompare(Term term) { - if (field == term.field()) { - String searchText = term.text(); - if (searchText.startsWith(pre)) { - return wildcardEquals(text, 0, searchText, preLen); - } - } - endEnum = true; - return false; - } - - @Override - public float difference() { - return 1.0f; - } - - @Override - public final boolean endEnum() { - return endEnum; - } - - /******************************************** - * String equality with support for wildcards - ********************************************/ - - public static final char WILDCARD_STRING = WildcardQuery.WILDCARD_STRING; - public static final char WILDCARD_CHAR = WildcardQuery.WILDCARD_CHAR; - - /** - * Determines if a word matches a wildcard pattern. - * Work released by Granta Design Ltd after originally being done on - * company time. - */ - public static final boolean wildcardEquals(String pattern, int patternIdx, - String string, int stringIdx) - { - int p = patternIdx; - - for (int s = stringIdx; ; ++p, ++s) - { - // End of string yet? - boolean sEnd = (s >= string.length()); - // End of pattern yet? - boolean pEnd = (p >= pattern.length()); - - // If we're looking at the end of the string... 
- if (sEnd) - { - // Assume the only thing left on the pattern is/are wildcards - boolean justWildcardsLeft = true; - - // Current wildcard position - int wildcardSearchPos = p; - // While we haven't found the end of the pattern, - // and haven't encountered any non-wildcard characters - while (wildcardSearchPos < pattern.length() && justWildcardsLeft) - { - // Check the character at the current position - char wildchar = pattern.charAt(wildcardSearchPos); - - // If it's not a wildcard character, then there is more - // pattern information after this/these wildcards. - if (wildchar != WILDCARD_CHAR && wildchar != WILDCARD_STRING) - { - justWildcardsLeft = false; - } - else - { - // to prevent "cat" matches "ca??" - if (wildchar == WILDCARD_CHAR) { - return false; - } - - // Look at the next character - wildcardSearchPos++; - } - } - - // This was a prefix wildcard search, and we've matched, so - // return true. - if (justWildcardsLeft) - { - return true; - } - } - - // If we've gone past the end of the string, or the pattern, - // return false. - if (sEnd || pEnd) - { - break; - } - - // Match a single character, so continue. - if (pattern.charAt(p) == WILDCARD_CHAR) - { - continue; - } - - // - if (pattern.charAt(p) == WILDCARD_STRING) - { - // Look at the character beyond the '*'. - ++p; - // Examine the string, starting at the last character. - for (int i = string.length(); i >= s; --i) - { - if (wildcardEquals(pattern, p, string, i)) - { - return true; - } - } - break; - } - if (pattern.charAt(p) != string.charAt(s)) - { - break; - } - } - return false; - } -} Index: lucene/src/java/org/apache/lucene/search/TermRangeQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (working copy) @@ -129,12 +129,6 @@ /** Returns the collator used to determine range inclusion, if any. 
*/ public Collator getCollator() { return collator; } - @Override @Deprecated - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new TermRangeTermEnum(reader, field, lowerTerm, - upperTerm, includeLower, includeUpper, collator); - } - @Override protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) { Index: lucene/src/java/org/apache/lucene/search/SingleTermEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/SingleTermEnum.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/SingleTermEnum.java (working copy) @@ -1,68 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; - -/** - * Subclass of FilteredTermEnum for enumerating a single term. - *

- * This can be used by {@link MultiTermQuery}s that need only visit one term, - * but want to preserve MultiTermQuery semantics such as - * {@link MultiTermQuery#rewriteMethod}. - */ -@Deprecated -public class SingleTermEnum extends FilteredTermEnum { - private Term singleTerm; - private boolean endEnum = false; - - /** - * Creates a new SingleTermEnum. - *

- * After calling the constructor the enumeration is already pointing to the term, - * if it exists. - */ - public SingleTermEnum(IndexReader reader, Term singleTerm) throws IOException { - super(); - this.singleTerm = singleTerm; - setEnum(reader.terms(singleTerm)); - } - - @Override - public float difference() { - return 1.0F; - } - - @Override - protected boolean endEnum() { - return endEnum; - } - - @Override - protected boolean termCompare(Term term) { - if (term.equals(singleTerm)) { - return true; - } else { - endEnum = true; - return false; - } - } -} Index: lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java (working copy) @@ -1,115 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; - -/** Abstract class for enumerating a subset of all terms. - -

Term enumerations are always ordered by Term.compareTo(). Each term in - the enumeration is greater than all that precede it. - - @deprecated Switch to {@link FilteredTermsEnum} instead. -*/ -@Deprecated -public abstract class FilteredTermEnum extends TermEnum { - /** the current term */ - protected Term currentTerm = null; - - /** the delegate enum - to set this member use {@link #setEnum} */ - protected TermEnum actualEnum = null; - - public FilteredTermEnum() {} - - /** Equality compare on the term */ - protected abstract boolean termCompare(Term term); - - /** Equality measure on the term, it is in reality a boost - * factor and used like so in {@link MultiTermQuery}, - * so the name is wrong. - * @deprecated Use {@link MultiTermQuery.BoostAttribute} - * together with {@link FilteredTermsEnum}. For example - * see {@link FuzzyTermsEnum} - */ - @Deprecated - public abstract float difference(); - - /** Indicates the end of the enumeration has been reached */ - protected abstract boolean endEnum(); - - /** - * use this method to set the actual TermEnum (e.g. in ctor), - * it will be automatically positioned on the first matching term. - */ - protected void setEnum(TermEnum actualEnum) throws IOException { - this.actualEnum = actualEnum; - // Find the first term that matches - Term term = actualEnum.term(); - if (term != null && termCompare(term)) - currentTerm = term; - else next(); - } - - /** - * Returns the docFreq of the current Term in the enumeration. - * Returns -1 if no Term matches or all terms have been enumerated. - */ - @Override - public int docFreq() { - if (currentTerm == null) return -1; - assert actualEnum != null; - return actualEnum.docFreq(); - } - - /** Increments the enumeration to the next element. True if one exists. */ - @Override - public boolean next() throws IOException { - if (actualEnum == null) return false; // the actual enumerator is not initialized! 
- currentTerm = null; - while (currentTerm == null) { - if (endEnum()) return false; - if (actualEnum.next()) { - Term term = actualEnum.term(); - if (termCompare(term)) { - currentTerm = term; - return true; - } - } - else return false; - } - currentTerm = null; - return false; - } - - /** Returns the current Term in the enumeration. - * Returns null if no Term matches or all terms have been enumerated. */ - @Override - public Term term() { - return currentTerm; - } - - /** Closes the enumeration to further activity, freeing resources. */ - @Override - public void close() throws IOException { - if (actualEnum != null) actualEnum.close(); - currentTerm = null; - actualEnum = null; - } -} Index: lucene/src/java/org/apache/lucene/search/WildcardQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/WildcardQuery.java (revision 954967) +++ lucene/src/java/org/apache/lucene/search/WildcardQuery.java (working copy) @@ -81,17 +81,6 @@ return BasicOperations.concatenate(automata); } - @Override @Deprecated - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new WildcardTermEnum(reader, term); - } - - // we override this method, else backwards layer in MTQ will prefer getEnum! - @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { - return super.getTermsEnum(reader); - } - /** * Returns the pattern term. */ Index: lucene/src/java/org/apache/lucene/index/AllDocsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/AllDocsEnum.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/AllDocsEnum.java (working copy) @@ -1,78 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.index; - -import org.apache.lucene.util.Bits; -import java.io.IOException; - -class AllDocsEnum extends DocsEnum { - protected final Bits skipDocs; - protected final int maxDoc; - protected final IndexReader reader; - protected int doc = -1; - - protected AllDocsEnum(IndexReader reader, Bits skipDocs) { - this.skipDocs = skipDocs; - this.maxDoc = reader.maxDoc(); - this.reader = reader; - } - - @Override - public int freq() { - return 1; - } - - @Override - public int docID() { - return doc; - } - - @Override - public int nextDoc() throws IOException { - return advance(doc+1); - } - - @Override - public int read() throws IOException { - final int[] docs = bulkResult.docs.ints; - final int[] freqs = bulkResult.freqs.ints; - int i = 0; - while (i < docs.length && doc < maxDoc) { - if (skipDocs == null || !skipDocs.get(doc)) { - docs[i] = doc; - freqs[i] = 1; - ++i; - } - doc++; - } - return i; - } - - @Override - public int advance(int target) throws IOException { - doc = target; - while (doc < maxDoc) { - if (skipDocs == null || !skipDocs.get(doc)) { - return doc; - } - doc++; - } - doc = NO_MORE_DOCS; - return doc; - } -} Index: lucene/src/java/org/apache/lucene/index/LegacySegmentMergeQueue.java =================================================================== --- lucene/src/java/org/apache/lucene/index/LegacySegmentMergeQueue.java (revision 954967) +++ 
lucene/src/java/org/apache/lucene/index/LegacySegmentMergeQueue.java (working copy) @@ -1,42 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import org.apache.lucene.util.PriorityQueue; - -final class LegacySegmentMergeQueue extends PriorityQueue { - LegacySegmentMergeQueue(int size) { - initialize(size); - } - - @Override - protected final boolean lessThan(LegacySegmentMergeInfo a, LegacySegmentMergeInfo b) { - int comparison = a.term.compareTo(b.term); - if (comparison == 0) - return a.base < b.base; - else - return comparison < 0; - } - - final void close() throws IOException { - while (top() != null) - ((LegacySegmentMergeInfo)pop()).close(); - } - -} Index: lucene/src/java/org/apache/lucene/index/ParallelReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/ParallelReader.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/ParallelReader.java (working copy) @@ -452,18 +452,6 @@ } @Override - public TermEnum terms() throws IOException { - ensureOpen(); - return new ParallelTermEnum(); - } - - @Override - public TermEnum terms(Term term) throws 
IOException { - ensureOpen(); - return new ParallelTermEnum(term); - } - - @Override public int docFreq(Term term) throws IOException { ensureOpen(); IndexReader reader = fieldToReader.get(term.field()); @@ -477,30 +465,6 @@ return reader == null? 0 : reader.docFreq(field, term); } - @Override - public TermDocs termDocs(Term term) throws IOException { - ensureOpen(); - return new ParallelTermDocs(term); - } - - @Override - public TermDocs termDocs() throws IOException { - ensureOpen(); - return new ParallelTermDocs(); - } - - @Override - public TermPositions termPositions(Term term) throws IOException { - ensureOpen(); - return new ParallelTermPositions(term); - } - - @Override - public TermPositions termPositions() throws IOException { - ensureOpen(); - return new ParallelTermPositions(); - } - /** * Checks recursively if all subreaders are up to date. */ @@ -574,168 +538,6 @@ } return fieldSet; } - - @Deprecated - private class ParallelTermEnum extends TermEnum { - private String field; - private Iterator fieldIterator; - private TermEnum termEnum; - - public ParallelTermEnum() throws IOException { - try { - field = fieldToReader.firstKey(); - } catch(NoSuchElementException e) { - // No fields, so keep field == null, termEnum == null - return; - } - if (field != null) - termEnum = fieldToReader.get(field).terms(); - } - - public ParallelTermEnum(Term term) throws IOException { - field = term.field(); - IndexReader reader = fieldToReader.get(field); - if (reader!=null) - termEnum = reader.terms(term); - } - - @Override - public boolean next() throws IOException { - if (termEnum==null) - return false; - - // another term in this field? 
- if (termEnum.next() && termEnum.term().field()==field) - return true; // yes, keep going - - termEnum.close(); // close old termEnum - - // find the next field with terms, if any - if (fieldIterator==null) { - fieldIterator = fieldToReader.tailMap(field).keySet().iterator(); - fieldIterator.next(); // Skip field to get next one - } - while (fieldIterator.hasNext()) { - field = fieldIterator.next(); - termEnum = fieldToReader.get(field).terms(new Term(field)); - Term term = termEnum.term(); - if (term!=null && term.field()==field) - return true; - else - termEnum.close(); - } - - return false; // no more fields - } - - @Override - public Term term() { - if (termEnum==null) - return null; - - return termEnum.term(); - } - - @Override - public int docFreq() { - if (termEnum==null) - return 0; - - return termEnum.docFreq(); - } - - @Override - public void close() throws IOException { - if (termEnum!=null) - termEnum.close(); - } - - } - - // wrap a TermDocs in order to support seek(Term) - private class ParallelTermDocs implements TermDocs { - protected TermDocs termDocs; - - public ParallelTermDocs() {} - public ParallelTermDocs(Term term) throws IOException { - if (term == null) - termDocs = readers.isEmpty() ? null : readers.get(0).termDocs(null); - else - seek(term); - } - - public int doc() { return termDocs.doc(); } - public int freq() { return termDocs.freq(); } - - public void seek(Term term) throws IOException { - IndexReader reader = fieldToReader.get(term.field()); - termDocs = reader!=null ? 
reader.termDocs(term) : null; - } - - public void seek(TermEnum termEnum) throws IOException { - seek(termEnum.term()); - } - - public boolean next() throws IOException { - if (termDocs==null) - return false; - - return termDocs.next(); - } - - public int read(final int[] docs, final int[] freqs) throws IOException { - if (termDocs==null) - return 0; - - return termDocs.read(docs, freqs); - } - - public boolean skipTo(int target) throws IOException { - if (termDocs==null) - return false; - - return termDocs.skipTo(target); - } - - public void close() throws IOException { - if (termDocs!=null) - termDocs.close(); - } - - } - - private class ParallelTermPositions - extends ParallelTermDocs implements TermPositions { - - public ParallelTermPositions() {} - public ParallelTermPositions(Term term) throws IOException { seek(term); } - - @Override - public void seek(Term term) throws IOException { - IndexReader reader = fieldToReader.get(term.field()); - termDocs = reader!=null ? reader.termPositions(term) : null; - } - - public int nextPosition() throws IOException { - // It is an error to call this if there is no next position, e.g. 
if termDocs==null - return ((TermPositions)termDocs).nextPosition(); - } - - public int getPayloadLength() throws IOException { - return ((TermPositions)termDocs).getPayloadLength(); - } - - public byte[] getPayload(byte[] data, int offset) throws IOException { - return ((TermPositions)termDocs).getPayload(data, offset); - } - - - // TODO: Remove warning after API has been finalized - public boolean isPayloadAvailable() { - return ((TermPositions) termDocs).isPayloadAvailable(); - } - } - } Index: lucene/src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentReader.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -39,11 +39,7 @@ import org.apache.lucene.util.BitVector; import org.apache.lucene.util.Bits; import org.apache.lucene.util.CloseableThreadLocal; -import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.index.codecs.CodecProvider; -import org.apache.lucene.index.codecs.preflex.PreFlexFields; -import org.apache.lucene.index.codecs.preflex.SegmentTermDocs; -import org.apache.lucene.index.codecs.preflex.SegmentTermPositions; import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close import org.apache.lucene.util.BytesRef; @@ -93,7 +89,6 @@ final FieldInfos fieldInfos; final FieldsProducer fields; - final boolean isPreFlex; final CodecProvider codecs; final Directory dir; @@ -140,7 +135,6 @@ fields = si.getCodec().fieldsProducer(new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor)); assert fields != null; - isPreFlex = fields instanceof PreFlexFields; success = true; } finally { if (!success) { @@ -824,37 +818,6 @@ return new ArrayList(si.files()); } - @Override - public TermEnum terms() throws IOException { - ensureOpen(); - if (core.isPreFlex) { 
- // For old API on an old segment, instead of - // converting old API -> new API -> old API, just give - // direct access to old: - return ((PreFlexFields) core.fields).tis.terms(); - } else { - // Emulate pre-flex API on top of flex index - return new LegacyTermEnum(null); - } - } - - /** @deprecated Please switch to the flex API ({@link - * #fields}) instead. */ - @Deprecated - @Override - public TermEnum terms(Term t) throws IOException { - ensureOpen(); - if (core.isPreFlex) { - // For old API on an old segment, instead of - // converting old API -> new API -> old API, just give - // direct access to old: - return ((PreFlexFields) core.fields).tis.terms(t); - } else { - // Emulate pre-flex API on top of flex index - return new LegacyTermEnum(t); - } - } - FieldInfos fieldInfos() { return core.fieldInfos; } @@ -870,75 +833,12 @@ return (deletedDocs != null && deletedDocs.get(n)); } - /** @deprecated Switch to the flex API ({@link - * IndexReader#termDocsEnum}) instead. */ - @Deprecated @Override - public TermDocs termDocs(Term term) throws IOException { - if (term == null) { - return new AllTermDocs(this); - } else { - return super.termDocs(term); - } - } - - @Override public Fields fields() throws IOException { return core.fields; } - /** @deprecated Switch to the flex API {@link - * IndexReader#termDocsEnum} instead. 
*/ - @Deprecated @Override - public TermDocs termDocs() throws IOException { - ensureOpen(); - if (core.isPreFlex) { - // For old API on an old segment, instead of - // converting old API -> new API -> old API, just give - // direct access to old: - final PreFlexFields pre = (PreFlexFields) core.fields; - SegmentTermDocs std = new SegmentTermDocs(pre.freqStream, pre.tis, core.fieldInfos); - std.setSkipDocs(deletedDocs); - return std; - } else { - // Emulate old API - return new LegacyTermDocs(); - } - } - - /** @deprecated Switch to the flex API {@link - * IndexReader#termDocsEnum} instead */ - @Deprecated - @Override - public TermPositions termPositions() throws IOException { - ensureOpen(); - if (core.isPreFlex) { - // For old API on an old segment, instead of - // converting old API -> new API -> old API, just give - // direct access to old: - final PreFlexFields pre = (PreFlexFields) core.fields; - SegmentTermPositions stp = new SegmentTermPositions(pre.freqStream, pre.proxStream, pre.tis, core.fieldInfos); - stp.setSkipDocs(deletedDocs); - return stp; - } else { - // Emulate old API - return new LegacyTermPositions(); - } - } - - @Override - public int docFreq(Term t) throws IOException { - ensureOpen(); - Terms terms = core.fields.terms(t.field); - if (terms != null) { - return terms.docFreq(new BytesRef(t.text)); - } else { - return 0; - } - } - - @Override public int docFreq(String field, BytesRef term) throws IOException { ensureOpen(); @@ -1328,393 +1228,4 @@ public int getTermInfosIndexDivisor() { return core.termsIndexDivisor; } - - // Back compat: pre-flex TermEnum API over flex API - @Deprecated - final private class LegacyTermEnum extends TermEnum { - FieldsEnum fields; - TermsEnum terms; - boolean done; - String currentField; - BytesRef currentTerm; - - public LegacyTermEnum(Term t) throws IOException { - fields = core.fields.iterator(); - currentField = fields.next(); - if (currentField == null) { - // no fields - done = true; - } else if (t != 
null) { - // Pre-seek to this term - - while(currentField.compareTo(t.field) < 0) { - currentField = fields.next(); - if (currentField == null) { - // Hit end of fields - done = true; - break; - } - } - - if (!done) { - // We found some field -- get its terms: - terms = fields.terms(); - - if (currentField == t.field) { - // We found exactly the requested field; now - // seek the term text: - String text = t.text(); - - // this is only for backwards compatibility. - // previously you could supply a term with unpaired surrogates, - // and it would return the next Term. - // if someone does this, tack on the lowest possible trail surrogate. - // this emulates the old behavior, and forms "valid UTF-8" unicode. - BytesRef tr = new BytesRef(UnicodeUtil.nextValidUTF16String(text)); - TermsEnum.SeekStatus status = terms.seek(tr); - - if (status == TermsEnum.SeekStatus.END) { - // Rollover to the next field - terms = null; - next(); - } else if (status == TermsEnum.SeekStatus.FOUND) { - // Found exactly the term - currentTerm = tr; - } else { - // Found another term, in this same field - currentTerm = terms.term(); - } - } else { - // We didn't find exact field (we found the - // following field); advance to first term in - // this field - next(); - } - } - } else { - terms = fields.terms(); - } - } - - @Override - public boolean next() throws IOException { - - if (done) { - return false; - } - - while(true) { - if (terms == null) { - // Advance to the next field - currentField = fields.next(); - if (currentField == null) { - done = true; - return false; - } - terms = fields.terms(); - } - currentTerm = terms.next(); - if (currentTerm != null) { - // This field still has terms - return true; - } else { - // Done producing terms from this field; advance - // to next field - terms = null; - } - } - } - - @Override - public Term term() { - if (!done && terms != null && currentTerm != null) { - return new Term(currentField, currentTerm.utf8ToString()); - } - return null; - } - 
- @Override - public int docFreq() { - return terms == null ? 0 : terms.docFreq(); - } - - @Override - public void close() {} - } - - // Back compat: emulates legacy TermDocs API on top of - // flex API - private class LegacyTermDocs implements TermDocs { - - String currentField; - final Fields fields; - TermsEnum terms; - DocsEnum docsEnum; - boolean any; - - LegacyTermDocs() throws IOException { - fields = core.fields; - } - - public void close() {} - - public void seek(TermEnum termEnum) throws IOException { - seek(termEnum.term()); - } - - public boolean skipTo(int target) throws IOException { - if (!any) { - return false; - } else { - return docsEnum.advance(target) != docsEnum.NO_MORE_DOCS; - } - } - - public void seek(Term term) throws IOException { - - any = false; - - if (terms != null && !term.field.equals(currentField)) { - // new field - terms = null; - } - - if (terms == null) { - currentField = term.field; - Terms terms1 = fields.terms(currentField); - if (terms1 == null) { - // no such field - return; - } else { - terms = terms1.iterator(); - } - } - - if (terms.seek(new BytesRef(term.text)) == TermsEnum.SeekStatus.FOUND) { - // Term exists - any = true; - pendingBulkResult = null; - docsEnum = terms.docs(deletedDocs, docsEnum); - } - } - - public int doc() { - if (!any) { - return 0; - } else { - return docsEnum.docID(); - } - } - - private DocsEnum.BulkReadResult pendingBulkResult; - private int bulkCount; - private int pendingBulk; - - public int read(int[] docs, int[] freqs) throws IOException { - if (any && pendingBulkResult == null) { - pendingBulkResult = docsEnum.getBulkResult(); - } - if (!any) { - return 0; - } else if (pendingBulk > 0) { - final int left = bulkCount - pendingBulk; - if (docs.length >= left) { - // read all pending - System.arraycopy(pendingBulkResult.docs.ints, pendingBulk, docs, 0, left); - System.arraycopy(pendingBulkResult.freqs.ints, pendingBulk, freqs, 0, left); - pendingBulk = 0; - return left; - } else { - // read 
only part of pending - System.arraycopy(pendingBulkResult.docs.ints, pendingBulk, docs, 0, docs.length); - System.arraycopy(pendingBulkResult.freqs.ints, pendingBulk, freqs, 0, docs.length); - pendingBulk += docs.length; - return docs.length; - } - } else { - // nothing pending - bulkCount = docsEnum.read(); - if (docs.length >= bulkCount) { - System.arraycopy(pendingBulkResult.docs.ints, 0, docs, 0, bulkCount); - System.arraycopy(pendingBulkResult.freqs.ints, 0, freqs, 0, bulkCount); - return bulkCount; - } else { - System.arraycopy(pendingBulkResult.docs.ints, 0, docs, 0, docs.length); - System.arraycopy(pendingBulkResult.freqs.ints, 0, freqs, 0, docs.length); - pendingBulk = docs.length; - return docs.length; - } - } - } - - public int freq() { - if (!any) { - return 0; - } else { - return docsEnum.freq(); - } - } - - public boolean next() throws IOException { - if (!any) { - return false; - } else { - return docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS; - } - } - } - - // Back compat: implements legacy TermPositions API on top - // of flex API - final private class LegacyTermPositions implements TermPositions { - - String currentField; - final Fields fields; - TermsEnum terms; - DocsAndPositionsEnum postingsEnum; - DocsEnum docsEnum; - boolean any; - - LegacyTermPositions() throws IOException { - fields = core.fields; - } - - public void close() {} - - public void seek(TermEnum termEnum) throws IOException { - seek(termEnum.term()); - } - - public boolean skipTo(int target) throws IOException { - if (!any) { - return false; - } else { - return docsEnum.advance(target) != docsEnum.NO_MORE_DOCS; - } - } - - public void seek(Term term) throws IOException { - - any = false; - - if (terms != null && !term.field.equals(currentField)) { - // new field - terms = null; - } - - if (terms == null) { - currentField = term.field; - Terms terms1 = fields.terms(currentField); - if (terms1 == null) { - // no such field - return; - } else { - terms = terms1.iterator(); - } - } - 
- if (terms.seek(new BytesRef(term.text)) == TermsEnum.SeekStatus.FOUND) { - // Term exists - any = true; - postingsEnum = terms.docsAndPositions(deletedDocs, postingsEnum); - if (postingsEnum == null) { - docsEnum = terms.docs(deletedDocs, postingsEnum); - } else { - docsEnum = postingsEnum; - } - } - } - - public int doc() { - if (!any) { - return 0; - } else { - return docsEnum.docID(); - } - } - - public int freq() { - if (!any) { - return 0; - } else { - return docsEnum.freq(); - } - } - - public boolean next() throws IOException { - if (!any) { - return false; - } else { - return docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS; - } - } - - public int read(int[] docs, int[] freqs) throws IOException { - throw new UnsupportedOperationException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead."); - } - - public int nextPosition() throws IOException { - pendingPayload = null; - if (!any || postingsEnum == null) { - return 0; - } else { - return postingsEnum.nextPosition(); - } - } - - private BytesRef pendingPayload; - - public int getPayloadLength() throws IOException { - if (!any || postingsEnum == null) { - return 0; - } else { - if (pendingPayload == null) { - if (!postingsEnum.hasPayload()) { - return 0; - } - pendingPayload = postingsEnum.getPayload(); - } - if (pendingPayload == null) { - return 0; - } - return pendingPayload.length; - } - } - - public byte[] getPayload(byte[] bytes, int offset) throws IOException { - if (!any || postingsEnum == null) { - return null; - } - if (pendingPayload == null) { - if (!postingsEnum.hasPayload()) { - return null; - } - pendingPayload = postingsEnum.getPayload(); - } - if (pendingPayload == null) { - return null; - } - - // old API would always used passed in bytes if it - // "fits", else allocate new: - if (bytes != null && pendingPayload.length <= bytes.length - offset) { - System.arraycopy(pendingPayload.bytes, pendingPayload.offset, bytes, offset, pendingPayload.length); 
- return bytes; - } else if (pendingPayload.offset == 0 && pendingPayload.length == pendingPayload.bytes.length) { - return pendingPayload.bytes; - } else { - final byte[] retBytes = new byte[pendingPayload.length]; - System.arraycopy(pendingPayload.bytes, pendingPayload.offset, retBytes, 0, pendingPayload.length); - return retBytes; - } - } - - public boolean isPayloadAvailable() { - if (!any || postingsEnum == null) { - return false; - } else { - return postingsEnum.hasPayload(); - } - } - } } Index: lucene/src/java/org/apache/lucene/index/AbstractAllTermDocs.java =================================================================== --- lucene/src/java/org/apache/lucene/index/AbstractAllTermDocs.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/AbstractAllTermDocs.java (working copy) @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.index; - -import java.io.IOException; - -/** Base class for enumerating all but deleted docs. - * - *

NOTE: this class is meant only to be used internally - * by Lucene; it's only public so it can be shared across - * packages. This means the API is freely subject to - * change, and, the class could be removed entirely, in any - * Lucene release. Use directly at your own risk! */ -@Deprecated -public abstract class AbstractAllTermDocs implements TermDocs { - - protected int maxDoc; - protected int doc = -1; - - protected AbstractAllTermDocs(int maxDoc) { - this.maxDoc = maxDoc; - } - - public void seek(Term term) throws IOException { - if (term==null) { - doc = -1; - } else { - throw new UnsupportedOperationException(); - } - } - - public void seek(TermEnum termEnum) throws IOException { - throw new UnsupportedOperationException(); - } - - public int doc() { - return doc; - } - - public int freq() { - return 1; - } - - public boolean next() throws IOException { - return skipTo(doc+1); - } - - public int read(int[] docs, int[] freqs) throws IOException { - final int length = docs.length; - int i = 0; - while (i < length && doc < maxDoc) { - if (!isDeleted(doc)) { - docs[i] = doc; - freqs[i] = 1; - ++i; - } - doc++; - } - return i; - } - - public boolean skipTo(int target) throws IOException { - doc = target; - while (doc < maxDoc) { - if (!isDeleted(doc)) { - return true; - } - doc++; - } - return false; - } - - public void close() throws IOException { - } - - public abstract boolean isDeleted(int doc); -} \ No newline at end of file Index: lucene/src/java/org/apache/lucene/index/AllTermDocs.java =================================================================== --- lucene/src/java/org/apache/lucene/index/AllTermDocs.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/AllTermDocs.java (working copy) @@ -1,39 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.index; - -import org.apache.lucene.util.BitVector; - -/** @deprecated Switch to AllDocsEnum */ -@Deprecated -class AllTermDocs extends AbstractAllTermDocs { - - protected BitVector deletedDocs; - - protected AllTermDocs(SegmentReader parent) { - super(parent.maxDoc()); - synchronized (parent) { - this.deletedDocs = parent.deletedDocs; - } - } - - @Override - public boolean isDeleted(int doc) { - return deletedDocs != null && deletedDocs.get(doc); - } -} Index: lucene/src/java/org/apache/lucene/index/LegacySegmentMergeInfo.java =================================================================== --- lucene/src/java/org/apache/lucene/index/LegacySegmentMergeInfo.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/LegacySegmentMergeInfo.java (working copy) @@ -1,86 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -// @deprecated This is pre-flex API -final class LegacySegmentMergeInfo { - Term term; - int base; - int ord; // the position of the segment in a MultiReader - TermEnum termEnum; - IndexReader reader; - int delCount; - private TermPositions postings; // use getPositions() - private int[] docMap; // use getDocMap() - - LegacySegmentMergeInfo(int b, TermEnum te, IndexReader r) - throws IOException { - base = b; - reader = r; - termEnum = te; - term = te.term(); - } - - // maps around deleted docs - int[] getDocMap() { - if (docMap == null) { - delCount = 0; - // build array which maps document numbers around deletions - if (reader.hasDeletions()) { - int maxDoc = reader.maxDoc(); - docMap = new int[maxDoc]; - int j = 0; - for (int i = 0; i < maxDoc; i++) { - if (reader.isDeleted(i)) { - delCount++; - docMap[i] = -1; - } else - docMap[i] = j++; - } - } - } - return docMap; - } - - TermPositions getPositions() throws IOException { - if (postings == null) { - postings = reader.termPositions(); - } - return postings; - } - - final boolean next() throws IOException { - if (termEnum.next()) { - term = termEnum.term(); - return true; - } else { - term = null; - return false; - } - } - - final void close() throws IOException { - termEnum.close(); - if (postings != null) { - postings.close(); - } -} -} - Index: lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (revision 
954967) +++ lucene/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (working copy) @@ -30,7 +30,8 @@ public abstract int nextPosition() throws IOException; /** Returns the payload at this position, or null if no - * payload was indexed. */ + * payload was indexed. Only call this once per + * position. */ public abstract BytesRef getPayload() throws IOException; public abstract boolean hasPayload(); Index: lucene/src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/MultiReader.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -25,9 +25,6 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.index.DirectoryReader.MultiTermDocs; // deprecated -import org.apache.lucene.index.DirectoryReader.MultiTermEnum; // deprecated -import org.apache.lucene.index.DirectoryReader.MultiTermPositions; // deprecated import org.apache.lucene.search.Similarity; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close import org.apache.lucene.util.Bits; @@ -370,28 +367,6 @@ } @Override - public TermEnum terms() throws IOException { - ensureOpen(); - if (subReaders.length == 1) { - // Optimize single segment case: - return subReaders[0].terms(); - } else { - return new MultiTermEnum(this, subReaders, starts, null); - } - } - - @Override - public TermEnum terms(Term term) throws IOException { - ensureOpen(); - if (subReaders.length == 1) { - // Optimize single segment case: - return subReaders[0].terms(term); - } else { - return new MultiTermEnum(this, subReaders, starts, term); - } - } - - @Override public int docFreq(Term t) throws IOException { ensureOpen(); int total = 0; // sum freqs in segments @@ -411,39 +386,6 @@ } @Override - public TermDocs termDocs() throws IOException { - ensureOpen(); - 
if (subReaders.length == 1) { - // Optimize single segment case: - return subReaders[0].termDocs(); - } else { - return new MultiTermDocs(this, subReaders, starts); - } - } - - @Override - public TermDocs termDocs(Term term) throws IOException { - ensureOpen(); - if (subReaders.length == 1) { - // Optimize single segment case: - return subReaders[0].termDocs(term); - } else { - return super.termDocs(term); - } - } - - @Override - public TermPositions termPositions() throws IOException { - ensureOpen(); - if (subReaders.length == 1) { - // Optimize single segment case: - return subReaders[0].termPositions(); - } else { - return new MultiTermPositions(this, subReaders, starts); - } - } - - @Override protected void doCommit(Map commitUserData) throws IOException { for (int i = 0; i < subReaders.length; i++) subReaders[i].commit(commitUserData); Index: lucene/src/java/org/apache/lucene/index/DirectoryReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DirectoryReader.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/DirectoryReader.java (working copy) @@ -686,28 +686,6 @@ } @Override - public TermEnum terms() throws IOException { - ensureOpen(); - if (subReaders.length == 1) { - // Optimize single segment case: - return subReaders[0].terms(); - } else { - return new MultiTermEnum(this, subReaders, starts, null); - } - } - - @Override - public TermEnum terms(Term term) throws IOException { - ensureOpen(); - if (subReaders.length == 1) { - // Optimize single segment case: - return subReaders[0].terms(term); - } else { - return new MultiTermEnum(this, subReaders, starts, term); - } - } - - @Override public int docFreq(Term t) throws IOException { ensureOpen(); int total = 0; // sum freqs in segments @@ -727,43 +705,10 @@ } @Override - public TermDocs termDocs() throws IOException { - ensureOpen(); - if (subReaders.length == 1) { - // Optimize single segment case: - return 
subReaders[0].termDocs(); - } else { - return new MultiTermDocs(this, subReaders, starts); - } - } - - @Override - public TermDocs termDocs(Term term) throws IOException { - ensureOpen(); - if (subReaders.length == 1) { - // Optimize single segment case: - return subReaders[0].termDocs(term); - } else { - return super.termDocs(term); - } - } - - @Override public Fields fields() throws IOException { throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields (NOTE that it's usually better to work per segment instead)"); } - @Override - public TermPositions termPositions() throws IOException { - ensureOpen(); - if (subReaders.length == 1) { - // Optimize single segment case: - return subReaders[0].termPositions(); - } else { - return new MultiTermPositions(this, subReaders, starts); - } - } - /** * Tries to acquire the WriteLock on this directory. this method is only valid if this IndexReader is directory * owner. @@ -1122,280 +1067,4 @@ throw new UnsupportedOperationException("This IndexCommit does not support deletions"); } } - - // @deprecated This is pre-flex API - // Exposes pre-flex API by doing on-the-fly merging - // pre-flex API to each segment - static class MultiTermEnum extends TermEnum { - IndexReader topReader; // used for matching TermEnum to TermDocs - private LegacySegmentMergeQueue queue; - - private Term term; - private int docFreq; - final LegacySegmentMergeInfo[] matchingSegments; // null terminated array of matching segments - - public MultiTermEnum(IndexReader topReader, IndexReader[] readers, int[] starts, Term t) - throws IOException { - this.topReader = topReader; - queue = new LegacySegmentMergeQueue(readers.length); - matchingSegments = new LegacySegmentMergeInfo[readers.length+1]; - for (int i = 0; i < readers.length; i++) { - IndexReader reader = readers[i]; - TermEnum termEnum; - - if (t != null) { - termEnum = reader.terms(t); - } else { - termEnum = reader.terms(); - } - - 
LegacySegmentMergeInfo smi = new LegacySegmentMergeInfo(starts[i], termEnum, reader); - smi.ord = i; - if (t == null ? smi.next() : termEnum.term() != null) - queue.add(smi); // initialize queue - else - smi.close(); - } - - if (t != null && queue.size() > 0) { - next(); - } - } - - @Override - public boolean next() throws IOException { - for (int i=0; i { - TermPositionsQueue(List termPositions) throws IOException { - initialize(termPositions.size()); - - for (TermPositions tp : termPositions) { - if (tp.next()) - add(tp); - } - } - - final TermPositions peek() { - return top(); - } - - @Override - public final boolean lessThan(TermPositions a, TermPositions b) { - return a.doc() < b.doc(); - } - } - - private static final class IntQueue { - private int _arraySize = 16; - private int _index = 0; - private int _lastIndex = 0; - private int[] _array = new int[_arraySize]; - - final void add(int i) { - if (_lastIndex == _arraySize) - growArray(); - - _array[_lastIndex++] = i; - } - - final int next() { - return _array[_index++]; - } - - final void sort() { - Arrays.sort(_array, _index, _lastIndex); - } - - final void clear() { - _index = 0; - _lastIndex = 0; - } - - final int size() { - return (_lastIndex - _index); - } - - private void growArray() { - _array = ArrayUtil.grow(_array, _arraySize+1); - _arraySize = _array.length; - } - } - - private int _doc; - private int _freq; - private TermPositionsQueue _termPositionsQueue; - private IntQueue _posList; - - /** - * Creates a new MultipleTermPositions instance. 
- * - * @exception IOException - */ - public MultipleTermPositions(IndexReader indexReader, Term[] terms) throws IOException { - List termPositions = new LinkedList(); - - for (int i = 0; i < terms.length; i++) - termPositions.add(indexReader.termPositions(terms[i])); - - _termPositionsQueue = new TermPositionsQueue(termPositions); - _posList = new IntQueue(); - } - - public final boolean next() throws IOException { - if (_termPositionsQueue.size() == 0) - return false; - - _posList.clear(); - _doc = _termPositionsQueue.peek().doc(); - - TermPositions tp; - do { - tp = _termPositionsQueue.peek(); - - for (int i = 0; i < tp.freq(); i++) - _posList.add(tp.nextPosition()); - - if (tp.next()) - _termPositionsQueue.updateTop(); - else { - _termPositionsQueue.pop(); - tp.close(); - } - } while (_termPositionsQueue.size() > 0 && _termPositionsQueue.peek().doc() == _doc); - - _posList.sort(); - _freq = _posList.size(); - - return true; - } - - public final int nextPosition() { - return _posList.next(); - } - - public final boolean skipTo(int target) throws IOException { - while (_termPositionsQueue.peek() != null && target > _termPositionsQueue.peek().doc()) { - TermPositions tp = _termPositionsQueue.pop(); - if (tp.skipTo(target)) - _termPositionsQueue.add(tp); - else - tp.close(); - } - return next(); - } - - public final int doc() { - return _doc; - } - - public final int freq() { - return _freq; - } - - public final void close() throws IOException { - while (_termPositionsQueue.size() > 0) - _termPositionsQueue.pop().close(); - } - - /** - * Not implemented. - * @throws UnsupportedOperationException - */ - public void seek(Term arg0) throws IOException { - throw new UnsupportedOperationException(); - } - - /** - * Not implemented. - * @throws UnsupportedOperationException - */ - public void seek(TermEnum termEnum) throws IOException { - throw new UnsupportedOperationException(); - } - - /** - * Not implemented. 
- * @throws UnsupportedOperationException - */ - public int read(int[] arg0, int[] arg1) throws IOException { - throw new UnsupportedOperationException(); - } - - - /** - * Not implemented. - * @throws UnsupportedOperationException - */ - public int getPayloadLength() { - throw new UnsupportedOperationException(); - } - - /** - * Not implemented. - * @throws UnsupportedOperationException - */ - public byte[] getPayload(byte[] data, int offset) throws IOException { - throw new UnsupportedOperationException(); - } - - /** - * - * @return false - */ - // TODO: Remove warning after API has been finalized - public boolean isPayloadAvailable() { - return false; - } -} Index: lucene/src/java/org/apache/lucene/index/FilterIndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (working copy) @@ -361,21 +361,7 @@ in.setNorm(d, f, b); } - // final to force subclass to impl flex APIs, instead @Override - public final TermEnum terms() throws IOException { - ensureOpen(); - return in.terms(); - } - - // final to force subclass to impl flex APIs, instead - @Override - public final TermEnum terms(Term t) throws IOException { - ensureOpen(); - return in.terms(t); - } - - @Override public int docFreq(Term t) throws IOException { ensureOpen(); return in.docFreq(t); @@ -387,28 +373,7 @@ return in.docFreq(field, t); } - // final to force subclass to impl flex APIs, instead @Override - public final TermDocs termDocs() throws IOException { - ensureOpen(); - return in.termDocs(); - } - - // final to force subclass to impl flex APIs, instead - @Override - public final TermDocs termDocs(Term term) throws IOException { - ensureOpen(); - return in.termDocs(term); - } - - // final to force subclass to impl flex APIs, instead - @Override - public final TermPositions termPositions() throws IOException { - 
ensureOpen(); - return in.termPositions(); - } - - @Override protected void doDelete(int n) throws CorruptIndexException, IOException { in.deleteDocument(n); } @Override Index: lucene/src/java/org/apache/lucene/index/TermPositions.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermPositions.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/TermPositions.java (working copy) @@ -1,80 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -/** - * TermPositions provides an interface for enumerating the <document, - * frequency, <position>* > tuples for a term.

The document and - * frequency are the same as for a TermDocs. The positions portion lists the ordinal - * positions of each occurrence of a term in a document. - * - * @see IndexReader#termPositions() - * @deprecated Use {@link DocsAndPositionsEnum} instead - */ -@Deprecated -public interface TermPositions - extends TermDocs -{ - /** Returns next position in the current document. It is an error to call - this more than {@link #freq()} times - without calling {@link #next()}

This is - invalid until {@link #next()} is called for - the first time. - */ - int nextPosition() throws IOException; - - /** - * Returns the length of the payload at the current term position. - * This is invalid until {@link #nextPosition()} is called for - * the first time.
- * @return length of the current payload in number of bytes - */ - int getPayloadLength() throws IOException; - - /** - * Returns the payload data at the current term position. - * This is invalid until {@link #nextPosition()} is called for - * the first time. - * This method must not be called more than once after each call - * of {@link #nextPosition()}. However, payloads are loaded lazily, - * so if the payload data for the current position is not needed, - * this method may not be called at all for performance reasons.
- * - * @param data the array into which the data of this payload is to be - * stored, if it is big enough; otherwise, a new byte[] array - * is allocated for this purpose. - * @param offset the offset in the array into which the data of this payload - * is to be stored. - * @return a byte[] array containing the data of this payload - * @throws IOException - */ - byte[] getPayload(byte[] data, int offset) throws IOException; - - /** - * Checks if a payload can be loaded at this position. - *

- * Payloads can only be loaded once per call to - * {@link #nextPosition()}. - * - * @return true if there is a payload available at this position that can be loaded - */ - public boolean isPayloadAvailable(); - -} Index: lucene/src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexReader.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -868,18 +868,6 @@ setNorm(doc, field, Similarity.getDefault().encodeNormValue(value)); } - /** Returns an enumeration of all the terms in the index. The - * enumeration is ordered by Term.compareTo(). Each term is greater - * than all that precede it in the enumeration. Note that after - * calling terms(), {@link TermEnum#next()} must be called - * on the resulting enumeration before calling other methods such as - * {@link TermEnum#term()}. - * @deprecated Use the new flex API ({@link #fields()}) instead. - * @throws IOException if there is a low-level IO error - */ - @Deprecated - public abstract TermEnum terms() throws IOException; - /** Flex API: returns {@link Fields} for this reader. * This method may return null if the reader has no * postings. @@ -893,24 +881,10 @@ * using {@link ReaderUtil#gatherSubReaders} and iterate * through them yourself. */ public abstract Fields fields() throws IOException; - - /** Returns an enumeration of all terms starting at a given term. If - * the given term does not exist, the enumeration is positioned at the - * first term greater than the supplied term. The enumeration is - * ordered by Term.compareTo(). Each term is greater than all that - * precede it in the enumeration. - * @deprecated Use the new flex API ({@link #fields()}) instead. 
- * @throws IOException if there is a low-level IO error - */ - @Deprecated - public abstract TermEnum terms(Term t) throws IOException; - /** Returns the number of documents containing the term t. - * @throws IOException if there is a low-level IO error - * @deprecated Use {@link #docFreq(String,BytesRef)} instead. - */ - @Deprecated - public abstract int docFreq(Term t) throws IOException; + public int docFreq(Term term) throws IOException { + return docFreq(term.field(), new BytesRef(term.text())); + } /** Returns the number of documents containing the term * t. This method returns 0 if the term or @@ -929,28 +903,6 @@ return terms.docFreq(term); } - /** Returns an enumeration of all the documents which contain - * term. For each document, the document number, the frequency of - * the term in that document is also provided, for use in - * search scoring. If term is null, then all non-deleted - * docs are returned with freq=1. - * Thus, this method implements the mapping: - *

    - * Term    =>    <docNum, freq>* - *
- *

The enumeration is ordered by document number. Each document number - * is greater than all that precede it in the enumeration. - * @deprecated Use the new flex API ({@link #termDocsEnum}) instead. - * @throws IOException if there is a low-level IO error - */ - @Deprecated - public TermDocs termDocs(Term term) throws IOException { - ensureOpen(); - TermDocs termDocs = termDocs(); - termDocs.seek(term); - return termDocs; - } - /** This may return null if the field does not exist.*/ public Terms terms(String field) throws IOException { final Fields fields = fields(); @@ -997,50 +949,6 @@ } } - /** Returns an unpositioned {@link TermDocs} enumerator. - * @deprecated Use the new flex API ({@link #fields()}) instead. - * @throws IOException if there is a low-level IO error - */ - @Deprecated - public abstract TermDocs termDocs() throws IOException; - - /** Returns an enumeration of all the documents which contain - * term. For each document, in addition to the document number - * and frequency of the term in that document, a list of all of the ordinal - * positions of the term in the document is available. Thus, this method - * implements the mapping: - * - *

    - * Term    =>    <docNum, freq, - * <pos1, pos2, ... - * posfreq-1> - * >* - *
- *

This positional information facilitates phrase and proximity searching. - *

The enumeration is ordered by document number. Each document number is - * greater than all that precede it in the enumeration. - * @deprecated Please switch the flex API ({@link - * #termDocsEnum}) instead - * @throws IOException if there is a low-level IO error - */ - @Deprecated - public TermPositions termPositions(Term term) throws IOException { - ensureOpen(); - TermPositions termPositions = termPositions(); - termPositions.seek(term); - return termPositions; - } - - /** Returns an unpositioned {@link TermPositions} enumerator. - * @deprecated Please switch the flex API ({@link - * #termDocsEnum}) instead - * @throws IOException if there is a low-level IO error - */ - @Deprecated - public abstract TermPositions termPositions() throws IOException; - - - /** Deletes the document numbered docNum. Once a document is * deleted it will not appear in TermDocs or TermPositions enumerations. * Attempts to read its field with the {@link #document} @@ -1089,16 +997,16 @@ */ public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { ensureOpen(); - TermDocs docs = termDocs(term); + DocsEnum docs = MultiFields.getTermDocsEnum(this, + MultiFields.getDeletedDocs(this), + term.field(), + new BytesRef(term.text())); if (docs == null) return 0; int n = 0; - try { - while (docs.next()) { - deleteDocument(docs.doc()); - n++; - } - } finally { - docs.close(); + int doc; + while ((doc = docs.nextDoc()) != docs.NO_MORE_DOCS) { + deleteDocument(doc); + n++; } return n; } Index: lucene/src/java/org/apache/lucene/index/TermEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermEnum.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/TermEnum.java (working copy) @@ -1,42 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.io.Closeable; - -/** Abstract class for enumerating terms. - -

Term enumerations are always ordered by Term.compareTo(). Each term in - the enumeration is greater than all that precede it. -* @deprecated Use TermsEnum instead */ - -@Deprecated -public abstract class TermEnum implements Closeable { - /** Increments the enumeration to the next element. True if one exists.*/ - public abstract boolean next() throws IOException; - - /** Returns the current Term in the enumeration.*/ - public abstract Term term(); - - /** Returns the docFreq of the current Term in the enumeration.*/ - public abstract int docFreq(); - - /** Closes the enumeration to further activity, freeing resources. */ - public abstract void close() throws IOException; -} Index: lucene/src/java/org/apache/lucene/index/TermDocs.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermDocs.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/TermDocs.java (working copy) @@ -1,85 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.io.Closeable; - -/** TermDocs provides an interface for enumerating <document, frequency> - pairs for a term.

The document portion names each document containing - the term. Documents are indicated by number. The frequency portion gives - the number of times the term occurred in each document.

The pairs are - ordered by document number. - - @see IndexReader#termDocs() - @deprecated Use {@link DocsEnum} instead -*/ - -@Deprecated -public interface TermDocs extends Closeable { - /** Sets this to the data for a term. - * The enumeration is reset to the start of the data for this term. - */ - void seek(Term term) throws IOException; - - /** Sets this to the data for the current term in a {@link TermEnum}. - * This may be optimized in some implementations. - */ - void seek(TermEnum termEnum) throws IOException; - - /** Returns the current document number.

This is invalid until {@link - #next()} is called for the first time.*/ - int doc(); - - /** Returns the frequency of the term within the current document.

This - is invalid until {@link #next()} is called for the first time.*/ - int freq(); - - /** Moves to the next pair in the enumeration.

Returns true iff there is - such a next pair in the enumeration. */ - boolean next() throws IOException; - - /** Attempts to read multiple entries from the enumeration, up to length of - * docs. Document numbers are stored in docs, and term - * frequencies are stored in freqs. The freqs array must be as - * long as the docs array. - * - *

Returns the number of entries read. Zero is only returned when the - * stream has been exhausted. */ - int read(int[] docs, int[] freqs) throws IOException; - - /** Skips entries to the first beyond the current whose document number is - * greater than or equal to target.

Returns true iff there is such - * an entry.

Behaves as if written:

-   *   boolean skipTo(int target) {
-   *     do {
-   *       if (!next())
-   * 	     return false;
-   *     } while (target > doc());
-   *     return true;
-   *   }
-   * 
- * Some implementations are considerably more efficient than that. - */ - boolean skipTo(int target) throws IOException; - - /** Frees associated resources. */ - void close() throws IOException; -} - - Index: lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java (working copy) @@ -152,11 +152,19 @@ @Override public DocsEnum docs(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsEnum reuse) throws IOException { - final SegmentDocsEnum docsEnum; + SegmentDocsEnum docsEnum; if (reuse == null) { docsEnum = new SegmentDocsEnum(freqIn); + } else if (!(reuse instanceof SegmentDocsEnum)) { + docsEnum = new SegmentDocsEnum(freqIn); } else { docsEnum = (SegmentDocsEnum) reuse; + if (docsEnum.freqIn != freqIn) { + // If you are using ParellelReader, and pass in a + // reused DocsEnum, it could have come from another + // reader also using standard codec + docsEnum = new SegmentDocsEnum(freqIn); + } } return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs); } Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java (working copy) @@ -20,7 +20,6 @@ import java.io.IOException; import org.apache.lucene.store.IndexInput; import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.CorruptIndexException; @@ -30,7 +29,7 @@ * @lucene.experimental */ @Deprecated -public final class SegmentTermEnum extends 
TermEnum implements Cloneable { +public final class SegmentTermEnum implements Cloneable { private IndexInput input; FieldInfos fieldInfos; long size; @@ -128,7 +127,6 @@ } /** Increments the enumeration to the next element. True if one exists.*/ - @Override public final boolean next() throws IOException { if (position++ >= size - 1) { prevBuffer.set(termBuffer); @@ -176,7 +174,6 @@ /** Returns the current Term in the enumeration. Initially invalid, valid after next() called for the first time.*/ - @Override public final Term term() { return termBuffer.toTerm(); } @@ -200,7 +197,6 @@ /** Returns the docFreq from the current TermInfo in the enumeration. Initially invalid, valid after next() called for the first time.*/ - @Override public final int docFreq() { return termInfo.docFreq; } @@ -218,7 +214,6 @@ } /** Closes the enumeration to further activity, freeing resources. */ - @Override public final void close() throws IOException { input.close(); } Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java (working copy) @@ -21,12 +21,11 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermPositions; import org.apache.lucene.store.IndexInput; /** @lucene.experimental */ public final class SegmentTermPositions -extends SegmentTermDocs implements TermPositions { +extends SegmentTermDocs { private IndexInput proxStream; private IndexInput proxStreamOrig; private int proxCount; @@ -55,7 +54,6 @@ this.proxStreamOrig = proxStream; // the proxStream will be cloned lazily when nextPosition() is called for the first time } - @Override final void seek(TermInfo ti, Term term) throws IOException { super.seek(ti, term); if (ti != 
null) @@ -67,7 +65,6 @@ needToLoadPayload = false; } - @Override public final void close() throws IOException { super.close(); if (proxStream != null) proxStream.close(); @@ -99,13 +96,11 @@ return delta; } - @Override protected final void skippingDoc() throws IOException { // we remember to skip a document lazily lazySkipProxCount += freq; } - @Override public final boolean next() throws IOException { // we remember to skip the remaining positions of the current // document lazily @@ -119,14 +114,12 @@ return false; } - @Override public final int read(final int[] docs, final int[] freqs) { throw new UnsupportedOperationException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead."); } /** Called by super.skipTo(). */ - @Override protected void skipProx(long proxPointer, int payloadLength) throws IOException { // we save the pointer, we might have to skip there lazily lazySkipPointer = proxPointer; Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (revision 954967) +++ lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (working copy) @@ -22,8 +22,6 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.codecs.standard.DefaultSkipListReader; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; @@ -31,7 +29,7 @@ /** @deprecated * @lucene.experimental */ @Deprecated -public class SegmentTermDocs implements TermDocs { +public class SegmentTermDocs { //protected SegmentReader parent; private final FieldInfos fieldInfos; private final TermInfosReader tis; @@ -84,17 +82,16 @@ this.skipDocs = skipDocs; } - public void 
seek(TermEnum termEnum) throws IOException { + public void seek(SegmentTermEnum segmentTermEnum) throws IOException { TermInfo ti; Term term; // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs - if (termEnum instanceof SegmentTermEnum && ((SegmentTermEnum) termEnum).fieldInfos == fieldInfos) { // optimized case - SegmentTermEnum segmentTermEnum = ((SegmentTermEnum) termEnum); + if (segmentTermEnum.fieldInfos == fieldInfos) { // optimized case term = segmentTermEnum.term(); ti = segmentTermEnum.termInfo(); } else { // punt case - term = termEnum.term(); + term = segmentTermEnum.term(); ti = tis.get(term); } Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java (revision 954967) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java (working copy) @@ -58,7 +58,10 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.search.Query; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanOrQuery; @@ -84,8 +87,15 @@ public BasicQueryFactory getBasicQueryFactory() {return qf;} - public TermEnum getTermEnum(String termText) throws IOException { - return getIndexReader().terms(new Term(getFieldName(), termText)); + public TermsEnum getTermsEnum(String termText) throws IOException { + Terms terms = MultiFields.getTerms(getIndexReader(), getFieldName()); + if (terms != null) { + TermsEnum termsEnum = terms.iterator(); + 
termsEnum.seek(new BytesRef(termText)); + return termsEnum; + } else { + return null; + } } public int size() {return weightBySpanQuery.size();} Index: lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java =================================================================== --- lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java (revision 954967) +++ lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java (working copy) @@ -23,10 +23,13 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; +import org.apache.lucene.util.BytesRef; import java.io.File; import java.util.Date; @@ -39,7 +42,7 @@ private static boolean deleting = false; // true during deletion pass private static IndexReader reader; // existing index private static IndexWriter writer; // new index being built - private static TermEnum uidIter; // document id iterator + private static TermsEnum uidIter; // document id iterator /** Indexer for HTML files.*/ public static void main(String[] argv) { @@ -110,21 +113,24 @@ if (!create) { // incrementally update reader = IndexReader.open(FSDirectory.open(index), false); // open existing index - uidIter = reader.terms(new Term("uid", "")); // init uid iterator + Terms terms = MultiFields.getTerms(reader, "uid"); + if (terms != null) { + uidIter = terms.iterator(); - indexDocs(file); + indexDocs(file); - if (deleting) { // delete rest of stale docs - while (uidIter.term() != null && uidIter.term().field() == "uid") { - System.out.println("deleting " + - HTMLDocument.uid2url(uidIter.term().text())); - reader.deleteDocuments(uidIter.term()); - 
uidIter.next(); + if (deleting) { // delete rest of stale docs + BytesRef text; + while ((text=uidIter.next()) != null) { + String termText = text.utf8ToString(); + System.out.println("deleting " + + HTMLDocument.uid2url(termText)); + reader.deleteDocuments(new Term("uid", termText)); + } + deleting = false; } - deleting = false; } - uidIter.close(); // close uid iterator reader.close(); // close existing index } else // don't have exisiting @@ -145,17 +151,21 @@ if (uidIter != null) { String uid = HTMLDocument.uid(file); // construct uid for doc - while (uidIter.term() != null && uidIter.term().field() == "uid" && - uidIter.term().text().compareTo(uid) < 0) { - if (deleting) { // delete stale docs - System.out.println("deleting " + - HTMLDocument.uid2url(uidIter.term().text())); - reader.deleteDocuments(uidIter.term()); + BytesRef text; + while((text = uidIter.next()) != null) { + String termText = text.utf8ToString(); + if (termText.compareTo(uid) < 0) { + if (deleting) { // delete stale docs + System.out.println("deleting " + + HTMLDocument.uid2url(termText)); + reader.deleteDocuments(new Term("uid", termText)); + } + } else { + break; } - uidIter.next(); } - if (uidIter.term() != null && uidIter.term().field() == "uid" && - uidIter.term().text().compareTo(uid) == 0) { + if (text != null && + text.utf8ToString().compareTo(uid) == 0) { uidIter.next(); // keep matching docs } else if (!deleting) { // add new docs Document doc = HTMLDocument.Document(file); Index: lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java =================================================================== --- lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java (revision 954967) +++ lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java (working copy) @@ -23,8 +23,8 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import 
org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; @@ -84,11 +84,11 @@ } } - public void testTermEnum() throws Exception { + public void testTermsEnum() throws Exception { InstantiatedIndex ii = new InstantiatedIndex(); IndexReader r = new InstantiatedIndexReader(ii); - termEnumTest(r); + termsEnumTest(r); r.close(); ii.close(); @@ -97,17 +97,13 @@ Directory d = new RAMDirectory(); new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())).close(); r = IndexReader.open(d, false); - termEnumTest(r); + termsEnumTest(r); r.close(); d.close(); } - public void termEnumTest(IndexReader r) throws Exception { - TermEnum terms = r.terms(); - - assertNull(terms.term()); - assertFalse(terms.next()); - + public void termsEnumTest(IndexReader r) throws Exception { + assertNull(MultiFields.getFields(r)); } } Index: lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java =================================================================== --- lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (revision 954967) +++ lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (working copy) @@ -33,15 +33,18 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Payload; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.TermFreqVector; import 
org.apache.lucene.index.TermPositionVector; -import org.apache.lucene.index.TermPositions; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.BytesRef; /** * Asserts equality of content and behaviour of two index readers. @@ -120,132 +123,70 @@ // test seek Term t = new Term("c", "danny"); - TermEnum aprioriTermEnum = aprioriReader.terms(t); - TermEnum testTermEnum = testReader.terms(t); - + TermsEnum aprioriTermEnum = MultiFields.getTerms(aprioriReader, t.field()).iterator(); + aprioriTermEnum.seek(new BytesRef(t.text())); + TermsEnum testTermEnum = MultiFields.getTerms(testReader, t.field()).iterator(); + testTermEnum.seek(new BytesRef(t.text())); assertEquals(aprioriTermEnum.term(), testTermEnum.term()); - t = aprioriTermEnum.term(); + DocsEnum aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), null); + DocsEnum testTermDocs = testTermEnum.docs(MultiFields.getDeletedDocs(testReader), null); - aprioriTermEnum.close(); - testTermEnum.close(); - - TermDocs aprioriTermDocs = aprioriReader.termDocs(t); - TermDocs testTermDocs = testReader.termDocs(t); - - assertEquals(aprioriTermDocs.next(), testTermDocs.next()); + assertEquals(aprioriTermDocs.nextDoc(), testTermDocs.nextDoc()); assertEquals(aprioriTermDocs.freq(), testTermDocs.freq()); - assertEquals(aprioriTermDocs.doc(), testTermDocs.doc()); - if (aprioriTermDocs.skipTo(4)) { - assertTrue(testTermDocs.skipTo(4)); + if (aprioriTermDocs.advance(4) != DocsEnum.NO_MORE_DOCS) { + assertTrue(testTermDocs.advance(4) != DocsEnum.NO_MORE_DOCS); assertEquals(aprioriTermDocs.freq(), testTermDocs.freq()); - assertEquals(aprioriTermDocs.doc(), testTermDocs.doc()); + assertEquals(aprioriTermDocs.docID(), testTermDocs.docID()); } else { - assertFalse(testTermDocs.skipTo(4)); + assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.advance(4)); } - 
if (aprioriTermDocs.next()) { - assertTrue(testTermDocs.next()); + if (aprioriTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) { + assertTrue(testTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(aprioriTermDocs.freq(), testTermDocs.freq()); - assertEquals(aprioriTermDocs.doc(), testTermDocs.doc()); + assertEquals(aprioriTermDocs.docID(), testTermDocs.docID()); } else { - assertFalse(testTermDocs.next()); + assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.nextDoc()); } // beyond this point all next and skipto will return false - if (aprioriTermDocs.skipTo(100)) { - assertTrue(testTermDocs.skipTo(100)); + if (aprioriTermDocs.advance(100) != DocsEnum.NO_MORE_DOCS) { + assertTrue(testTermDocs.advance(100) != DocsEnum.NO_MORE_DOCS); assertEquals(aprioriTermDocs.freq(), testTermDocs.freq()); - assertEquals(aprioriTermDocs.doc(), testTermDocs.doc()); + assertEquals(aprioriTermDocs.docID(), testTermDocs.docID()); } else { - assertFalse(testTermDocs.skipTo(100)); + assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.advance(100)); } + // start using the API the way one is supposed to use it - if (aprioriTermDocs.next()) { - assertTrue(testTermDocs.next()); - assertEquals(aprioriTermDocs.freq(), testTermDocs.freq()); - assertEquals(aprioriTermDocs.doc(), testTermDocs.doc()); - } else { - assertFalse(testTermDocs.next()); - } + t = new Term("", ""); + FieldsEnum apFieldsEnum = MultiFields.getFields(aprioriReader).iterator(); + String apFirstField = apFieldsEnum.next(); - if (aprioriTermDocs.skipTo(110)) { - assertTrue(testTermDocs.skipTo(110)); - assertEquals(aprioriTermDocs.freq(), testTermDocs.freq()); - assertEquals(aprioriTermDocs.doc(), testTermDocs.doc()); - } else { - assertFalse(testTermDocs.skipTo(110)); - } + FieldsEnum testFieldsEnum = MultiFields.getFields(testReader).iterator(); + String testFirstField = testFieldsEnum.next(); + assertEquals(apFirstField, testFirstField); - if (aprioriTermDocs.skipTo(10)) { - assertTrue(testTermDocs.skipTo(10)); - 
assertEquals(aprioriTermDocs.freq(), testTermDocs.freq()); - assertEquals(aprioriTermDocs.doc(), testTermDocs.doc()); - } else { - assertFalse(testTermDocs.skipTo(10)); - } + aprioriTermEnum = apFieldsEnum.terms(); + testTermEnum = testFieldsEnum.terms(); + + assertEquals(aprioriTermEnum.next(), testTermEnum.next()); + + aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), aprioriTermDocs); + testTermDocs = testTermEnum.docs(MultiFields.getDeletedDocs(testReader), testTermDocs); - - if (aprioriTermDocs.skipTo(210)) { - assertTrue(testTermDocs.skipTo(210)); + while (aprioriTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) { + assertTrue(testTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(aprioriTermDocs.freq(), testTermDocs.freq()); - assertEquals(aprioriTermDocs.doc(), testTermDocs.doc()); - } else { - assertFalse(testTermDocs.skipTo(210)); + assertEquals(aprioriTermDocs.docID(), testTermDocs.docID()); } + assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.nextDoc()); - aprioriTermDocs.close(); - testTermDocs.close(); - - - - // test seek null (AllTermDocs) - aprioriTermDocs = aprioriReader.termDocs(null); - testTermDocs = testReader.termDocs(null); - - while (aprioriTermDocs.next()) { - assertTrue(testTermDocs.next()); - assertEquals(aprioriTermDocs.freq(), testTermDocs.freq()); - assertEquals(aprioriTermDocs.doc(), testTermDocs.doc()); - } - assertFalse(testTermDocs.next()); - - - aprioriTermDocs.close(); - testTermDocs.close(); - - - // test seek default - aprioriTermDocs = aprioriReader.termDocs(); - testTermDocs = testReader.termDocs(); - - // this is invalid use of the API, - // but if the response differs then it's an indication that something might have changed. - // in 2.9 and 3.0 the two TermDocs-implementations returned different values at this point. 
-// assertEquals("Descripency during invalid use of the TermDocs API, see comments in test code for details.", -// aprioriTermDocs.next(), testTermDocs.next()); - - // start using the API the way one is supposed to use it - - t = new Term("", ""); - aprioriTermDocs.seek(t); - testTermDocs.seek(t); - - while (aprioriTermDocs.next()) { - assertTrue(testTermDocs.next()); - assertEquals(aprioriTermDocs.freq(), testTermDocs.freq()); - assertEquals(aprioriTermDocs.doc(), testTermDocs.doc()); - } - assertFalse(testTermDocs.next()); - - aprioriTermDocs.close(); - testTermDocs.close(); - - // clean up aprioriReader.close(); testReader.close(); @@ -443,98 +384,85 @@ // compare term enumeration stepping - TermEnum aprioriTermEnum = aprioriReader.terms(); - TermEnum testTermEnum = testReader.terms(); + FieldsEnum aprioriFieldsEnum = MultiFields.getFields(aprioriReader).iterator(); + FieldsEnum testFieldsEnum = MultiFields.getFields(testReader).iterator(); + String aprioriField; + while((aprioriField = aprioriFieldsEnum.next()) != null) { + String testField = testFieldsEnum.next(); + assertEquals(aprioriField, testField); - while (true) { + TermsEnum aprioriTermEnum = aprioriFieldsEnum.terms(); + TermsEnum testTermEnum = testFieldsEnum.terms(); - if (!aprioriTermEnum.next()) { - assertFalse(testTermEnum.next()); - break; - } - assertTrue(testTermEnum.next()); + BytesRef aprioriText; + while((aprioriText = aprioriTermEnum.next()) != null) { + assertEquals(aprioriText, testTermEnum.next()); - assertEquals(aprioriTermEnum.term(), testTermEnum.term()); - assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq()); + assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq()); - // compare termDocs seeking + // compare termDocs seeking - TermDocs aprioriTermDocsSeeker = aprioriReader.termDocs(aprioriTermEnum.term()); - TermDocs testTermDocsSeeker = testReader.termDocs(testTermEnum.term()); + DocsEnum aprioriTermDocs = 
aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), null); + DocsEnum testTermDocs = testTermEnum.docs(MultiFields.getDeletedDocs(testReader), null); + + while (aprioriTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) { + assertTrue(testTermDocs.advance(aprioriTermDocs.docID()) != DocsEnum.NO_MORE_DOCS); + assertEquals(aprioriTermDocs.docID(), testTermDocs.docID()); + } + + // compare documents per term + + assertEquals(aprioriReader.docFreq(aprioriField, aprioriTermEnum.term()), testReader.docFreq(aprioriField, testTermEnum.term())); - while (aprioriTermDocsSeeker.next()) { - assertTrue(testTermDocsSeeker.skipTo(aprioriTermDocsSeeker.doc())); - assertEquals(aprioriTermDocsSeeker.doc(), testTermDocsSeeker.doc()); - } + aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), aprioriTermDocs); + testTermDocs = testTermEnum.docs(MultiFields.getDeletedDocs(testReader), testTermDocs); - aprioriTermDocsSeeker.close(); - testTermDocsSeeker.close(); + while (true) { + if (aprioriTermDocs.nextDoc() == DocsEnum.NO_MORE_DOCS) { + assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.nextDoc()); + break; + } + assertTrue(testTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS); - // compare documents per term - - assertEquals(aprioriReader.docFreq(aprioriTermEnum.term()), testReader.docFreq(testTermEnum.term())); - - TermDocs aprioriTermDocs = aprioriReader.termDocs(aprioriTermEnum.term()); - TermDocs testTermDocs = testReader.termDocs(testTermEnum.term()); - - while (true) { - if (!aprioriTermDocs.next()) { - assertFalse(testTermDocs.next()); - break; + assertEquals(aprioriTermDocs.docID(), testTermDocs.docID()); + assertEquals(aprioriTermDocs.freq(), testTermDocs.freq()); } - assertTrue(testTermDocs.next()); - assertEquals(aprioriTermDocs.doc(), testTermDocs.doc()); - assertEquals(aprioriTermDocs.freq(), testTermDocs.freq()); - } + // compare term positions - aprioriTermDocs.close(); - testTermDocs.close(); + DocsAndPositionsEnum aprioriTermPositions = 
aprioriTermEnum.docsAndPositions(MultiFields.getDeletedDocs(aprioriReader), null); + DocsAndPositionsEnum testTermPositions = testTermEnum.docsAndPositions(MultiFields.getDeletedDocs(testReader), null); - // compare term positions + if (aprioriTermPositions != null) { - TermPositions testTermPositions = testReader.termPositions(testTermEnum.term()); - TermPositions aprioriTermPositions = aprioriReader.termPositions(aprioriTermEnum.term()); + for (int docIndex = 0; docIndex < aprioriReader.maxDoc(); docIndex++) { + boolean hasNext = aprioriTermPositions.nextDoc() != DocsEnum.NO_MORE_DOCS; + if (hasNext) { + assertTrue(testTermPositions.nextDoc() != DocsEnum.NO_MORE_DOCS); + + assertEquals(aprioriTermPositions.freq(), testTermPositions.freq()); - if (aprioriTermPositions != null) { + for (int termPositionIndex = 0; termPositionIndex < aprioriTermPositions.freq(); termPositionIndex++) { + int aprioriPos = aprioriTermPositions.nextPosition(); + int testPos = testTermPositions.nextPosition(); - for (int docIndex = 0; docIndex < aprioriReader.maxDoc(); docIndex++) { - boolean hasNext = aprioriTermPositions.next(); - if (hasNext) { - assertTrue(testTermPositions.next()); - - assertEquals(aprioriTermPositions.freq(), testTermPositions.freq()); - - - for (int termPositionIndex = 0; termPositionIndex < aprioriTermPositions.freq(); termPositionIndex++) { - int aprioriPos = aprioriTermPositions.nextPosition(); - int testPos = testTermPositions.nextPosition(); - - if (aprioriPos != testPos) { assertEquals(aprioriPos, testPos); - } - - assertEquals(aprioriTermPositions.isPayloadAvailable(), testTermPositions.isPayloadAvailable()); - if (aprioriTermPositions.isPayloadAvailable()) { - assertEquals(aprioriTermPositions.getPayloadLength(), testTermPositions.getPayloadLength()); - byte[] aprioriPayloads = aprioriTermPositions.getPayload(new byte[aprioriTermPositions.getPayloadLength()], 0); - byte[] testPayloads = testTermPositions.getPayload(new 
byte[testTermPositions.getPayloadLength()], 0); - for (int i = 0; i < aprioriPayloads.length; i++) { - assertEquals(aprioriPayloads[i], testPayloads[i]); + assertEquals(aprioriTermPositions.hasPayload(), testTermPositions.hasPayload()); + if (aprioriTermPositions.hasPayload()) { + BytesRef apPayload = aprioriTermPositions.getPayload(); + BytesRef testPayload = testTermPositions.getPayload(); + assertEquals(apPayload, testPayload); } } - } } } - - aprioriTermPositions.close(); - testTermPositions.close(); - } + assertNull(testTermEnum.next()); } + assertNull(testFieldsEnum.next()); // compare term vectors and position vectors @@ -589,12 +517,8 @@ } } - } - aprioriTermEnum.close(); - testTermEnum.close(); - aprioriReader.close(); testReader.close(); } Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java =================================================================== --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (revision 954967) +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (working copy) @@ -371,49 +371,11 @@ } @Override - public TermEnum terms() throws IOException { - return new InstantiatedTermEnum(this); - } - - @Override - public TermEnum terms(Term t) throws IOException { - InstantiatedTerm it = getIndex().findTerm(t); - if (it != null) { - return new InstantiatedTermEnum(this, it.getTermIndex()); - } else { - int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator); - if (startPos < 0) { - startPos = -1 - startPos; - } - return new InstantiatedTermEnum(this, startPos); + public Fields fields() { + if (getIndex().getOrderedTerms().length == 0) { + return null; } - } - @Override - public TermDocs termDocs() throws IOException { - return new InstantiatedTermDocs(this); - } - - - @Override - public TermDocs termDocs(Term term) throws IOException 
{ - if (term == null) { - return new InstantiatedAllTermDocs(this); - } else { - InstantiatedTermDocs termDocs = new InstantiatedTermDocs(this); - termDocs.seek(term); - return termDocs; - } - } - - @Override - public TermPositions termPositions() throws IOException { - return new InstantiatedTermPositions(this); - } - - @Override - public Fields fields() { - return new Fields() { @Override public FieldsEnum iterator() { Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java =================================================================== --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java (revision 954967) +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java (working copy) @@ -1,83 +0,0 @@ -package org.apache.lucene.store.instantiated; - -/** - * Copyright 2006 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; - -/** - * A {@link org.apache.lucene.index.TermEnum} navigating an {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader}. 
- */ -public class InstantiatedTermEnum extends TermEnum { - - private final InstantiatedIndexReader reader; - - public InstantiatedTermEnum(InstantiatedIndexReader reader) { - this.nextTermIndex = 0; - this.reader = reader; - } - - public InstantiatedTermEnum(InstantiatedIndexReader reader, int startPosition) { - this.reader = reader; - this.nextTermIndex = startPosition; - next(); - } - - private int nextTermIndex; - private InstantiatedTerm term; - - /** - * Increments the enumeration to the next element. True if one exists. - */ - @Override - public boolean next() { - if (reader.getIndex().getOrderedTerms().length <= nextTermIndex) { - return false; - } else { - term = reader.getIndex().getOrderedTerms()[nextTermIndex]; - nextTermIndex++; - return true; - } - } - - /** - * Returns the current Term in the enumeration. - */ - @Override - public Term term() { - return term == null ? null : term.getTerm(); - } - - /** - * Returns the docFreq of the current Term in the enumeration. - */ - @Override - public int docFreq() { - return term.getAssociatedDocuments().length; - } - - /** - * Closes the enumeration to further activity, freeing resources. 
- */ - @Override - public void close() { - } - -} - - - Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java =================================================================== --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (revision 954967) +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (working copy) @@ -31,10 +31,15 @@ import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.FieldsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.TermPositionVector; -import org.apache.lucene.index.TermPositions; +import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.util.BitVector; +import org.apache.lucene.util.BytesRef; /** * Represented as a coupled graph of class instances, this @@ -220,34 +225,46 @@ } } List terms = new ArrayList(5000 * getTermsByFieldAndText().size()); - TermEnum termEnum = sourceIndexReader.terms(); - while (termEnum.next()) { - if (fields == null || fields.contains(termEnum.term().field())) { // todo skipto if not using field - InstantiatedTerm instantiatedTerm = new InstantiatedTerm(termEnum.term().field(), termEnum.term().text()); - getTermsByFieldAndText().get(termEnum.term().field()).put(termEnum.term().text(), instantiatedTerm); - instantiatedTerm.setTermIndex(terms.size()); - terms.add(instantiatedTerm); - instantiatedTerm.setAssociatedDocuments(new InstantiatedTermDocumentInformation[termEnum.docFreq()]); + Fields fieldsC = MultiFields.getFields(sourceIndexReader); + if (fieldsC != null) { + FieldsEnum fieldsEnum = fieldsC.iterator(); + String field; + while((field = 
fieldsEnum.next()) != null) { + if (fields == null || fields.contains(field)) { + TermsEnum termsEnum = fieldsEnum.terms(); + BytesRef text; + while((text = termsEnum.next()) != null) { + String termText = text.utf8ToString(); + InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText); + getTermsByFieldAndText().get(field).put(termText, instantiatedTerm); + instantiatedTerm.setTermIndex(terms.size()); + terms.add(instantiatedTerm); + instantiatedTerm.setAssociatedDocuments(new InstantiatedTermDocumentInformation[termsEnum.docFreq()]); + } + } } } - termEnum.close(); orderedTerms = terms.toArray(new InstantiatedTerm[terms.size()]); // create term-document informations for (InstantiatedTerm term : orderedTerms) { - TermPositions termPositions = sourceIndexReader.termPositions(term.getTerm()); + DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(sourceIndexReader, + MultiFields.getDeletedDocs(sourceIndexReader), + term.getTerm().field(), + new BytesRef(term.getTerm().text())); int position = 0; - while (termPositions.next()) { - InstantiatedDocument document = documentsByNumber[termPositions.doc()]; + while (termPositions.nextDoc() != DocsEnum.NO_MORE_DOCS) { + InstantiatedDocument document = documentsByNumber[termPositions.docID()]; byte[][] payloads = new byte[termPositions.freq()][]; int[] positions = new int[termPositions.freq()]; for (int i = 0; i < termPositions.freq(); i++) { positions[i] = termPositions.nextPosition(); - if (termPositions.isPayloadAvailable()) { - payloads[i] = new byte[termPositions.getPayloadLength()]; - termPositions.getPayload(payloads[i], 0); + if (termPositions.hasPayload()) { + BytesRef br = termPositions.getPayload(); + payloads[i] = new byte[br.length]; + System.arraycopy(br.bytes, br.offset, payloads[i], 0, br.length); } } Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java =================================================================== 
--- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java (revision 954967) +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java (working copy) @@ -1,35 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.lucene.store.instantiated; - -import org.apache.lucene.index.AbstractAllTermDocs; - -class InstantiatedAllTermDocs extends AbstractAllTermDocs { - - private InstantiatedIndexReader reader; - - InstantiatedAllTermDocs(InstantiatedIndexReader reader) { - super(reader.maxDoc()); - this.reader = reader; - } - - @Override - public boolean isDeleted(int doc) { - return reader.isDeleted(doc); - } -} Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsEnum.java =================================================================== --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsEnum.java (revision 954967) +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsEnum.java (working copy) @@ -54,7 +54,7 @@ @Override public int advance(int target) { - if (currentDoc.getDocument().getDocumentNumber() >= target) { + if (currentDoc != null && currentDoc.getDocument().getDocumentNumber() >= target) { return nextDoc(); } Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermPositions.java =================================================================== --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermPositions.java (revision 954967) +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermPositions.java (working copy) @@ -1,100 +0,0 @@ -package org.apache.lucene.store.instantiated; - -/** - * Copyright 2006 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.TermPositions; - -import java.io.IOException; - -/** - * A {@link org.apache.lucene.index.TermPositions} navigating an {@link InstantiatedIndexReader}. - */ -public class InstantiatedTermPositions - extends InstantiatedTermDocs - implements TermPositions { - - public int getPayloadLength() { - return currentDocumentInformation.getPayloads()[currentTermPositionIndex].length; - } - - public byte[] getPayload(byte[] data, int offset) throws IOException { - byte[] payloads = currentDocumentInformation.getPayloads()[currentTermPositionIndex]; - - // read payloads lazily - if (data == null || data.length - offset < getPayloadLength()) { - // the array is too small to store the payload data, - return payloads; - } else { - System.arraycopy(payloads, 0, data, offset, payloads.length); - return data; - } - } - - public boolean isPayloadAvailable() { - return currentDocumentInformation.getPayloads()[currentTermPositionIndex] != null; - } - - public InstantiatedTermPositions(InstantiatedIndexReader reader) { - super(reader); - } - - /** - * Returns next position in the current document. It is an error to call - * this more than {@link #freq()} times - * without calling {@link #next()}

This is - * invalid until {@link #next()} is called for - * the first time. - */ - public int nextPosition() { - currentTermPositionIndex++; - // if you get an array out of index exception here, - // it might be due to currentDocumentInformation.getIndexFromTerm not being set!! - return currentDocumentInformation.getTermPositions()[currentTermPositionIndex]; - } - - private int currentTermPositionIndex; - - /** - * Moves to the next pair in the enumeration. - *

Returns true if there is such a next pair in the enumeration. - */ - @Override - public boolean next() { - currentTermPositionIndex = -1; - return super.next(); - } - - /** - * Skips entries to the first beyond the current whose document number is - * greater than or equal to target.

Returns true iff there is such - * an entry.

Behaves as if written:

-   *   boolean skipTo(int target) {
-   *     do {
-   *       if (!next())
-   * 	     return false;
-   *     } while (target > doc());
-   *     return true;
-   *   }
-   * 
- * Some implementations are considerably more efficient than that. - */ - @Override - public boolean skipTo(int target) { - currentTermPositionIndex = -1; - return super.skipTo(target); - } -} Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java =================================================================== --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java (revision 954967) +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java (working copy) @@ -1,136 +0,0 @@ -package org.apache.lucene.store.instantiated; - -/** - * Copyright 2006 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; - -/** - * A {@link org.apache.lucene.index.TermDocs} navigating an {@link InstantiatedIndexReader}. 
- */ -public class InstantiatedTermDocs - implements TermDocs { - - private final InstantiatedIndexReader reader; - - public InstantiatedTermDocs(InstantiatedIndexReader reader) { - this.reader = reader; - } - - private int currentDocumentIndex; - protected InstantiatedTermDocumentInformation currentDocumentInformation; - protected InstantiatedTerm currentTerm; - - - public void seek(Term term) { - currentTerm = reader.getIndex().findTerm(term); - currentDocumentIndex = -1; - } - - public void seek(org.apache.lucene.index.TermEnum termEnum) { - seek(termEnum.term()); - } - - - public int doc() { - return currentDocumentInformation.getDocument().getDocumentNumber(); - } - - public int freq() { - return currentDocumentInformation.getTermPositions().length; - } - - - public boolean next() { - if (currentTerm != null) { - currentDocumentIndex++; - if (currentDocumentIndex < currentTerm.getAssociatedDocuments().length) { - currentDocumentInformation = currentTerm.getAssociatedDocuments()[currentDocumentIndex]; - if (reader.isDeleted(currentDocumentInformation.getDocument().getDocumentNumber())) { - return next(); - } else { - return true; - } - } else { - // mimic SegmentTermDocs - currentDocumentIndex = currentTerm.getAssociatedDocuments().length -1; - } - } - return false; - } - - - public int read(int[] docs, int[] freqs) { - int i; - for (i = 0; i < docs.length; i++) { - if (!next()) { - break; - } - docs[i] = doc(); - freqs[i] = freq(); - } - return i; - } - - /** - * Skips entries to the first beyond the current whose document number is - * greater than or equal to target.

Returns true if there is such - * an entry.

Behaves as if written:

-   *   boolean skipTo(int target) {
-   *     do {
-   *       if (!next())
-   * 	     return false;
-   *     } while (target > doc());
-   *     return true;
-   *   }
-   * 
- * This implementation is considerably more efficient than that. - * - */ - public boolean skipTo(int target) { - if (currentTerm == null) { - return false; - } - - if (currentDocumentIndex >= target) { - return next(); - } - - int startOffset = currentDocumentIndex >= 0 ? currentDocumentIndex : 0; - int pos = currentTerm.seekCeilingDocumentInformationIndex(target, startOffset); - - if (pos == -1) { - // mimic SegmentTermDocs that positions at the last index - currentDocumentIndex = currentTerm.getAssociatedDocuments().length -1; - return false; - } - - currentDocumentInformation = currentTerm.getAssociatedDocuments()[pos]; - currentDocumentIndex = pos; - if (reader.hasDeletions() && reader.isDeleted(currentDocumentInformation.getDocument().getDocumentNumber())) { - return next(); - } else { - return true; - } - } - - /** - * Does nothing - */ - public void close() { - } -} Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java =================================================================== --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java (revision 954967) +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java (working copy) @@ -20,8 +20,9 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.PriorityQueue; @@ -88,16 +89,16 @@ IndexReader ir = IndexReader.open(dir, true); try { int threshold = ir.maxDoc() / 10; // ignore words too common. 
- TermEnum terms = ir.terms(new Term(field,"")); - while (terms.next()) { - if (!field.equals(terms.term().field())) { - break; + Terms terms = MultiFields.getTerms(ir, field); + if (terms != null) { + TermsEnum termsEnum = terms.iterator(); + while (termsEnum.next() != null) { + int df = termsEnum.docFreq(); + if (df processed (delete) " + numDeleted + " docs"); r.decRef(); Index: lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java =================================================================== --- lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java (revision 954967) +++ lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.BytesRef; public class TestMultiPassIndexSplitter extends LuceneTestCase { IndexReader input; @@ -62,30 +63,30 @@ assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1); // rounding error Document doc = ir.document(0); assertEquals("0", doc.get("id")); - Term t; - TermEnum te; - t = new Term("id", "1"); - te = ir.terms(t); - assertNotSame(t, te.term()); + TermsEnum te = MultiFields.getTerms(ir, "id").iterator(); + assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("1"))); + assertNotSame("1", te.term().utf8ToString()); ir.close(); ir = IndexReader.open(dirs[1], true); assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1); doc = ir.document(0); assertEquals("1", doc.get("id")); - t = new Term("id", "0"); - te = ir.terms(t); - assertNotSame(t, te.term()); + te = MultiFields.getTerms(ir, "id").iterator(); + assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("0"))); + + assertNotSame("0", te.term().utf8ToString()); ir.close(); ir = IndexReader.open(dirs[2], true); assertTrue(ir.numDocs() - NUM_DOCS / 3 <= 1); doc = ir.document(0); 
assertEquals("2", doc.get("id")); - t = new Term("id", "1"); - te = ir.terms(t); - assertNotSame(t, te.term()); - t = new Term("id", "0"); - te = ir.terms(t); - assertNotSame(t, te.term()); + + te = MultiFields.getTerms(ir, "id").iterator(); + assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("1"))); + assertNotSame("1", te.term()); + + assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("0"))); + assertNotSame("0", te.term().utf8ToString()); } /** @@ -117,10 +118,9 @@ doc = ir.document(0); assertEquals(start + "", doc.get("id")); // make sure the deleted doc is not here - Term t; - TermEnum te; - t = new Term("id", (NUM_DOCS - 1) + ""); - te = ir.terms(t); - assertNotSame(t, te.term()); + TermsEnum te = MultiFields.getTerms(ir, "id").iterator(); + Term t = new Term("id", (NUM_DOCS - 1) + ""); + assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef(t.text()))); + assertNotSame(t.text(), te.term().utf8ToString()); } } Index: lucene/contrib/misc/src/java/org/apache/lucene/misc/LengthNormModifier.java =================================================================== --- lucene/contrib/misc/src/java/org/apache/lucene/misc/LengthNormModifier.java (revision 954967) +++ lucene/contrib/misc/src/java/org/apache/lucene/misc/LengthNormModifier.java (working copy) @@ -16,14 +16,17 @@ * limitations under the License. 
*/ -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; -import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import java.io.File; import java.io.IOException; @@ -109,37 +112,25 @@ String fieldName = StringHelper.intern(field); int[] termCounts = new int[0]; - IndexReader reader = null; - TermEnum termEnum = null; - TermDocs termDocs = null; + IndexReader reader = IndexReader.open(dir, false); try { - reader = IndexReader.open(dir, false); + termCounts = new int[reader.maxDoc()]; - try { - termEnum = reader.terms(new Term(field)); - try { - termDocs = reader.termDocs(); - do { - Term term = termEnum.term(); - if (term != null && term.field().equals(fieldName)) { - termDocs.seek(termEnum.term()); - while (termDocs.next()) { - termCounts[termDocs.doc()] += termDocs.freq(); - } - } - } while (termEnum.next()); - } finally { - if (null != termDocs) termDocs.close(); + Bits delDocs = MultiFields.getDeletedDocs(reader); + DocsEnum docs = null; + + Terms terms = MultiFields.getTerms(reader, field); + if (terms != null) { + TermsEnum termsEnum = terms.iterator(); + while(termsEnum.next() != null) { + docs = termsEnum.docs(delDocs, docs); + int doc; + while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) { + termCounts[doc] += docs.freq(); + } } - } finally { - if (null != termEnum) termEnum.close(); } - } finally { - if (null != reader) reader.close(); - } - - try { - reader = IndexReader.open(dir, false); + for (int d = 0; d < termCounts.length; d++) { if (! 
reader.isDeleted(d)) { byte norm = Similarity.encodeNorm(sim.lengthNorm(fieldName, termCounts[d])); @@ -147,7 +138,7 @@ } } } finally { - if (null != reader) reader.close(); + reader.close(); } } Index: lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java =================================================================== --- lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 954967) +++ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy) @@ -44,11 +44,8 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.index.TermPositionVector; -import org.apache.lucene.index.TermPositions; import org.apache.lucene.index.TermVectorMapper; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.search.Collector; @@ -732,7 +729,6 @@ /////////////////////////////////////////////////////////////////////////////// // Nested classes: /////////////////////////////////////////////////////////////////////////////// - private static final Term MATCH_ALL_TERM = new Term(""); /** * Search support for Lucene framework integration; implements all methods @@ -769,12 +765,6 @@ } @Override - public TermEnum terms() { - if (DEBUG) System.err.println("MemoryIndexReader.terms()"); - return terms(MATCH_ALL_TERM); - } - - @Override public Fields fields() { sortFields(); @@ -1011,202 +1001,6 @@ } @Override - public TermEnum terms(Term term) { - if (DEBUG) System.err.println("MemoryIndexReader.terms: " + term); - - int i; // index into info.sortedTerms - int j; // index into sortedFields - - sortFields(); - if (sortedFields.length == 1 && sortedFields[0].getKey() == term.field()) { - j = 0; // fast path - } else { - j = 
Arrays.binarySearch(sortedFields, term.field(), termComparator); - } - - if (j < 0) { // not found; choose successor - j = -j -1; - i = 0; - if (j < sortedFields.length) getInfo(j).sortTerms(); - } else { // found - Info info = getInfo(j); - info.sortTerms(); - i = Arrays.binarySearch(info.sortedTerms, term.text(), termComparator); - if (i < 0) { // not found; choose successor - i = -i -1; - if (i >= info.sortedTerms.length) { // move to next successor - j++; - i = 0; - if (j < sortedFields.length) getInfo(j).sortTerms(); - } - } - } - final int ix = i; - final int jx = j; - - return new TermEnum() { - - private int srtTermsIdx = ix; // index into info.sortedTerms - private int srtFldsIdx = jx; // index into sortedFields - - @Override - public boolean next() { - if (DEBUG) System.err.println("TermEnum.next"); - if (srtFldsIdx >= sortedFields.length) return false; - Info info = getInfo(srtFldsIdx); - if (++srtTermsIdx < info.sortedTerms.length) return true; - - // move to successor - srtFldsIdx++; - srtTermsIdx = 0; - if (srtFldsIdx >= sortedFields.length) return false; - getInfo(srtFldsIdx).sortTerms(); - return true; - } - - @Override - public Term term() { - if (DEBUG) System.err.println("TermEnum.term: " + srtTermsIdx); - if (srtFldsIdx >= sortedFields.length) return null; - Info info = getInfo(srtFldsIdx); - if (srtTermsIdx >= info.sortedTerms.length) return null; -// if (DEBUG) System.err.println("TermEnum.term: " + i + ", " + info.sortedTerms[i].getKey()); - return createTerm(info, srtFldsIdx, info.sortedTerms[srtTermsIdx].getKey()); - } - - @Override - public int docFreq() { - if (DEBUG) System.err.println("TermEnum.docFreq"); - if (srtFldsIdx >= sortedFields.length) return 0; - Info info = getInfo(srtFldsIdx); - if (srtTermsIdx >= info.sortedTerms.length) return 0; - return numPositions(info.getPositions(srtTermsIdx)); - } - - @Override - public void close() { - if (DEBUG) System.err.println("TermEnum.close"); - } - - /** Returns a new Term object, 
minimizing String.intern() overheads. */ - private Term createTerm(Info info, int pos, String text) { - // Assertion: sortFields has already been called before - Term template = info.template; - if (template == null) { // not yet cached? - String fieldName = sortedFields[pos].getKey(); - template = new Term(fieldName); - info.template = template; - } - - return template.createTerm(text); - } - - }; - } - - @Override - public TermPositions termPositions() { - if (DEBUG) System.err.println("MemoryIndexReader.termPositions"); - - return new TermPositions() { - - private boolean hasNext; - private int cursor = 0; - private ArrayIntList current; - private Term term; - - public void seek(Term term) { - this.term = term; - if (DEBUG) System.err.println(".seek: " + term); - if (term == null) { - hasNext = true; // term==null means match all docs - } else { - Info info = getInfo(term.field()); - current = info == null ? null : info.getPositions(term.text()); - hasNext = (current != null); - cursor = 0; - } - } - - public void seek(TermEnum termEnum) { - if (DEBUG) System.err.println(".seekEnum"); - seek(termEnum.term()); - } - - public int doc() { - if (DEBUG) System.err.println(".doc"); - return 0; - } - - public int freq() { - int freq = current != null ? numPositions(current) : (term == null ? 
1 : 0); - if (DEBUG) System.err.println(".freq: " + freq); - return freq; - } - - public boolean next() { - if (DEBUG) System.err.println(".next: " + current + ", oldHasNext=" + hasNext); - boolean next = hasNext; - hasNext = false; - return next; - } - - public int read(int[] docs, int[] freqs) { - if (DEBUG) System.err.println(".read: " + docs.length); - if (!hasNext) return 0; - hasNext = false; - docs[0] = 0; - freqs[0] = freq(); - return 1; - } - - public boolean skipTo(int target) { - if (DEBUG) System.err.println(".skipTo: " + target); - return next(); - } - - public void close() { - if (DEBUG) System.err.println(".close"); - } - - public int nextPosition() { // implements TermPositions - int pos = current.get(cursor); - cursor += stride; - if (DEBUG) System.err.println(".nextPosition: " + pos); - return pos; - } - - /** - * Not implemented. - * @throws UnsupportedOperationException - */ - public int getPayloadLength() { - throw new UnsupportedOperationException(); - } - - /** - * Not implemented. 
- * @throws UnsupportedOperationException - */ - public byte[] getPayload(byte[] data, int offset) throws IOException { - throw new UnsupportedOperationException(); - } - - public boolean isPayloadAvailable() { - // unsuported - return false; - } - - }; - } - - @Override - public TermDocs termDocs() { - if (DEBUG) System.err.println("MemoryIndexReader.termDocs"); - return termPositions(); - } - - @Override public TermFreqVector[] getTermFreqVectors(int docNumber) { if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVectors"); TermFreqVector[] vectors = new TermFreqVector[fields.size()]; Index: lucene/contrib/lucli/src/java/lucli/LuceneMethods.java =================================================================== --- lucene/contrib/lucli/src/java/lucli/LuceneMethods.java (revision 954967) +++ lucene/contrib/lucli/src/java/lucli/LuceneMethods.java (working copy) @@ -43,8 +43,10 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.FieldsEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.IndexReader.FieldOption; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.queryParser.MultiFieldQueryParser; @@ -58,6 +60,7 @@ import org.apache.lucene.search.Searcher; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; +import org.apache.lucene.util.BytesRef; /** * Various methods that interact with Lucene and provide info about the @@ -342,13 +345,21 @@ public void terms(String field) throws IOException { TreeMap termMap = new TreeMap(); IndexReader indexReader = IndexReader.open(indexName, true); - TermEnum terms = indexReader.terms(); - while (terms.next()) { - Term term = terms.term(); - 
//message(term.field() + ":" + term.text() + " freq:" + terms.docFreq()); - //if we're either not looking by field or we're matching the specific field - if ((field == null) || field.equals(term.field())) - termMap.put(term.field() + ":" + term.text(), Integer.valueOf((terms.docFreq()))); + Fields fields = MultiFields.getFields(indexReader); + if (fields != null) { + FieldsEnum fieldsEnum = fields.iterator(); + String curField; + while((curField = fieldsEnum.next()) != null) { + TermsEnum terms = fieldsEnum.terms(); + BytesRef text; + while ((text = terms.next()) != null) { + //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq()); + //if we're either not looking by field or we're matching the specific field + if ((field == null) || field.equals(curField)) { + termMap.put(curField + ":" + text.utf8ToString(), Integer.valueOf((terms.docFreq()))); + } + } + } } Iterator termIterator = termMap.keySet().iterator(); Index: lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java =================================================================== --- lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java (revision 954967) +++ lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java (working copy) @@ -27,9 +27,11 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.BytesRef; public class DuplicateFilterTest extends LuceneTestCase { private static final String KEY_FIELD = "url"; @@ -134,11 +136,14 @@ { Document d=searcher.doc(hits[i].doc); String url=d.get(KEY_FIELD); - TermDocs td = reader.termDocs(new Term(KEY_FIELD,url)); + DocsEnum td = 
MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + KEY_FIELD, + new BytesRef(url)); int lastDoc=0; - while(td.next()) + while(td.nextDoc() != DocsEnum.NO_MORE_DOCS) { - lastDoc=td.doc(); + lastDoc=td.docID(); } assertEquals("Duplicate urls should return last doc",lastDoc, hits[i].doc); } @@ -155,10 +160,13 @@ { Document d=searcher.doc(hits[i].doc); String url=d.get(KEY_FIELD); - TermDocs td = reader.termDocs(new Term(KEY_FIELD,url)); + DocsEnum td = MultiFields.getTermDocsEnum(reader, + MultiFields.getDeletedDocs(reader), + KEY_FIELD, + new BytesRef(url)); int lastDoc=0; - td.next(); - lastDoc=td.doc(); + td.nextDoc(); + lastDoc=td.docID(); assertEquals("Duplicate urls should return first doc",lastDoc, hits[i].doc); } } Index: lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java =================================================================== --- lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java (revision 954967) +++ lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java (working copy) @@ -25,7 +25,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; @@ -80,10 +80,9 @@ } public void testMatchAll() throws Exception { - TermEnum terms = new RegexQuery(new Term(FN, "jum.")).getEnum(searcher.getIndexReader()); + TermsEnum terms = new RegexQuery(new Term(FN, "jum.")).getTermsEnum(searcher.getIndexReader()); // no term should match - assertNull(terms.term()); - assertFalse(terms.next()); + assertNull(terms.next()); } public void testRegex1() throws Exception { Index: lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java 
=================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java (revision 954967) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java (working copy) @@ -24,8 +24,14 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Fields; import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Bits; /** * Constructs a filter for docs matching any of the terms added to this class. @@ -52,28 +58,37 @@ * @see org.apache.lucene.search.Filter#getDocIdSet(org.apache.lucene.index.IndexReader) */ @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException - { + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { OpenBitSet result=new OpenBitSet(reader.maxDoc()); - TermDocs td = reader.termDocs(); - try - { - for (Iterator iter = terms.iterator(); iter.hasNext();) - { - Term term = iter.next(); - td.seek(term); - while (td.next()) - { - result.set(td.doc()); - } + Fields fields = MultiFields.getFields(reader); + BytesRef br = new BytesRef(); + Bits delDocs = MultiFields.getDeletedDocs(reader); + if (fields != null) { + String lastField = null; + Terms termsC = null; + TermsEnum termsEnum = null; + DocsEnum docs = null; + for (Iterator iter = terms.iterator(); iter.hasNext();) { + Term term = iter.next(); + if (term.field() != lastField) { + termsC = fields.terms(term.field()); + termsEnum = termsC.iterator(); + lastField = term.field(); + } + + if (terms != null) { + br.copy(term.text()); + if (termsEnum.seek(br) == TermsEnum.SeekStatus.FOUND) { + docs = termsEnum.docs(delDocs, docs); + while(docs.nextDoc() != 
DocsEnum.NO_MORE_DOCS) { + result.set(docs.docID()); } + } } - finally - { - td.close(); - } - return result; - } + } + } + return result; + } @Override public boolean equals(Object obj) Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java (revision 954967) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java (working copy) @@ -18,7 +18,7 @@ */ import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.FilteredTermEnum; +import org.apache.lucene.search.FilteredTermsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.ToStringUtils; @@ -60,8 +60,8 @@ } @Override - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new RegexTermEnum(reader, term, regexImpl); + protected FilteredTermsEnum getTermsEnum(IndexReader reader) throws IOException { + return new RegexTermsEnum(reader, term, regexImpl); } @Override Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermEnum.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermEnum.java (revision 954967) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermEnum.java (working copy) @@ -1,83 +0,0 @@ -package org.apache.lucene.search.regex; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.search.FilteredTermEnum; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; - -import java.io.IOException; - -/** - * Subclass of FilteredTermEnum for enumerating all terms that match the - * specified regular expression term using the specified regular expression - * implementation. - *

- * Term enumerations are always ordered by Term.compareTo(). Each term in - * the enumeration is greater than all that precede it. - */ - -public class RegexTermEnum extends FilteredTermEnum { - private String field = ""; - private String pre = ""; - private boolean endEnum = false; - private RegexCapabilities regexImpl; - - public RegexTermEnum(IndexReader reader, Term term, RegexCapabilities regexImpl) throws IOException { - super(); - field = term.field(); - String text = term.text(); - this.regexImpl = regexImpl; - - regexImpl.compile(text); - - pre = regexImpl.prefix(); - if (pre == null) pre = ""; - - setEnum(reader.terms(new Term(term.field(), pre))); - } - - @Override - protected final boolean termCompare(Term term) { - if (field == term.field()) { - String searchText = term.text(); - if (searchText.startsWith(pre)) { - return regexImpl.match(searchText); - } - } - endEnum = true; - return false; - } - - @Override - public final float difference() { -// TODO: adjust difference based on distance of searchTerm.text() and term().text() - return 1.0f; - } - - @Override - public final boolean endEnum() { - return endEnum; - } - - @Override - public void close() throws IOException { - super.close(); - field = null; - } -} Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java (revision 0) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java (revision 0) @@ -0,0 +1,67 @@ +package org.apache.lucene.search.regex; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.FilteredTermsEnum; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; + +/** + * Subclass of FilteredTermEnum for enumerating all terms that match the + * specified regular expression term using the specified regular expression + * implementation. + *

+ * Term enumerations are always ordered by Term.compareTo(). Each term in + * the enumeration is greater than all that precede it. + */ + +public class RegexTermsEnum extends FilteredTermsEnum { + private String field = ""; + private String pre = ""; + private boolean endEnum = false; + private RegexCapabilities regexImpl; + private final BytesRef prefixRef; + + public RegexTermsEnum(IndexReader reader, Term term, RegexCapabilities regexImpl) throws IOException { + super(reader, term.field()); + String text = term.text(); + this.regexImpl = regexImpl; + + regexImpl.compile(text); + + pre = regexImpl.prefix(); + if (pre == null) pre = ""; + + setInitialSeekTerm(prefixRef = new BytesRef(pre)); + } + + @Override + protected AcceptStatus accept(BytesRef term) { + if (term.startsWith(prefixRef)) { + // TODO: set BoostAttr based on distance of + // searchTerm.text() and term().text() + String text = term.utf8ToString(); + return regexImpl.match(text) ? AcceptStatus.YES : AcceptStatus.NO; + } else { + return AcceptStatus.NO; + } + } +} Property changes on: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/MIGRATE.txt =================================================================== --- lucene/MIGRATE.txt (revision 954967) +++ lucene/MIGRATE.txt (working copy) @@ -1,5 +1,5 @@ -LUCENE-2380 +LUCENE-2380: FieldCache.getStrings/Index --> FieldCache.getDocTerms/Index * The field values returned when sorting by SortField.STRING are now BytesRef. You can call value.utf8ToString() to convert back to @@ -43,3 +43,171 @@ String. You can call the .utf8ToString() method on the BytesRef instances, if necessary. + + +LUCENE-1458, LUCENE-2111: Flexible Indexing + + Flexible indexing changed the low level fields/terms/docs/positions + enumeration APIs. 
Here are the major changes: + + * Terms are now binary in nature (arbitrary byte[]), represented + by the BytesRef class (which provides an offset + length "slice" + into an existing byte[]). + + * Fields are separately enumerated (FieldsEnum) from the terms + within each field (TermsEnum). So instead of this: + + TermEnum termsEnum = ...; + while(termsEnum.next()) { + Term t = termsEnum.term(); + System.out.println("field=" + t.field() + "; text=" + t.text()); + } + + Do this: + + FieldsEnum fieldsEnum = ...; + String field; + while((field = fieldsEnum.next()) != null) { + TermsEnum termsEnum = fieldsEnum.terms(); + BytesRef text; + while((text = termsEnum.next()) != null) { + System.out.println("field=" + field + "; text=" + text.utf8ToString()); + } + } + + * TermDocs is renamed to DocsEnum. Instead of this: + + while(td.next()) { + int doc = td.doc(); + ... + } + + do this: + + int doc; + while((doc = td.nextDoc()) != DocsEnum.NO_MORE_DOCS) { + ... + } + + Instead of this: + + if (td.skipTo(target)) { + int doc = td.doc(); + ... + } + + do this: + + if ((doc=td.advance(target)) != DocsEnum.NO_MORE_DOCS) { + ... + } + + * TermPositions is renamed to DocsAndPositionsEnum, and no longer + extends the docs only enumerator (DocsEnum). + + * Deleted docs are no longer implicitly filtered from + docs/positions enums. Instead, you pass a Bits + skipDocs (set bits are skipped) when obtaining the enums. Also, + you can now ask a reader for its deleted docs. + + * The docs/positions enums cannot seek to a term. Instead, + TermsEnum is able to seek, and then you request the + docs/positions enum from that TermsEnum. + + * TermsEnum's seek method returns more information. So instead of + this: + + Term t; + TermEnum termEnum = reader.terms(t); + if (t.equals(termEnum.term())) { + ... + } + + do this: + + TermsEnum termsEnum = ...; + BytesRef text; + if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) { + ... 
+ } + + SeekStatus also contains END (enumerator is done) and NOT_FOUND + (term was not found but enumerator is now positioned to the next + term). + + * TermsEnum has an ord() method, returning the long numeric + ordinal (ie, first term is 0, next is 1, and so on) for the term + it's now positioned to. There is also a corresponding seek(long + ord) method. Note that these methods are optional; in + particular the MultiFields TermsEnum does not implement them. + + + How you obtain the enums has changed. The primary entry point is + the Fields class. If you know your reader is a single segment + reader, do this: + + Fields fields = reader.fields(); + if (fields != null) { + ... + } + + If the reader might be multi-segment, you must do this: + + Fields fields = MultiFields.getFields(reader); + if (fields != null) { + ... + } + + The fields may be null (eg if the reader has no fields). + + Note that the MultiFields approach entails a performance hit on + MultiReaders, as it must merge terms/docs/positions on the fly. It's + generally better to instead get the sequential readers (use + oal.util.ReaderUtil) and then step through those readers yourself, + if you can (this is how Lucene drives searches). + + If you pass a SegmentReader to MultiFields.getFields it will simply + return reader.fields(), so there is no performance hit in that + case. + + Once you have a non-null Fields you can do this: + + Terms terms = fields.terms("field"); + if (terms != null) { + ... + } + + The terms may be null (eg if the field does not exist). + + Once you have a non-null terms you can get an enum like this: + + TermsEnum termsEnum = terms.iterator(); + + The returned TermsEnum will not be null. + + You can then .next() through the TermsEnum, or seek. If you want a + DocsEnum, do this: + + Bits skipDocs = MultiFields.getDeletedDocs(reader); + DocsEnum docsEnum = null; + + docsEnum = termsEnum.docs(skipDocs, docsEnum); + + You can pass in a prior DocsEnum and it will be reused if possible. 
+ + Likewise for DocsAndPositionsEnum. + + IndexReader has several sugar methods (which just go through the + above steps, under the hood). Instead of: + + Term t; + TermDocs termDocs = reader.termDocs(); + termDocs.seek(t); + + do this: + + String field; + BytesRef text; + DocsEnum docsEnum = reader.termDocsEnum(reader.getDeletedDocs(), field, text); + + Likewise for DocsAndPositionsEnum. +