diff -r 77bcab5800a9 lucene/src/java/org/apache/lucene/index/CheckIndex.java
--- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java	Sun Oct 03 09:16:20 2010 +0000
+++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java	Sun Oct 03 06:18:48 2010 -0400
@@ -18,6 +18,8 @@
  */
 
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
@@ -573,6 +575,8 @@
     final int maxDoc = reader.maxDoc();
     final Bits delDocs = reader.getDeletedDocs();
 
+    final IndexSearcher is = new IndexSearcher(reader);
+
     try {
 
       if (infoStream != null) {
@@ -584,7 +588,10 @@
         msg("OK [no fields/terms]");
         return status;
       }
-
+
+      DocsEnum docs = null;
+      DocsAndPositionsEnum postings = null;
+
       final FieldsEnum fieldsEnum = fields.iterator();
       while(true) {
         final String field = fieldsEnum.next();
@@ -594,9 +601,6 @@
 
         final TermsEnum terms = fieldsEnum.terms();
 
-        DocsEnum docs = null;
-        DocsAndPositionsEnum postings = null;
-
         boolean hasOrd = true;
         final long termCountStart = status.termCount;
 
@@ -706,6 +710,70 @@
             }
           }
         }
+
+        // Test seek to last term:
+        if (lastTerm != null) {
+          if (terms.seek(lastTerm) != TermsEnum.SeekStatus.FOUND) {
+            throw new RuntimeException("seek to last term " + lastTerm + " failed");
+          }
+
+          is.search(new TermQuery(new Term(field, lastTerm)), 1);
+        }
+
+        // Test seeking by ord
+        if (hasOrd && status.termCount-termCountStart > 0) {
+          long termCount;
+          try {
+            termCount = fields.terms(field).getUniqueTermCount();
+          } catch (UnsupportedOperationException uoe) {
+            termCount = -1;
+          }
+
+          if (termCount != -1 && termCount != status.termCount - termCountStart) {
+            throw new RuntimeException("termCount mismatch " + termCount + " vs " + (status.termCount - termCountStart));
+          }
+
+          termCount = status.termCount - termCountStart;
+
+          int seekCount = (int) Math.min(10000L, termCount);
+          if (seekCount > 0) {
+            BytesRef[] seekTerms = new BytesRef[seekCount];
+
+            // Seek by ord
+            for(int i=seekCount-1;i>=0;i--) {
+              long ord = i*(termCount/seekCount);
+              terms.seek(ord);
+              seekTerms[i] = new BytesRef(terms.term());
+            }
+
+            // Seek by term
+            long totDocCount = 0;
+            for(int i=seekCount-1;i>=0;i--) {
+              if (terms.seek(seekTerms[i]) != TermsEnum.SeekStatus.FOUND) {
+                throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
+              }
+
+              docs = terms.docs(delDocs, docs);
+              if (docs == null) {
+                throw new RuntimeException("null DocsEnum from seek to existing term " + seekTerms[i]);
+              }
+
+              while(docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+                totDocCount++;
+              }
+            }
+
+            // TermQuery
+            long totDocCount2 = 0;
+            for(int i=0;i<seekCount;i++) {
+              totDocCount2 += is.search(new TermQuery(new Term(field, seekTerms[i])), 1).totalHits;
+            }
+
+            if (totDocCount != totDocCount2) {
+              throw new RuntimeException("TermQuery count " + totDocCount2 + " is different from straight enum count " + totDocCount);
+            }
+          }
+        }
       }
 
       msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
diff -r 77bcab5800a9 lucene/src/test/org/apache/lucene/index/Test2BTerms.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lucene/src/test/org/apache/lucene/index/Test2BTerms.java	Sun Oct 03 06:18:48 2010 -0400
@@ -0,0 +1,161 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeSource.AttributeFactory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+import org.junit.Ignore;
+
+public class Test2BTerms extends LuceneTestCase {
+
+  private final static class MyTokenStream extends TokenStream {
+
+    private final int tokensPerDoc;
+    private int tokenCount;
+    // the shared term bytes act as a growing big-endian counter;
+    // 10 bytes of capacity is far more than ~2.2B terms need
+    private final static BytesRef bytes = new BytesRef(10);
+
+    public MyTokenStream(int tokensPerDoc) {
+      super(new MyAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY));
+      this.tokensPerDoc = tokensPerDoc;
+      addAttribute(TermToBytesRefAttribute.class);
+    }
+
+    @Override
+    public boolean incrementToken() {
+      if (tokenCount >= tokensPerDoc) {
+        return false;
+      }
+      // add one to the counter, with carry; grow by one byte on overflow
+      final byte[] bs = bytes.bytes;
+      for(int i=bytes.length-1;i>=0;i--) {
+        int b = bs[i]&0xff;
+        if (b == 0xff) {
+          bs[i] = 0;
+        } else {
+          bs[i] = (byte) (++b);
+          tokenCount++;
+          return true;
+        }
+      }
+      bytes.length++;
+      bs[0] = 1;
+      tokenCount++;
+      return true;
+    }
+
+    public void reset() {
+      tokenCount = 0;
+    }
+
+    private final static class MyTermAttributeImpl extends AttributeImpl implements TermToBytesRefAttribute {
+      public int toBytesRef(BytesRef bs) {
+        bs.copy(bytes);
+        return bytes.hashCode();
+      }
+
+      @Override
+      public void clear() {
+      }
+
+      @Override
+      public boolean equals(Object other) {
+        return other == this;
+      }
+
+      @Override
+      public int hashCode() {
+        return System.identityHashCode(this);
+      }
+
+      @Override
+      public void copyTo(AttributeImpl target) {
+      }
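+
+      // cloning is deliberately unsupported: the indexing chain
+      // must only ever see this single shared attribute instance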
+      @Override
+      public Object clone() {
+        throw new UnsupportedOperationException();
+      }
+    }
+
+    private static final class MyAttributeFactory extends AttributeFactory {
+      private final AttributeFactory delegate;
+
+      public MyAttributeFactory(AttributeFactory delegate) {
+        this.delegate = delegate;
+      }
+
+      @Override
+      public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
+        if (attClass == TermToBytesRefAttribute.class)
+          return new MyTermAttributeImpl();
+        if (CharTermAttribute.class.isAssignableFrom(attClass))
+          throw new IllegalArgumentException("no");
+        return delegate.createAttributeInstance(attClass);
+      }
+    }
+  }
+
+  @Ignore("Takes ~4 hours to run on a fast machine!!")
+  public void test2BTerms() throws IOException {
+
+    long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000;
+
+    int TERMS_PER_DOC = 1000000;
+
+    Directory dir = FSDirectory.open(_TestUtil.getTempDir("2BTerms"));
+    IndexWriter w = new IndexWriter(dir,
+                                    newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
+                                    .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
+                                    .setRAMBufferSizeMB(256.0).setMergeScheduler(new ConcurrentMergeScheduler()));
+    ((LogMergePolicy) w.getConfig().getMergePolicy()).setUseCompoundFile(false);
+    ((LogMergePolicy) w.getConfig().getMergePolicy()).setUseCompoundDocStore(false);
+    ((LogMergePolicy) w.getConfig().getMergePolicy()).setMergeFactor(10);
+
+    Document doc = new Document();
+    Field field = new Field("field", new MyTokenStream(TERMS_PER_DOC));
+    field.setOmitTermFreqAndPositions(true);
+    field.setOmitNorms(true);
+    doc.add(field);
+    //w.setInfoStream(System.out);
+    final int numDocs = (int) (TERM_COUNT/TERMS_PER_DOC);
+    System.out.println(numDocs + " docs");
+    for(int i=0;i<numDocs;i++) {
+      w.addDocument(doc);
+    }
+    w.close();
+    dir.close();
+  }
+}