diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/document/Field.java jakarta-lucene/src/java/org/apache/lucene/document/Field.java --- jakarta-lucene-orig/src/java/org/apache/lucene/document/Field.java Sat Apr 3 13:15:44 2004 +++ jakarta-lucene/src/java/org/apache/lucene/document/Field.java Sat Apr 3 12:01:31 2004 @@ -31,13 +31,14 @@ */ public final class Field implements java.io.Serializable { - private String name = "body"; - private String stringValue = null; - private boolean storeTermVector = false; - private Reader readerValue = null; - private boolean isStored = false; - private boolean isIndexed = true; - private boolean isTokenized = true; + // everything is immutable except boost + private final String name; + private final String stringValue; + private final boolean storeTermVector; + private final Reader readerValue; + private final boolean isStored; + private final boolean isIndexed; + private final boolean isTokenized; private float boost = 1.0f; @@ -132,9 +133,7 @@ not stored in the index verbatim. Useful for longer text fields, like "body". */ public static final Field Text(String name, Reader value, boolean storeTermVector) { - Field f = new Field(name, value); - f.storeTermVector = storeTermVector; - return f; + return new Field(name, value, storeTermVector); } /** The name of the field (e.g., "date", "subject", "title", or "body") @@ -177,6 +176,7 @@ this.name = name.intern(); // field names are interned this.stringValue = string; + this.readerValue = null; this.isStored = store; this.isIndexed = index; this.isTokenized = token; @@ -184,13 +184,22 @@ } Field(String name, Reader reader) { + this(name, reader, false); + } + + Field(String name, Reader reader, boolean storeTermVector) { if (name == null) - throw new IllegalArgumentException("name cannot be null"); + throw new IllegalArgumentException("name cannot be null"); if (reader == null) - throw new IllegalArgumentException("value cannot be null"); + throw new IllegalArgumentException("value cannot be null"); this.name = name.intern(); // field names are interned + this.stringValue = null; this.readerValue = reader; + this.isStored = false; + this.isIndexed = true; + this.isTokenized = true; + this.storeTermVector = storeTermVector; } /** True iff the value of the field is to be stored in the index for return diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/DirectorySegmentInfo.java jakarta-lucene/src/java/org/apache/lucene/index/DirectorySegmentInfo.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/DirectorySegmentInfo.java Wed Dec 31 16:00:00 1969 +++ jakarta-lucene/src/java/org/apache/lucene/index/DirectorySegmentInfo.java Sun Mar 14 22:04:19 2004 @@ -0,0 +1,54 @@ +package org.apache.lucene.index; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; + +import java.io.IOException; + +// Created on Mar 14, 2004 + +/** A SegmentInfo describing a segment that is stored in a Directory */ +final class DirectorySegmentInfo extends SegmentInfo { + + /** the directory this segment came from */ + final Directory dir; + /** the name of the segment in the directory */ + final String name; + /** the number of docs in the segment */ + final int docCount; + + /** + * creates a SegmentInfo based on a segment in a directory + */ + DirectorySegmentInfo(Directory dir, String name, int docCount) { + this.dir = dir; + this.name = name; + this.docCount = docCount; + } + + boolean isTransient() { + return false; 
+ } + + IndexSource openIndex() throws IOException { + return new SegmentReader(dir, name); + } + + boolean hasDeletions() throws IOException { + return dir.fileExists(name + ".del"); + } + + boolean usesCompoundFile() throws IOException { + return dir.fileExists(name + ".cfs"); + } + + boolean isStoredInDirectory(Directory directory) { + return dir==directory; + } + + String getDescription() { + return name + " (" + docCount + " docs)"; + } + + int getDocCount() { return docCount; } +} diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/DocumentWriter.java jakarta-lucene/src/java/org/apache/lucene/index/DocumentWriter.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/DocumentWriter.java Sat Apr 3 13:15:44 2004 +++ jakarta-lucene/src/java/org/apache/lucene/index/DocumentWriter.java Sat Apr 3 12:01:31 2004 @@ -19,9 +19,7 @@ import java.io.IOException; import java.io.Reader; import java.io.StringReader; -import java.util.Hashtable; -import java.util.Enumeration; -import java.util.Arrays; +import java.util.*; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -81,7 +79,7 @@ invertDocument(doc); // sort postingTable into an array - Posting[] postings = sortPostingTable(); + Posting[] postings = sortPostingTable(postingTable); /* for (int i = 0; i < postings.length; i++) { @@ -179,12 +177,14 @@ } } - private final Posting[] sortPostingTable() { + static final Posting[] sortPostingTable(Map postingTable) { // copy postingTable into an array Posting[] array = new Posting[postingTable.size()]; - Enumeration postings = postingTable.elements(); - for (int i = 0; postings.hasMoreElements(); i++) - array[i] = (Posting) postings.nextElement(); + + Iterator it = postingTable.values().iterator(); + for(int i=0; i<array.length; i++) + array[i] = (Posting) it.next(); return array; } diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/FilterIndexReader.java jakarta-lucene/src/java/org/apache/lucene/index/FilterIndexReader.java * FilterIndexReader itself simply implements all abstract methods * of IndexReader with versions that pass all requests to the - * contained index reader. Subclasses of FilterIndexReader may + * contained index source. Subclasses of FilterIndexReader may * further override some of these methods and may also provide additional * methods and fields. */ diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/IndexReader.java jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/IndexReader.java Sat Apr 3 13:15:44 2004 +++ jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java Sat Apr 3 12:01:33 2004 @@ -43,7 +43,7 @@ @author Doug Cutting @version $Id: IndexReader.java,v 1.29 2004/03/29 22:48:02 cutting Exp $ */ -public abstract class IndexReader { +public abstract class IndexReader extends IndexSource { protected IndexReader(Directory directory) { this.directory = directory; stale = false; @@ -63,7 +63,7 @@ /** Returns an IndexReader reading the index in an FSDirectory in the named path.
*/ - public static IndexReader open(File path) throws IOException { + public static IndexSource open(File path) throws IOException { return open(FSDirectory.getDirectory(path, false)); } @@ -77,11 +77,11 @@ SegmentInfos infos = new SegmentInfos(); infos.read(directory); if (infos.size() == 1) { // index is optimized - return new SegmentReader(infos, infos.info(0), true); + return new SegmentReader(infos, (DirectorySegmentInfo)infos.info(0), true); } else { IndexReader[] readers = new IndexReader[infos.size()]; for (int i = 0; i < infos.size(); i++) - readers[i] = new SegmentReader(infos, infos.info(i), i==infos.size()-1); + readers[i] = new SegmentReader(infos, (DirectorySegmentInfo)infos.info(i), i==infos.size()-1); return new MultiReader(directory, readers); } } @@ -176,18 +176,7 @@ return SegmentInfos.readCurrentVersion(directory); } - /** Return an array of term frequency vectors for the specified document. - * The array contains a vector for each vectorized field in the document. - * Each vector contains terms and frequencies for all terms - * in a given vectorized field. - * If no such fields existed, the method returns null. - * - * @see Field#isTermVectorStored() - */ - abstract public TermFreqVector[] getTermFreqVectors(int docNumber) - throws IOException; - - /** Return a term frequency vector for the specified document and field. The + /** Return a term frequency vector for the specified document and field. The * vector returned contains terms and frequencies for those terms in * the specified field of this document, if the field had storeTermVector * flag set. If the flag was not set, the method returns null. @@ -229,32 +218,6 @@ return directory.fileExists("segments"); } - /** Returns the number of documents in this index. */ - public abstract int numDocs(); - - /** Returns one greater than the largest possible document number. - This may be used to, e.g., determine how big to allocate an array which - will have an element for every document number in an index. - */ - public abstract int maxDoc(); - - /** Returns the stored fields of the nth - Document in this index. */ - public abstract Document document(int n) throws IOException; - - /** Returns true if document n has been deleted */ - public abstract boolean isDeleted(int n); - - /** Returns true if any documents have been deleted */ - public abstract boolean hasDeletions(); - - /** Returns the byte-encoded normalization factor for the named field of - * every document. This is used by the search code to score documents. - * - * @see Field#setBoost(float) - */ - public abstract byte[] norms(String field) throws IOException; - /** Reads the byte-encoded normalization factor for the named field of every * document. This is used by the search code to score documents. * @@ -287,12 +250,6 @@ } - /** Returns an enumeration of all the terms in the index. - The enumeration is ordered by Term.compareTo(). Each term - is greater than all that precede it in the enumeration. - */ - public abstract TermEnum terms() throws IOException; - /** Returns an enumeration of all terms after a given term. The enumeration is ordered by Term.compareTo(). Each term is greater than all that precede it in the enumeration. @@ -343,9 +300,6 @@ return termPositions; } - /** Returns an unpositioned {@link TermPositions} enumerator. */ - public abstract TermPositions termPositions() throws IOException; - /** Deletes the document numbered docNum. Once a document is deleted it will not appear in TermDocs or TermPostitions enumerations. 
Attempts to read its field with the {@link #document} @@ -363,8 +317,8 @@ throw new IOException("Index locked for write: " + writeLock); this.writeLock = writeLock; - // we have to check whether index has changed since this reader was opened. - // if so, this reader is no longer valid for deletion + // we have to check whether index has changed since this source was opened. + // if so, this source is no longer valid for deletion if (segmentInfos != null && SegmentInfos.readCurrentVersion(directory) > segmentInfos.getVersion()) { stale = true; this.writeLock.release(); @@ -404,11 +358,6 @@ /** Undeletes all documents currently marked as deleted in this index.*/ public abstract void undeleteAll() throws IOException; - /** - * Closes files associated with this index. - * Also saves any new deletions to disk. - * No other methods should be called after this has been called. - */ public final synchronized void close() throws IOException { doClose(); if (writeLock != null) { @@ -435,25 +384,6 @@ * @throws IOException if there is a problem with accessing the index */ public abstract Collection getFieldNames() throws IOException; - - /** - * Returns a list of all unique field names that exist in the index pointed - * to by this IndexReader. The boolean argument specifies whether the fields - * returned are indexed or not. - * @param indexed true if only indexed fields should be returned; - * false if only unindexed fields should be returned. - * @return Collection of Strings indicating the names of the fields - * @throws IOException if there is a problem with accessing the index - */ - public abstract Collection getFieldNames(boolean indexed) throws IOException; - - /** - * - * @param storedTermVector if true, returns only Indexed fields that have term vector info, - * else only indexed fields without term vector info - * @return Collection of Strings indicating the names of the fields - */ - public abstract Collection getIndexedFieldNames(boolean storedTermVector); /** * Returns true iff the index in the named directory is diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/IndexSource.java jakarta-lucene/src/java/org/apache/lucene/index/IndexSource.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/IndexSource.java Wed Dec 31 16:00:00 1969 +++ jakarta-lucene/src/java/org/apache/lucene/index/IndexSource.java Sun Mar 14 15:15:40 2004 @@ -0,0 +1,83 @@ +package org.apache.lucene.index; + +// Copyright 2004 Brian Slesinsky +// Created on Mar 13, 2004 + +import org.apache.lucene.document.Document; + +import java.io.IOException; +import java.util.Collection; + +public abstract class IndexSource { + /** Return an array of term frequency vectors for the specified document. + * The array contains a vector for each vectorized field in the document. + * Each vector contains terms and frequencies for all terms + * in a given vectorized field. + * If no such fields existed, the method returns null. + * + * @see org.apache.lucene.document.Field#isTermVectorStored() + */ + public abstract TermFreqVector[] getTermFreqVectors(int docNumber) + throws IOException; + + /** Returns the number of documents in this index. */ + public abstract int numDocs(); + + /** Returns one greater than the largest possible document number. This may be used to, e.g., determine how big to allocate an array which will have an element for every document number in an index.
+ */ + public abstract int maxDoc(); + + /** Returns the stored fields of the nth + Document in this index. */ + public abstract Document document(int n) throws IOException; + + /** Returns true if document n has been deleted */ + public abstract boolean isDeleted(int n); + + /** + * Closes files associated with this index. + * Also saves any new deletions to disk. + * No other methods should be called after this has been called. + */ + public abstract void close() throws IOException; + + /** + * + * @param storedTermVector if true, returns only Indexed fields that have term vector info, + * else only indexed fields without term vector info + * @return Collection of Strings indicating the names of the fields + */ + public abstract Collection getIndexedFieldNames(boolean storedTermVector); + + /** + * Returns a list of all unique field names that exist in the index pointed + * to by this IndexReader. The boolean argument specifies whether the fields + * returned are indexed or not. + * @param indexed true if only indexed fields should be returned; + * false if only unindexed fields should be returned. + * @return Collection of Strings indicating the names of the fields + * @throws java.io.IOException if there is a problem with accessing the index + */ + public abstract Collection getFieldNames(boolean indexed) throws IOException; + + /** Returns true if any documents have been deleted */ + public abstract boolean hasDeletions(); + + /** Returns an unpositioned {@link TermPositions} enumerator. */ + public abstract TermPositions termPositions() throws IOException; + + /** Returns an enumeration of all the terms in the index. + The enumeration is ordered by Term.compareTo(). Each term + is greater than all that precede it in the enumeration. + */ + public abstract TermEnum terms() throws IOException; + + /** Returns the byte-encoded normalization factor for the named field of + * every document. This is used by the search code to score documents. + * + * @see org.apache.lucene.document.Field#setBoost(float) + */ + public abstract byte[] norms(String field) throws IOException; +} diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/IndexWriter.java jakarta-lucene/src/java/org/apache/lucene/index/IndexWriter.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/IndexWriter.java Sat Apr 3 13:15:44 2004 +++ jakarta-lucene/src/java/org/apache/lucene/index/IndexWriter.java Sat Apr 3 12:43:07 2004 @@ -108,7 +108,7 @@ private Similarity similarity = Similarity.getDefault(); // how to normalize private SegmentInfos segmentInfos = new SegmentInfos(); // the segments - private final Directory ramDirectory = new RAMDirectory(); // for temp segs + //private final Directory ramDirectory = new RAMDirectory(); // for temp segs private Lock writeLock; @@ -234,7 +234,7 @@ the directory that the index is stored in. */ public synchronized void close() throws IOException { flushRamSegments(); - ramDirectory.close(); + //ramDirectory.close(); writeLock.release(); // release write lock writeLock = null; directory.close(); @@ -259,7 +259,7 @@ int count = 0; for (int i = 0; i < segmentInfos.size(); i++) { SegmentInfo si = segmentInfos.info(i); - count += si.docCount; + count += si.getDocCount(); } return count; } @@ -294,12 +294,13 @@ * discarded. 
*/ public void addDocument(Document doc, Analyzer analyzer) throws IOException { - DocumentWriter dw = - new DocumentWriter(ramDirectory, analyzer, similarity, maxFieldLength); - String segmentName = newSegmentName(); - dw.addDocument(segmentName, doc); + //DocumentWriter dw = + // new DocumentWriter(ramDirectory, analyzer, similarity, maxFieldLength); + //String segmentName = newSegmentName(); + //dw.addDocument(segmentName, doc); + InvertedDocument inv = new InvertedDocument(doc, analyzer, similarity, maxFieldLength); synchronized (this) { - segmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory)); + segmentInfos.addElement(inv); maybeMergeSegments(); } } @@ -346,10 +347,10 @@ flushRamSegments(); while (segmentInfos.size() > 1 || (segmentInfos.size() == 1 && - (SegmentReader.hasDeletions(segmentInfos.info(0)) || + (segmentInfos.info(0).hasDeletions() || (useCompoundFile && - !SegmentReader.usesCompoundFile(segmentInfos.info(0))) || - segmentInfos.info(0).dir != directory))) { + !segmentInfos.info(0).usesCompoundFile()) || + !segmentInfos.info(0).isStoredInDirectory(directory)))) { int minSegment = segmentInfos.size() - mergeFactor; mergeSegments(minSegment < 0 ? 0 : minSegment); } @@ -379,7 +380,7 @@ /** Merges the provided indexes into this index. *
<p>
After this completes, the index is optimized. */ - public synchronized void addIndexes(IndexReader[] readers) + public synchronized void addIndexes(IndexSource[] sources) throws IOException { optimize(); // start with zero or 1 seg @@ -388,15 +389,15 @@ SegmentMerger merger = new SegmentMerger(directory, mergedName, false); if (segmentInfos.size() == 1) // add existing index, if any - merger.add(new SegmentReader(segmentInfos.info(0))); + merger.add( segmentInfos.info(0).openIndex() ); - for (int i = 0; i < readers.length; i++) // add new indexes - merger.add(readers[i]); + for (int i = 0; i < sources.length; i++) // add new indexes + merger.add(sources[i]); int docCount = merger.merge(); // merge 'em segmentInfos.setSize(0); // pop old infos & add new - segmentInfos.addElement(new SegmentInfo(mergedName, docCount, directory)); + segmentInfos.addElement(new DirectorySegmentInfo(directory, mergedName, docCount)); synchronized (directory) { // in- & inter-process sync new Lock.With(directory.makeLock("commit.lock"), COMMIT_LOCK_TIMEOUT) { @@ -413,17 +414,24 @@ int minSegment = segmentInfos.size()-1; int docCount = 0; while (minSegment >= 0 && - (segmentInfos.info(minSegment)).dir == ramDirectory) { - docCount += segmentInfos.info(minSegment).docCount; + segmentInfos.info(minSegment).isTransient() ) { + docCount += segmentInfos.info(minSegment).getDocCount(); minSegment--; } if (minSegment < 0 || // add one FS segment? - (docCount + segmentInfos.info(minSegment).docCount) > mergeFactor || - !(segmentInfos.info(segmentInfos.size()-1).dir == ramDirectory)) + (docCount + segmentInfos.info(minSegment).getDocCount()) > mergeFactor || + !(segmentInfos.info(segmentInfos.size()-1).isTransient())) minSegment++; if (minSegment >= segmentInfos.size()) return; // none to merge mergeSegments(minSegment); + + // check post-condition + for(int i=0; i<segmentInfos.size(); i++) { + if (segmentInfos.info(i).isTransient()) + throw new IllegalStateException("transient segment remains after flush"); + } } while (--minSegment >= 0) { SegmentInfo si = segmentInfos.info(minSegment); - if (si.docCount >= targetMergeDocs) + if (si.getDocCount() >= targetMergeDocs) break; - mergeDocs += si.docCount; + mergeDocs += si.getDocCount(); } if (mergeDocs >= targetMergeDocs) // found a merge to do @@ -462,12 +470,15 @@ for (int i = minSegment; i < segmentInfos.size(); i++) { SegmentInfo si = segmentInfos.info(i); if (infoStream != null) - infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); - IndexReader reader = new SegmentReader(si); - merger.add(reader); - if ((reader.directory() == this.directory) || // if we own the directory - (reader.directory() == this.ramDirectory)) - segmentsToDelete.addElement(reader); // queue segment for deletion + infoStream.print(" (" + si.getDocCount() + " docs)"); + IndexSource source = si.openIndex(); + merger.add(source); + if (source instanceof SegmentReader) { + SegmentReader reader = (SegmentReader) source; + if(reader.directory()==this.directory) { + segmentsToDelete.addElement(source); // queue segment for deletion + } + } } int mergedDocCount = merger.merge(); @@ -476,9 +487,8 @@ infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)"); } - segmentInfos.setSize(minSegment); // pop old infos & add new - segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount, - directory)); + segmentInfos.setSize(minSegment); // pop old infos & add new + segmentInfos.addElement(new DirectorySegmentInfo(directory, mergedName, mergedDocCount)); synchronized (directory) { // in- & inter-process sync new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) { diff -Naur --exclude=CVS --exclude=.*
jakarta-lucene-orig/src/java/org/apache/lucene/index/InvertedDocument.java jakarta-lucene/src/java/org/apache/lucene/index/InvertedDocument.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/InvertedDocument.java Wed Dec 31 16:00:00 1969 +++ jakarta-lucene/src/java/org/apache/lucene/index/InvertedDocument.java Wed Mar 17 21:49:13 2004 @@ -0,0 +1,370 @@ +package org.apache.lucene.index; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.Similarity; + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import java.util.*; + +class InvertedDocument extends SegmentInfo { + + private final Document doc; + private final Analyzer analyzer; + private final Similarity similarity; + private final int maxFieldLength; + + // the inverted document + + private final FieldInfos fieldInfos; + private final int[] fieldLengths; + private final float[] fieldBoosts; + private final Posting[] postings; + + InvertedDocument(Document doc, Analyzer analyzer, + Similarity similarity, int maxFieldLength) + throws IOException + { + this.doc = doc; + this.analyzer = analyzer; + this.similarity = similarity; + this.maxFieldLength = maxFieldLength; + + this.fieldInfos = new FieldInfos(); + this.fieldInfos.add(doc); + + this.fieldLengths = new int[fieldInfos.size()]; + this.fieldBoosts = new float[fieldInfos.size()]; + Arrays.fill(fieldBoosts, doc.getBoost()); + + postings = invertDocument(doc); + } + + // Tokenizes the fields of a document into Postings. + private final Posting[] invertDocument(Document doc) + throws IOException { + + List postings = new ArrayList(100); + + // iterate through the fields in sorted order, + // so that we can sort each field individually. 
+ List fieldNames = new ArrayList(fieldInfos.size()); + for(Enumeration enum = doc.fields(); enum.hasMoreElements(); ) { + Field field = (Field)enum.nextElement(); + fieldNames.add( field.name() ); + } + Collections.sort(fieldNames); + + for(int i=0; i<fieldNames.size(); i++) { + String fieldName = (String) fieldNames.get(i); + Field field = doc.getField(fieldName); + int fieldNumber = fieldInfos.fieldNumber(fieldName); + int length = 0; // length of field + int position = 0; // position in field + Map postingTable = new TreeMap(); // postings for this field, sorted by term + + if (field.isIndexed()) { + if (!field.isTokenized()) { // un-tokenized field + addPosition(postingTable, fieldName, field.stringValue(), position++); + length++; + } else { + Reader reader; // find or make Reader + if (field.readerValue() != null) + reader = field.readerValue(); + else + reader = new StringReader(field.stringValue()); + + // Tokenize field and add to postingTable + TokenStream stream = analyzer.tokenStream(fieldName, reader); + try { + for (Token t = stream.next(); t != null; t = stream.next()) { + addPosition(postingTable, fieldName, t.termText(), position++); + if (++length > maxFieldLength) break; + } + } finally { + stream.close(); + } + } + + fieldBoosts[fieldNumber] *= field.getBoost(); + } + + postings.addAll(postingTable.values()); + fieldLengths[fieldNumber] = length; // save field length + } + return (Posting[]) postings.toArray(new Posting[postings.size()]); + } + + private static final void addPosition(Map postingTable, String field, String text, int position) { + + Posting ti = (Posting) postingTable.get(text); + if (ti != null) { // word seen before + int freq = ti.freq; + if (ti.positions.length == freq) { // positions array is full + int[] newPositions = new int[freq * 2]; // double size + int[] positions = ti.positions; + for (int i = 0; i < freq; i++) // copy old positions to new + newPositions[i] = positions[i]; + ti.positions = newPositions; + } + ti.positions[freq] = position; // add new position + ti.freq = freq + 1; // update frequency + } else { // word not seen before + Term term = new Term(field, text, false); + postingTable.put(text, new Posting(term, position)); + } + } + + boolean isTransient() { + return true; + } + + IndexSource openIndex() throws IOException { + return new DocumentIndexSource(); + } + + boolean hasDeletions() throws IOException { + return false; + } + + boolean usesCompoundFile() throws IOException { + return false; + } + + boolean isStoredInDirectory(Directory directory) { + return false; + } + + String getDescription() { + return "inverted-doc"; + } + + int getDocCount() { + return 1; + } + + private class DocumentIndexSource extends IndexSource { + public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException { + List vectors = new ArrayList(); + + String currentFieldName = null; + int termVectorStartIndex = -1; + for(int i=0; i<postings.length; i++) { + String fieldName = postings[i].term.field(); + if(!fieldName.equals(currentFieldName)) { + currentFieldName = fieldName; + if(termVectorStartIndex>=0 && i-termVectorStartIndex>0) { + // we have a term vector to store + vectors.add( newTermVector(postings, termVectorStartIndex, i) ); + } + + FieldInfo fi = fieldInfos.fieldInfo(currentFieldName); + termVectorStartIndex = fi.storeTermVector ? i : -1; + } + } + if(termVectorStartIndex>=0) { + vectors.add(newTermVector(postings, termVectorStartIndex, postings.length)); + } + + return (TermFreqVector[]) vectors.toArray(new TermFreqVector[vectors.size()]); + } + + private TermFreqVector newTermVector(Posting[] postings, int startIndex, int endIndex) { + String[] terms = new String[endIndex - startIndex]; + int[] freqs = new int[endIndex - startIndex]; + for(int i=0; i<terms.length; i++) { + terms[i] = postings[startIndex+i].term.text(); + freqs[i] = postings[startIndex+i].freq; + } + return new SegmentTermVector(postings[startIndex].term.field(), terms, freqs); + } + + public int numDocs() { + return 1; + } + + public int maxDoc() { + return 1; + } + + public Document document(int n) throws IOException { + return doc; + } + + public boolean isDeleted(int n) { + return false; + } + + public boolean hasDeletions() { + return false; + } + + public void close() throws IOException { + } + + public Collection getFieldNames(boolean indexed) throws IOException { + Set names = new HashSet(); + for (int i = 0; i < fieldInfos.size(); i++) { + FieldInfo fi = fieldInfos.fieldInfo(i); + if (fi.isIndexed == indexed) + names.add(fi.name); + } + return names; + } + + public Collection getIndexedFieldNames(boolean storedTermVector) { + Set names = new HashSet(); + for (int i = 0; i < fieldInfos.size(); i++) { + FieldInfo fi = fieldInfos.fieldInfo(i); + if (fi.isIndexed && fi.storeTermVector == storedTermVector) + names.add(fi.name); + } + return names; + } + + public TermEnum terms() throws IOException { + return new DocumentTermEnum(); + } + + public TermPositions termPositions() throws IOException { + return new DocumentTermPositions(); + } + + public byte[] norms(String field) throws IOException { + int n = fieldInfos.fieldNumber(field); + float norm = fieldBoosts[n] * similarity.lengthNorm(field, fieldLengths[n]); + return new byte[] { Similarity.encodeNorm(norm) }; + } + } + + private class DocumentTermEnum extends TermEnum { + private int index = -1; + + public boolean next() throws IOException { + return ++index < postings.length; + } + + public Term term() { + return index >= 0 && index < postings.length ? postings[index].term : null; + } + + public int docFreq() { + return 1; + } + + public void close() throws IOException { + } + + Posting getPosting() { + return index >= 0 && index < postings.length ? postings[index] : null; + } + } + + private class DocumentTermPositions implements TermPositions { + private Posting currentPosting; + private int doc; + private int nextPosIndex; + + public void seek(Term term) throws IOException { + int index = -1; + for (int i = 0; i < postings.length; i++) { + if (postings[i].term.equals(term)) { index = i; break; } + } + currentPosting = index >= 0 ? postings[index] : null; + doc = -1; + nextPosIndex = 0; + } + + public void seek(TermEnum termEnum) throws IOException { + currentPosting = ((DocumentTermEnum)termEnum).getPosting(); + doc = -1; + nextPosIndex = 0; + } + + public int doc() { + return doc; + } + + public int freq() { + return currentPosting==null ?
0 : currentPosting.freq; + } + + public boolean next() throws IOException { + if(currentPosting!=null && doc==-1) { + doc = 0; + return true; + } else { + return false; + } + } + + public int read(int[] docs, int[] freqs) throws IOException { + int count = 0; + for(int i=0; i<docs.length; i++) { + if (!next()) break; + docs[i] = doc(); + freqs[i] = freq(); + count++; + } + return count; + } + + public int nextPosition() throws IOException { + return currentPosting.positions[nextPosIndex++]; + } + + public boolean skipTo(int target) throws IOException { + do { + if (!next()) return false; + } while (target > doc()); + return true; + } + + public void close() throws IOException { + } + } +} diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/MultiReader.java jakarta-lucene/src/java/org/apache/lucene/index/MultiReader.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/MultiReader.java Sat Apr 3 13:15:44 2004 +++ jakarta-lucene/src/java/org/apache/lucene/index/MultiReader.java Sat Apr 3 12:01:33 2004 @@ -94,12 +94,12 @@ public Document document(int n) throws IOException { int i = readerIndex(n); // find segment num - return readers[i].document(n - starts[i]); // dispatch to segment reader + return readers[i].document(n - starts[i]); // dispatch to segment source } public boolean isDeleted(int n) { int i = readerIndex(n); // find segment num - return readers[i].isDeleted(n - starts[i]); // dispatch to segment reader + return readers[i].isDeleted(n - starts[i]); // dispatch to segment source } public boolean hasDeletions() { return hasDeletions; } @@ -107,7 +107,7 @@ protected synchronized void doDelete(int n) throws IOException { numDocs = -1; // invalidate cache int i = readerIndex(n); // find segment num - readers[i].doDelete(n - starts[i]); // dispatch to segment reader + readers[i].doDelete(n - starts[i]); // dispatch to segment source hasDeletions = true; } @@ -117,7 +117,7 @@ hasDeletions = false; } - private int readerIndex(int n) { // find reader for doc n: + private int readerIndex(int n) { // find source for doc n: int lo = 0; // search starts array int hi = readers.length - 1; // for first element less @@ -231,7 +231,7 @@ // maintain a unique set of field names Set fieldSet = new HashSet(); for (int i = 0; i < readers.length; i++) { - IndexReader reader = readers[i]; + IndexSource reader = readers[i]; Collection names = reader.getIndexedFieldNames(storedTermVector); fieldSet.addAll(names); } diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/SegmentInfo.java jakarta-lucene/src/java/org/apache/lucene/index/SegmentInfo.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/SegmentInfo.java Sat Apr 3 13:15:44 2004 +++ jakarta-lucene/src/java/org/apache/lucene/index/SegmentInfo.java Sat Apr 3 12:01:33 2004 @@ -18,14 +18,25 @@ import org.apache.lucene.store.Directory; -final class SegmentInfo { - public String name; // unique name in dir - public int docCount; // number of docs in seg - public Directory dir; // where segment resides - - public SegmentInfo(String name, int docCount, Directory dir) { - this.name = name; - this.docCount = docCount; - this.dir = dir; - } +import java.io.IOException; + +/** + * Contains information about a segment. The underlying object may be + * a segment (serialized into a directory) or an InvertedDocument.
+ */ +abstract class SegmentInfo { + + abstract boolean isTransient(); + + abstract IndexSource openIndex() throws IOException; + + abstract boolean hasDeletions() throws IOException; + + abstract boolean usesCompoundFile() throws IOException; + + abstract boolean isStoredInDirectory(Directory directory); + + abstract String getDescription(); + + abstract int getDocCount(); } diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/SegmentInfos.java jakarta-lucene/src/java/org/apache/lucene/index/SegmentInfos.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/SegmentInfos.java Sat Apr 3 13:15:44 2004 +++ jakarta-lucene/src/java/org/apache/lucene/index/SegmentInfos.java Sat Apr 3 12:01:33 2004 @@ -36,7 +36,7 @@ counter = input.readInt(); // read counter for (int i = input.readInt(); i > 0; i--) { // read segmentInfos SegmentInfo si = - new SegmentInfo(input.readString(), input.readInt(), directory); + new DirectorySegmentInfo(directory, input.readString(), input.readInt()); addElement(si); } if (input.getFilePointer() >= input.length()) @@ -56,8 +56,13 @@ output.writeInt(size()); // write infos for (int i = 0; i < size(); i++) { SegmentInfo si = info(i); - output.writeString(si.name); - output.writeInt(si.docCount); + if(! (si instanceof DirectorySegmentInfo) ) { + String msg = "attempt to write non-directory segment to directory"; + throw new IllegalStateException(msg); + } + DirectorySegmentInfo dsi = (DirectorySegmentInfo)si; + output.writeString(dsi.name); + output.writeInt(dsi.docCount); } output.writeLong(++version); // every write changes the index } diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/SegmentMergeInfo.java jakarta-lucene/src/java/org/apache/lucene/index/SegmentMergeInfo.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/SegmentMergeInfo.java Sat Apr 3 13:15:44 2004 +++ jakarta-lucene/src/java/org/apache/lucene/index/SegmentMergeInfo.java Sat Apr 3 12:01:33 2004 @@ -23,25 +23,25 @@ Term term; int base; TermEnum termEnum; - IndexReader reader; + IndexSource source; TermPositions postings; int[] docMap = null; // maps around deleted docs - SegmentMergeInfo(int b, TermEnum te, IndexReader r) + SegmentMergeInfo(int b, TermEnum te, IndexSource s) throws IOException { base = b; - reader = r; + source = s; termEnum = te; term = te.term(); - postings = reader.termPositions(); + postings = source.termPositions(); // build array which maps document numbers around deletions - if (reader.hasDeletions()) { - int maxDoc = reader.maxDoc(); + if (source.hasDeletions()) { + int maxDoc = source.maxDoc(); docMap = new int[maxDoc]; int j = 0; for (int i = 0; i < maxDoc; i++) { - if (reader.isDeleted(i)) + if (source.isDeleted(i)) docMap[i] = -1; else docMap[i] = j++; diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/SegmentMerger.java jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/SegmentMerger.java Sat Apr 3 13:15:44 2004 +++ jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java Sat Apr 3 12:01:33 2004 @@ -26,8 +26,8 @@ import org.apache.lucene.store.RAMOutputStream; /** - * The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}, - * into a single Segment. 
After adding the appropriate readers, call the merge method to combine the + * The SegmentMerger class combines two or more Segments, represented by an IndexSource ({@link #add}, + * into a single Segment. After adding the appropriate sources, call the merge method to combine the * segments. *

* If the compoundFile flag is set, then the segments will be merged into a compound file. @@ -41,7 +41,7 @@ private Directory directory; private String segment; - private Vector readers = new Vector(); + private Vector sources = new Vector(); private FieldInfos fieldInfos; // File extensions of old-style index files @@ -65,24 +65,15 @@ } /** - * Add an IndexReader to the collection of readers that are to be merged - * @param reader + * Add an IndexSource to the collection of sources that are to be merged + * @param source */ - final void add(IndexReader reader) { - readers.addElement(reader); + final void add(IndexSource source) { + sources.addElement(source); } /** - * - * @param i The index of the reader to return - * @return The ith reader to be merged - */ - final IndexReader segmentReader(int i) { - return (IndexReader) readers.elementAt(i); - } - - /** - * Merges the readers specified by the {@link #add} method into the directory passed to the constructor + * Merges the sources specified by the {@link #add} method into the directory passed to the constructor * @return The number of documents that were merged * @throws IOException */ @@ -97,9 +88,9 @@ mergeVectors(); } finally { - for (int i = 0; i < readers.size(); i++) { // close readers - IndexReader reader = (IndexReader) readers.elementAt(i); - reader.close(); + for (int i = 0; i < sources.size(); i++) { // close sources + IndexSource source = (IndexSource) sources.elementAt(i); + source.close(); } } @@ -155,29 +146,29 @@ /** * - * @return The number of documents in all of the readers + * @return The number of documents in all of the sources * @throws IOException */ private final int mergeFields() throws IOException { fieldInfos = new FieldInfos(); // merge field names int docCount = 0; - for (int i = 0; i < readers.size(); i++) { - IndexReader reader = (IndexReader) readers.elementAt(i); - fieldInfos.addIndexed(reader.getIndexedFieldNames(true), true); - fieldInfos.addIndexed(reader.getIndexedFieldNames(false), false); - fieldInfos.add(reader.getFieldNames(false), false); + for (int i = 0; i < sources.size(); i++) { + IndexSource source = (IndexSource) sources.elementAt(i); + fieldInfos.addIndexed(source.getIndexedFieldNames(true), true); + fieldInfos.addIndexed(source.getIndexedFieldNames(false), false); + fieldInfos.add(source.getFieldNames(false), false); } fieldInfos.write(directory, segment + ".fnm"); FieldsWriter fieldsWriter = // merge field values new FieldsWriter(directory, segment, fieldInfos); try { - for (int i = 0; i < readers.size(); i++) { - IndexReader reader = (IndexReader) readers.elementAt(i); - int maxDoc = reader.maxDoc(); + for (int i = 0; i < sources.size(); i++) { + IndexSource source = (IndexSource) sources.elementAt(i); + int maxDoc = source.maxDoc(); for (int j = 0; j < maxDoc; j++) - if (!reader.isDeleted(j)) { // skip deleted docs - fieldsWriter.addDocument(reader.document(j)); + if (!source.isDeleted(j)) { // skip deleted docs + fieldsWriter.addDocument(source.document(j)); docCount++; } } @@ -196,19 +187,19 @@ new TermVectorsWriter(directory, segment, fieldInfos); try { - for (int r = 0; r < readers.size(); r++) { - IndexReader reader = (IndexReader) readers.elementAt(r); - int maxDoc = reader.maxDoc(); + for (int r = 0; r < sources.size(); r++) { + IndexSource source = (IndexSource) sources.elementAt(r); + int maxDoc = source.maxDoc(); for (int docNum = 0; docNum < maxDoc; docNum++) { // skip deleted docs - if (reader.isDeleted(docNum)) { + if (source.isDeleted(docNum)) { continue; } 
termVectorsWriter.openDocument(); // get all term vectors TermFreqVector[] sourceTermVector = - reader.getTermFreqVectors(docNum); + source.getTermFreqVectors(docNum); if (sourceTermVector != null) { for (int f = 0; f < sourceTermVector.length; f++) { @@ -254,20 +245,20 @@ } private final void mergeTermInfos() throws IOException { - queue = new SegmentMergeQueue(readers.size()); + queue = new SegmentMergeQueue(sources.size()); int base = 0; - for (int i = 0; i < readers.size(); i++) { - IndexReader reader = (IndexReader) readers.elementAt(i); - TermEnum termEnum = reader.terms(); - SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader); - base += reader.numDocs(); + for (int i = 0; i < sources.size(); i++) { + IndexSource source = (IndexSource) sources.elementAt(i); + TermEnum termEnum = source.terms(); + SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, source); + base += source.numDocs(); if (smi.next()) queue.put(smi); // initialize queue else smi.close(); } - SegmentMergeInfo[] match = new SegmentMergeInfo[readers.size()]; + SegmentMergeInfo[] match = new SegmentMergeInfo[sources.size()]; while (queue.size() > 0) { int matchSize = 0; // pop matching terms @@ -411,13 +402,13 @@ if (fi.isIndexed) { OutputStream output = directory.createFile(segment + ".f" + i); try { - for (int j = 0; j < readers.size(); j++) { - IndexReader reader = (IndexReader) readers.elementAt(j); - byte[] input = reader.norms(fi.name); - int maxDoc = reader.maxDoc(); + for (int j = 0; j < sources.size(); j++) { + IndexSource source = (IndexSource) sources.elementAt(j); + byte[] input = source.norms(fi.name); + int maxDoc = source.maxDoc(); for (int k = 0; k < maxDoc; k++) { byte norm = input != null ? input[k] : (byte) 0; - if (!reader.isDeleted(k)) { + if (!source.isDeleted(k)) { output.writeByte(norm); } } diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/SegmentReader.java jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/SegmentReader.java Sat Apr 3 13:15:44 2004 +++ jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java Sat Apr 3 12:01:33 2004 @@ -79,17 +79,17 @@ private Hashtable norms = new Hashtable(); - SegmentReader(SegmentInfos sis, SegmentInfo si, boolean closeDir) + SegmentReader(SegmentInfos sis, DirectorySegmentInfo si, boolean closeDir) throws IOException { - this(si); + this(si.dir, si.name); closeDirectory = closeDir; segmentInfos = sis; } - SegmentReader(SegmentInfo si) + SegmentReader(Directory dir, String segmentName) throws IOException { - super(si.dir); - segment = si.name; + super(dir); + segment = segmentName; // Use compound file directory for some files, if it exists Directory cfsDir = directory(); @@ -105,7 +105,7 @@ tis = new TermInfosReader(cfsDir, segment, fieldInfos); // NOTE: the bitvector is stored using the regular directory, not cfs - if (hasDeletions(si)) + if (directory().fileExists(segment + ".del")) deletedDocs = new BitVector(directory(), segment + ".del"); // make sure that all index files have been read or are kept open @@ -173,17 +173,8 @@ directory().close(); } - static final boolean hasDeletions(SegmentInfo si) throws IOException { - return si.dir.fileExists(si.name + ".del"); - } - public boolean hasDeletions() { return deletedDocs != null; - } - - - static final boolean usesCompoundFile(SegmentInfo si) throws IOException { - return si.dir.fileExists(si.name + ".cfs"); } protected final synchronized void doDelete(int 
docNum) throws IOException { diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/Term.java jakarta-lucene/src/java/org/apache/lucene/index/Term.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/Term.java Sat Apr 3 13:15:44 2004 +++ jakarta-lucene/src/java/org/apache/lucene/index/Term.java Sat Apr 3 12:06:54 2004 @@ -60,8 +60,8 @@ return field.hashCode() + text.hashCode(); } - public int compareTo(Object other) { - return compareTo((Term)other); + public final int compareTo(Object other) { + return compareTo((Term)other); } /** Compares two terms, returning an integer which is less than zero iff this diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/index/TermVectorsReader.java jakarta-lucene/src/java/org/apache/lucene/index/TermVectorsReader.java --- jakarta-lucene-orig/src/java/org/apache/lucene/index/TermVectorsReader.java Fri Feb 20 12:14:55 2004 +++ jakarta-lucene/src/java/org/apache/lucene/index/TermVectorsReader.java Sat Mar 13 23:24:49 2004 @@ -51,7 +51,7 @@ /** * - * @return The number of documents in the reader + * @return The number of documents in the source */ int size() { return size; diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/search/FilteredTermEnum.java jakarta-lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java --- jakarta-lucene-orig/src/java/org/apache/lucene/search/FilteredTermEnum.java Sat Apr 3 13:15:44 2004 +++ jakarta-lucene/src/java/org/apache/lucene/search/FilteredTermEnum.java Sat Apr 3 12:01:34 2004 @@ -17,9 +17,10 @@ */ import java.io.IOException; -import org.apache.lucene.index.IndexReader; + import org.apache.lucene.index.Term; import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.IndexSource; /** Abstract class for enumerating a subset of all terms. @@ -29,7 +30,7 @@ private Term currentTerm = null; private TermEnum actualEnum = null; - public FilteredTermEnum(IndexReader reader, Term term) throws IOException {} + public FilteredTermEnum(IndexSource reader, Term term) throws IOException {} /** Equality compare on the term */ protected abstract boolean termCompare(Term term); diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/java/org/apache/lucene/search/Weight.java jakarta-lucene/src/java/org/apache/lucene/search/Weight.java --- jakarta-lucene-orig/src/java/org/apache/lucene/search/Weight.java Sat Apr 3 13:15:45 2004 +++ jakarta-lucene/src/java/org/apache/lucene/search/Weight.java Sat Apr 3 12:01:34 2004 @@ -27,7 +27,7 @@ * is then called on the top-level query to compute the query normalization * factor (@link Similarity#queryNorm(float)}). This factor is then passed to * {@link #normalize(float)}. At this point the weighting is complete and a - * scorer may be constructed by calling {@link #scorer(IndexReader)}. + * scorer may be constructed by calling {@link #scorer(org.apache.lucene.index.IndexSource)}. */ public interface Weight extends java.io.Serializable { /** The query that this concerns. 
*/ diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/test/org/apache/lucene/index/DocTest.java jakarta-lucene/src/test/org/apache/lucene/index/DocTest.java --- jakarta-lucene-orig/src/test/org/apache/lucene/index/DocTest.java Sat Apr 3 13:15:45 2004 +++ jakarta-lucene/src/test/org/apache/lucene/index/DocTest.java Sat Apr 3 12:01:35 2004 @@ -73,8 +73,8 @@ throws Exception { Directory directory = FSDirectory.getDirectory("test", false); - SegmentReader r1 = new SegmentReader(new SegmentInfo(seg1, 1, directory)); - SegmentReader r2 = new SegmentReader(new SegmentInfo(seg2, 1, directory)); + IndexSource r1 = new SegmentReader(directory, seg1); + IndexSource r2 = new SegmentReader(directory, seg2); SegmentMerger merger = new SegmentMerger(directory, merged, false); merger.add(r1); @@ -88,7 +88,7 @@ throws Exception { Directory directory = FSDirectory.getDirectory("test", false); SegmentReader reader = - new SegmentReader(new SegmentInfo(segment, 1, directory)); + new SegmentReader(directory, segment); for (int i = 0; i < reader.numDocs(); i++) System.out.println(reader.document(i)); diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/test/org/apache/lucene/index/TestCompoundFile.java jakarta-lucene/src/test/org/apache/lucene/index/TestCompoundFile.java --- jakarta-lucene-orig/src/test/org/apache/lucene/index/TestCompoundFile.java Sat Apr 3 13:15:45 2004 +++ jakarta-lucene/src/test/org/apache/lucene/index/TestCompoundFile.java Sat Apr 3 12:01:35 2004 @@ -390,7 +390,7 @@ assertSameStreams("basic clone two/2", expected, two); - // Now close the compound reader + // Now close the compound source cr.close(); assertFalse("Now closed one", isCSInputStreamOpen(one)); assertFalse("Now closed two", isCSInputStreamOpen(two)); diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/test/org/apache/lucene/index/TestDoc.java jakarta-lucene/src/test/org/apache/lucene/index/TestDoc.java --- jakarta-lucene-orig/src/test/org/apache/lucene/index/TestDoc.java Sat Apr 3 13:15:45 2004 +++ jakarta-lucene/src/test/org/apache/lucene/index/TestDoc.java Sat Apr 3 12:01:35 2004 @@ -177,8 +177,8 @@ throws Exception { Directory directory = FSDirectory.getDirectory(indexDir, false); - SegmentReader r1 = new SegmentReader(new SegmentInfo(seg1, 1, directory)); - SegmentReader r2 = new SegmentReader(new SegmentInfo(seg2, 1, directory)); + SegmentReader r1 = new SegmentReader(directory, seg1); + SegmentReader r2 = new SegmentReader(directory, seg2); SegmentMerger merger = new SegmentMerger(directory, merged, useCompoundFile); @@ -195,7 +195,7 @@ throws Exception { Directory directory = FSDirectory.getDirectory(indexDir, false); SegmentReader reader = - new SegmentReader(new SegmentInfo(segment, 1, directory)); + new SegmentReader(directory, segment); for (int i = 0; i < reader.numDocs(); i++) out.println(reader.document(i)); diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/test/org/apache/lucene/index/TestDocumentWriter.java jakarta-lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java --- jakarta-lucene-orig/src/test/org/apache/lucene/index/TestDocumentWriter.java Sat Apr 3 13:15:45 2004 +++ jakarta-lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java Sat Apr 3 12:01:35 2004 @@ -56,7 +56,7 @@ try { writer.addDocument("test", testDoc); //After adding the document, we should be able to read it back in - SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir)); + IndexSource reader = new SegmentReader(dir, "test"); assertTrue(reader != null); Document 
doc = reader.document(0); assertTrue(doc != null); diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/test/org/apache/lucene/index/TestIndexReader.java jakarta-lucene/src/test/org/apache/lucene/index/TestIndexReader.java --- jakarta-lucene-orig/src/test/org/apache/lucene/index/TestIndexReader.java Sat Apr 3 13:15:45 2004 +++ jakarta-lucene/src/test/org/apache/lucene/index/TestIndexReader.java Sat Apr 3 12:01:35 2004 @@ -60,7 +60,7 @@ IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(), true); addDocumentWithFields(writer); writer.close(); - // set up reader + // set up source IndexReader reader = IndexReader.open(d); Collection fieldNames = reader.getFieldNames(); assertTrue(fieldNames.contains("keyword")); @@ -154,7 +154,7 @@ // index at the point of having 100 "aaa" documents and 0 "bbb" reader = IndexReader.open(dir); assertEquals("first docFreq", 100, reader.docFreq(searchTerm)); - assertTermDocsCount("first reader", reader, searchTerm, 100); + assertTermDocsCount("first source", reader, searchTerm, 100); // DELETE DOCUMENTS CONTAINING TERM: aaa int deleted = 0; @@ -195,8 +195,8 @@ IndexReader reader = IndexReader.open(dir); assertEquals("first docFreq", 100, reader.docFreq(searchTerm)); assertEquals("first docFreq", 0, reader.docFreq(searchTerm2)); - assertTermDocsCount("first reader", reader, searchTerm, 100); - assertTermDocsCount("first reader", reader, searchTerm2, 0); + assertTermDocsCount("first source", reader, searchTerm, 100); + assertTermDocsCount("first source", reader, searchTerm2, 0); // add 100 documents with term : bbb writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); @@ -208,36 +208,36 @@ // REQUEST OPTIMIZATION // This causes a new segment to become current for all subsequent // searchers. Because of this, deletions made via a previously open - // reader, which would be applied to that reader's segment, are lost + // source, which would be applied to that source's segment, are lost // for subsequent searchers/readers writer.optimize(); writer.close(); - // The reader should not see the new data + // The source should not see the new data assertEquals("first docFreq", 100, reader.docFreq(searchTerm)); assertEquals("first docFreq", 0, reader.docFreq(searchTerm2)); - assertTermDocsCount("first reader", reader, searchTerm, 100); - assertTermDocsCount("first reader", reader, searchTerm2, 0); + assertTermDocsCount("first source", reader, searchTerm, 100); + assertTermDocsCount("first source", reader, searchTerm2, 0); // DELETE DOCUMENTS CONTAINING TERM: aaa - // NOTE: the reader was created when only "aaa" documents were in + // NOTE: the source was created when only "aaa" documents were in int deleted = 0; try { deleted = reader.delete(searchTerm); - fail("Delete allowed on an index reader with stale segment information"); + fail("Delete allowed on an index source with stale segment information"); } catch (IOException e) { /* success */ } - // Re-open index reader and try again. This time it should see + // Re-open index source and try again. This time it should see // the new data. 
reader.close(); reader = IndexReader.open(dir); assertEquals("first docFreq", 100, reader.docFreq(searchTerm)); assertEquals("first docFreq", 100, reader.docFreq(searchTerm2)); - assertTermDocsCount("first reader", reader, searchTerm, 100); - assertTermDocsCount("first reader", reader, searchTerm2, 100); + assertTermDocsCount("first source", reader, searchTerm, 100); + assertTermDocsCount("first source", reader, searchTerm2, 100); deleted = reader.delete(searchTerm); assertEquals("deleted count", 100, deleted); @@ -278,13 +278,13 @@ writer.close(); dir.close(); - // Now open existing directory and test that reader closes all files + // Now open existing directory and test that source closes all files dir = getDirectory(false); - IndexReader reader1 = IndexReader.open(dir); + IndexSource reader1 = IndexReader.open(dir); reader1.close(); dir.close(); - // The following will fail if reader did not close all files + // The following will fail if source did not close all files dir = getDirectory(true); } @@ -330,8 +330,8 @@ // DELETE DOCS FROM READER 2 and CLOSE IT // delete documents containing term: aaa - // when the reader is closed, the segment info is updated and - // the first reader is now stale + // when the source is closed, the segment info is updated and + // the first source is now stale reader2.delete(searchTerm1); assertEquals("after delete 1", 100, reader2.docFreq(searchTerm1)); assertEquals("after delete 1", 100, reader2.docFreq(searchTerm2)); @@ -341,7 +341,7 @@ assertTermDocsCount("after delete 1", reader2, searchTerm3, 100); reader2.close(); - // Make sure reader 1 is unchanged since it was open earlier + // Make sure source 1 is unchanged since it was open earlier assertEquals("after delete 1", 100, reader1.docFreq(searchTerm1)); assertEquals("after delete 1", 100, reader1.docFreq(searchTerm2)); assertEquals("after delete 1", 100, reader1.docFreq(searchTerm3)); @@ -354,7 +354,7 @@ // delete documents containing term: bbb try { reader1.delete(searchTerm2); - fail("Delete allowed from a stale index reader"); + fail("Delete allowed from a stale index source"); } catch (IOException e) { /* success */ } @@ -378,7 +378,7 @@ assertTermDocsCount("deleted 2", reader1, searchTerm3, 100); reader1.close(); - // Open another reader to confirm that everything is deleted + // Open another source to confirm that everything is deleted reader2 = IndexReader.open(dir); assertEquals("reopened 2", 100, reader2.docFreq(searchTerm1)); assertEquals("reopened 2", 100, reader2.docFreq(searchTerm2)); diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/test/org/apache/lucene/index/TestIndexWriter.java jakarta-lucene/src/test/org/apache/lucene/index/TestIndexWriter.java --- jakarta-lucene-orig/src/test/org/apache/lucene/index/TestIndexWriter.java Mon Oct 13 07:31:38 2003 +++ jakarta-lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Sat Mar 13 23:24:49 2004 @@ -60,7 +60,7 @@ assertEquals(60, writer.docCount()); writer.close(); - // check that the index reader gives the same numbers. + // check that the index source gives the same numbers. 
reader = IndexReader.open(dir); assertEquals(60, reader.maxDoc()); assertEquals(60, reader.numDocs()); diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/test/org/apache/lucene/index/TestMultiReader.java jakarta-lucene/src/test/org/apache/lucene/index/TestMultiReader.java --- jakarta-lucene-orig/src/test/org/apache/lucene/index/TestMultiReader.java Sat Apr 3 13:15:45 2004 +++ jakarta-lucene/src/test/org/apache/lucene/index/TestMultiReader.java Sat Apr 3 12:01:35 2004 @@ -44,8 +44,8 @@ try { sis.write(dir); - reader1 = new SegmentReader(new SegmentInfo("seg-1", 1, dir)); - reader2 = new SegmentReader(new SegmentInfo("seg-2", 1, dir)); + reader1 = new SegmentReader(dir, "seg-1"); + reader2 = new SegmentReader(dir, "seg-2"); readers[0] = reader1; readers[1] = reader2; } catch (IOException e) { diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/test/org/apache/lucene/index/TestSegmentMerger.java jakarta-lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java --- jakarta-lucene-orig/src/test/org/apache/lucene/index/TestSegmentMerger.java Sat Apr 3 13:15:45 2004 +++ jakarta-lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java Sat Apr 3 12:01:35 2004 @@ -50,8 +50,8 @@ DocHelper.setupDoc(doc2); DocHelper.writeDoc(merge2Dir, merge2Segment, doc2); try { - reader1 = new SegmentReader(new SegmentInfo(merge1Segment, 1, merge1Dir)); - reader2 = new SegmentReader(new SegmentInfo(merge2Segment, 1, merge2Dir)); + reader1 = new SegmentReader(merge1Dir, merge1Segment); + reader2 = new SegmentReader(merge2Dir, merge2Segment); } catch (IOException e) { e.printStackTrace(); } @@ -79,7 +79,7 @@ int docsMerged = merger.merge(); assertTrue(docsMerged == 2); //Should be able to open a new SegmentReader against the new directory - SegmentReader mergedReader = new SegmentReader(new SegmentInfo(mergedSegment, docsMerged, mergedDir)); + SegmentReader mergedReader = new SegmentReader(mergedDir, mergedSegment); assertTrue(mergedReader != null); assertTrue(mergedReader.numDocs() == 2); Document newDoc1 = mergedReader.document(0); diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/test/org/apache/lucene/index/TestSegmentReader.java jakarta-lucene/src/test/org/apache/lucene/index/TestSegmentReader.java --- jakarta-lucene-orig/src/test/org/apache/lucene/index/TestSegmentReader.java Sat Apr 3 13:15:45 2004 +++ jakarta-lucene/src/test/org/apache/lucene/index/TestSegmentReader.java Sat Apr 3 12:01:35 2004 @@ -35,13 +35,13 @@ super(s); } - //TODO: Setup the reader w/ multiple documents + //TODO: Setup the source w/ multiple documents protected void setUp() { try { DocHelper.setupDoc(testDoc); DocHelper.writeDoc(dir, testDoc); - reader = new SegmentReader(new SegmentInfo("test", 1, dir)); + reader = new SegmentReader(dir, "test"); } catch (IOException e) { } @@ -84,7 +84,7 @@ DocHelper.setupDoc(docToDelete); DocHelper.writeDoc(dir, "seg-to-delete", docToDelete); try { - SegmentReader deleteReader = new SegmentReader(new SegmentInfo("seg-to-delete", 1, dir)); + SegmentReader deleteReader = new SegmentReader(dir, "seg-to-delete"); assertTrue(deleteReader != null); assertTrue(deleteReader.numDocs() == 1); deleteReader.delete(0); @@ -174,7 +174,7 @@ //TODO: Not sure how these work/should be tested /* try { - byte [] norms = reader.norms(DocHelper.TEXT_FIELD_1_KEY); + byte [] norms = source.norms(DocHelper.TEXT_FIELD_1_KEY); System.out.println("Norms: " + norms); assertTrue(norms != null); } catch (IOException e) { diff -Naur --exclude=CVS --exclude=.* 
jakarta-lucene-orig/src/test/org/apache/lucene/index/TestSegmentTermDocs.java jakarta-lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java --- jakarta-lucene-orig/src/test/org/apache/lucene/index/TestSegmentTermDocs.java Sat Apr 3 13:15:45 2004 +++ jakarta-lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java Sat Apr 3 12:01:35 2004 @@ -52,7 +52,7 @@ public void testTermDocs() { try { //After adding the document, we should be able to read it back in - SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir)); + SegmentReader reader = new SegmentReader(dir, "test"); assertTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); assertTrue(segTermDocs != null); @@ -73,7 +73,7 @@ public void testBadSeek() { try { //After adding the document, we should be able to read it back in - SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir)); + SegmentReader reader = new SegmentReader(dir, "test"); assertTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); assertTrue(segTermDocs != null); @@ -85,7 +85,7 @@ } try { //After adding the document, we should be able to read it back in - SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir)); + SegmentReader reader = new SegmentReader(dir, "test"); assertTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); assertTrue(segTermDocs != null); diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/test/org/apache/lucene/index/TestTermVectorsWriter.java jakarta-lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java --- jakarta-lucene-orig/src/test/org/apache/lucene/index/TestTermVectorsWriter.java Sat Apr 3 13:15:45 2004 +++ jakarta-lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java Sat Apr 3 12:01:35 2004 @@ -76,9 +76,9 @@ assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION)); assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION)); //Now read it back in - TermVectorsReader reader = new TermVectorsReader(dir, seg); - assertTrue(reader != null); - checkTermVector(reader, 0, 0); + TermVectorsReader source = new TermVectorsReader(dir, seg); + assertTrue(source != null); + checkTermVector(source, 0, 0); } catch (IOException e) { e.printStackTrace(); assertTrue(false); diff -Naur --exclude=CVS --exclude=.* jakarta-lucene-orig/src/test/org/apache/lucene/search/TestTermVectors.java jakarta-lucene/src/test/org/apache/lucene/search/TestTermVectors.java --- jakarta-lucene-orig/src/test/org/apache/lucene/search/TestTermVectors.java Sat Apr 3 13:15:46 2004 +++ jakarta-lucene/src/test/org/apache/lucene/search/TestTermVectors.java Sat Apr 3 12:01:36 2004 @@ -166,9 +166,9 @@ int [] freqs = vector.getTermFrequencies(); for (int i = 0; i < vTerms.length; i++) { - if (term.text().equals(vTerms[i]) == true) + if (term.text().equals(vTerms[i])) { - assertTrue(freqs[i] == freq); + assertEquals("invalid frequency for "+term.text()+" in doc "+docId, freq, freqs[i]); } }
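A note on the data path this patch sets up (this note and the sketch below are commentary, not part of the patch): IndexWriter no longer buffers new documents in a private RAMDirectory. Instead, addDocument() wraps each document in an InvertedDocument, a transient SegmentInfo that inverts the document in memory, and flushRamSegments()/mergeSegments() later drain every transient segment through the common IndexSource interface into an ordinary directory segment. The following is a minimal, hypothetical walk-through of that path using only classes and signatures introduced or kept by the patch; the class name IndexSourceExample and the segment name "_seg0" are invented for illustration, and the class must live in org.apache.lucene.index because SegmentInfo, SegmentMerger, and InvertedDocument are package-private.

    package org.apache.lucene.index;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.search.Similarity;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    import java.io.IOException;

    // Hypothetical walk-through of the patched indexing path; not part of the patch.
    class IndexSourceExample {
      public static void main(String[] args) throws IOException {
        Document doc = new Document();
        doc.add(Field.Text("body", "hello world"));

        // What addDocument() now does: invert the document in memory and
        // treat it as a transient, one-document segment.
        SegmentInfo info = new InvertedDocument(doc, new StandardAnalyzer(),
                                                Similarity.getDefault(), 10000);

        // What flushRamSegments()/mergeSegments() now do: open the segment
        // through the IndexSource interface and merge it like any other.
        Directory dir = new RAMDirectory();
        SegmentMerger merger = new SegmentMerger(dir, "_seg0", false);
        merger.add(info.openIndex());
        int docCount = merger.merge();   // merge() also closes the source

        // The result is a normal directory segment.
        SegmentInfo merged = new DirectorySegmentInfo(dir, "_seg0", docCount);
        System.out.println(merged.getDescription());  // prints: _seg0 (1 docs)
      }
    }

The point of the design shows in the last two lines: once merged, a document that was inverted in memory is indistinguishable from any other directory segment, which is why IndexWriter.addIndexes(), mergeSegments(), and SegmentMerger can all be written against IndexSource alone.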