Index: org/apache/lucene/document/Field.java =================================================================== retrieving revision 1.12 diff -u -r1.12 Field.java --- org/apache/lucene/document/Field.java 20 Feb 2004 20:14:55 -0000 1.12 +++ org/apache/lucene/document/Field.java 18 Mar 2004 11:52:37 -0000 @@ -79,6 +79,8 @@ private float boost = 1.0f; + private int dataStore = 0; + /** Sets the boost factor hits on this field. This value will be * multiplied into the score of all hits on this this field of this * document. @@ -177,21 +179,21 @@ /** The name of the field (e.g., "date", "subject", "title", or "body") as an interned string. */ - public String name() { return name; } + public String name() { return name; } /** The value of the field as a String, or null. If null, the Reader value is used. Exactly one of stringValue() and readerValue() must be set. */ - public String stringValue() { return stringValue; } + public String stringValue() { return stringValue; } /** The value of the field as a Reader, or null. If null, the String value is used. Exactly one of stringValue() and readerValue() must be set. */ - public Reader readerValue() { return readerValue; } + public Reader readerValue() { return readerValue; } /** Create a field by specifying all parameters except for storeTermVector, * which is set to false. 
*/ public Field(String name, String string, - boolean store, boolean index, boolean token) { + boolean store, boolean index, boolean token) { this(name, string, store, index, token, false); } @@ -205,7 +207,7 @@ * @param storeTermVector true if we should store the Term Vector info */ public Field(String name, String string, - boolean store, boolean index, boolean token, boolean storeTermVector) { + boolean store, boolean index, boolean token, boolean storeTermVector) { if (name == null) throw new IllegalArgumentException("name cannot be null"); if (string == null) @@ -213,7 +215,7 @@ if (!index && storeTermVector) throw new IllegalArgumentException("cannot store a term vector for fields that are not indexed."); - this.name = name.intern(); // field names are interned + this.name = name.intern(); // field names are interned this.stringValue = string; this.isStored = store; this.isIndexed = index; @@ -227,23 +229,23 @@ if (reader == null) throw new IllegalArgumentException("value cannot be null"); - this.name = name.intern(); // field names are interned + this.name = name.intern(); // field names are interned this.readerValue = reader; } /** True iff the value of the field is to be stored in the index for return with search hits. It is an error for this to be true if a field is Reader-valued. */ - public final boolean isStored() { return isStored; } + public final boolean isStored() { return isStored; } /** True iff the value of the field is to be indexed, so that it may be searched on. */ - public final boolean isIndexed() { return isIndexed; } + public final boolean isIndexed() { return isIndexed; } /** True iff the value of the field should be tokenized as text prior to indexing. Un-tokenized fields are indexed as a single word and may not be Reader-valued. 
*/ - public final boolean isTokenized() { return isTokenized; } + public final boolean isTokenized() { return isTokenized; } /** True iff the term or terms used to index this field are stored as a term * vector, avaliable from {@link IndexReader#getTermFreqVector(int,String)}. @@ -274,5 +276,27 @@ return super.toString(); } } + + /** + * Retrieves the number of the data store this Field is stored in + * (defaults to 0). + * + * @return Data store number, 0 by default. + */ + public final int getDataStore() { return dataStore; } + + /** + * Sets the data store number for this Field. + * + * @param storeNo Data store number, must be >= 0. + * @return Reference to this field + * @throws IllegalArgumentException if storeNo is negative + */ + public final Field setDataStore(int storeNo) { + if (storeNo < 0) + throw new IllegalArgumentException("data store number cannot be negative"); + dataStore = storeNo; + return this; + } } Index: org/apache/lucene/index/FieldsReader.java =================================================================== retrieving revision 1.6 diff -u -r1.6 FieldsReader.java --- org/apache/lucene/index/FieldsReader.java 20 Feb 2004 20:14:55 -0000 1.6 +++ org/apache/lucene/index/FieldsReader.java 18 Mar 2004 11:52:37 -0000 @@ -55,6 +55,9 @@ */ import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; import org.apache.lucene.store.Directory; import org.apache.lucene.store.InputStream; @@ -70,21 +73,39 @@ */ final class FieldsReader { private FieldInfos fieldInfos; - private InputStream fieldsStream; + private List fieldsStreams = new ArrayList(); + private InputStream fieldsStream0; private InputStream indexStream; private int size; + private final boolean multipleFDT; FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { fieldInfos = fn; - fieldsStream = d.openFile(segment + ".fdt"); + fieldsStream0 = d.openFile(segment + ".fdt"); + fieldsStreams.add(fieldsStream0); indexStream = d.openFile(segment + ".fdx"); size = (int)(indexStream.length() / 8); + + int n = 1; + do { + String fdt = segment + ".fd" + n; + if(d.fileExists(fdt)) { + 
fieldsStreams.add(d.openFile(fdt)); + n++; + } else { + break; + } + } while(true); + multipleFDT = (n > 1); + } final void close() throws IOException { - fieldsStream.close(); + for(Iterator it=fieldsStreams.iterator();it.hasNext();) { + ((InputStream)it.next()).close(); + } indexStream.close(); } @@ -92,26 +113,48 @@ return size; } - final Document doc(int n) throws IOException { + final Document doc(final int n) throws IOException { + return doc(n, Integer.MAX_VALUE); + } + final Document doc(final int n, final int maxSegment) throws IOException { indexStream.seek(n * 8L); long position = indexStream.readLong(); - fieldsStream.seek(position); Document doc = new Document(); - int numFields = fieldsStream.readVInt(); - for (int i = 0; i < numFields; i++) { - int fieldNumber = fieldsStream.readVInt(); - FieldInfo fi = fieldInfos.fieldInfo(fieldNumber); - - byte bits = fieldsStream.readByte(); - - doc.add(new Field(fi.name, // name - fieldsStream.readString(), // read value - true, // stored - fi.isIndexed, // indexed - (bits & 1) != 0, fi.storeTermVector)); // vector + + int segmentNo = 0; + if(multipleFDT) { + for(Iterator it=fieldsStreams.iterator();it.hasNext() && segmentNo <= maxSegment && position != -1;segmentNo++) { + InputStream fieldsStream = (InputStream)it.next(); + fieldsStream.seek(position); + readFields(doc, fieldsStream, segmentNo); + position = fieldsStream.readLong(); + } + } else { + fieldsStream0.seek(position); + readFields(doc, fieldsStream0, 0); } - return doc; + } + + final void readFields(Document doc, InputStream fieldsStream, int storeNo) throws IOException { + try { + int numFields = fieldsStream.readVInt(); + for (int i = 0; i < numFields; i++) { + int fieldNumber = fieldsStream.readVInt(); + FieldInfo fi = fieldInfos.fieldInfo(fieldNumber); + + byte bits = fieldsStream.readByte(); + + doc.add(new Field(fi.name, // name + fieldsStream.readString(), // read value + true, // stored + fi.isIndexed, // indexed + (bits & 1) != 0, 
fi.storeTermVector).setDataStore(storeNo)); // vector + } + } catch(IOException e) { + System.err.println("Doc = "+doc+"; FieldsStream = "+fieldsStream+"; storeNo="+storeNo); + throw e; + } } } Index: org/apache/lucene/index/FieldsWriter.java =================================================================== retrieving revision 1.2 diff -u -r1.2 FieldsWriter.java --- org/apache/lucene/index/FieldsWriter.java 29 Jan 2003 17:18:54 -0000 1.2 +++ org/apache/lucene/index/FieldsWriter.java 18 Mar 2004 11:52:37 -0000 @@ -54,7 +54,10 @@ * . */ +import java.util.ArrayList; import java.util.Enumeration; +import java.util.Iterator; +import java.util.List; import java.io.IOException; import org.apache.lucene.store.Directory; @@ -63,47 +66,122 @@ import org.apache.lucene.document.Field; final class FieldsWriter { - private FieldInfos fieldInfos; - private OutputStream fieldsStream; - private OutputStream indexStream; - - FieldsWriter(Directory d, String segment, FieldInfos fn) - throws IOException { - fieldInfos = fn; - fieldsStream = d.createFile(segment + ".fdt"); - indexStream = d.createFile(segment + ".fdx"); - } - - final void close() throws IOException { - fieldsStream.close(); - indexStream.close(); - } - - final void addDocument(Document doc) throws IOException { - indexStream.writeLong(fieldsStream.getFilePointer()); - - int storedCount = 0; - Enumeration fields = doc.fields(); - while (fields.hasMoreElements()) { - Field field = (Field)fields.nextElement(); - if (field.isStored()) - storedCount++; - } - fieldsStream.writeVInt(storedCount); + private FieldInfos fieldInfos; + private OutputStream indexStream; - fields = doc.fields(); - while (fields.hasMoreElements()) { - Field field = (Field)fields.nextElement(); - if (field.isStored()) { - fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name())); - - byte bits = 0; - if (field.isTokenized()) - bits |= 1; - fieldsStream.writeByte(bits); + private final List fieldsStreams = new ArrayList(); + private final 
Directory d;
+    private final String segment;
+
+    FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
+        this.d = d;
+        this.segment = segment;
+
+        fieldInfos = fn;
+        fieldsStreams.add(d.createFile(segment + ".fdt"));
+        indexStream = d.createFile(segment + ".fdx");
+    }
+
+    final void close() throws IOException {
+        for (Iterator it = fieldsStreams.iterator(); it.hasNext();) {
+            ((OutputStream) it.next()).close();
+        }
+
+        indexStream.close();
+    }
-        fieldsStream.writeString(field.stringValue());
-      }
+    final void addDocument(Document doc) throws IOException {
+        // Appends the stored fields of doc, one field block per data store.
+        // Group the stored fields by data store number first.
+        List stores = new ArrayList(); // List< List >
+        Enumeration fields = doc.fields();
+        int numFieldsTotal = 0;
+        while (fields.hasMoreElements()) {
+            Field field = (Field) fields.nextElement();
+            if (field.isStored()) {
+                numFieldsTotal++;
+                final int segmentNo = field.getDataStore();
+
+                List fieldsStore = (List) getObjectFromList(stores, segmentNo);
+                if (fieldsStore == null) {
+                    fieldsStore = new ArrayList();
+                    ensureListCapacity(stores, segmentNo);
+                    stores.set(segmentNo, fieldsStore);
+                }
+                fieldsStore.add(field);
+            }
+        }
+
+        int storeMax = 0;
+        for(Iterator it=stores.iterator();it.hasNext();storeMax++) {
+            if(it.next() == null) {
+                throw new IOException("Non-contiguous field data id detected; expected field data store #"+storeMax);
+            }
+        }
+
+        if(numFieldsTotal == 0) {
+            OutputStream fieldsStream = (OutputStream) getObjectFromList(fieldsStreams, 0);
+            indexStream.writeLong(fieldsStream.getFilePointer());
+            fieldsStream.writeVInt(0);
+            // terminate the chain here too: FieldsReader.doc reads a link after every field block
+            fieldsStream.writeLong(-1);
+        } else {
+            OutputStream previousStream = indexStream;
+
+            int storeNo = 0;
+            for(Iterator storesIt=stores.iterator();storesIt.hasNext();storeNo++) {
+                List fieldsStore = (List)storesIt.next();
+
+                if(fieldsStore == null) {
+                    // do not allow gaps between store numbers
+                    break;
+                }
+
+                OutputStream fieldsStream = (OutputStream) getObjectFromList(fieldsStreams, storeNo);
+                if (fieldsStream == null) {
+
ensureListCapacity(fieldsStreams, storeNo); + fieldsStream = + d.createFile(segment + ".fd" + storeNo); + fieldsStreams.set(storeNo, fieldsStream); + } + + // Create the link between fieldsStreams + previousStream.writeLong(fieldsStream.getFilePointer()); + + fieldsStream.writeVInt(fieldsStore.size()); // numFields + + for (Iterator it = fieldsStore.iterator(); it.hasNext();) { + Field field = (Field) it.next(); + + fieldsStream.writeVInt( + fieldInfos.fieldNumber(field.name())); + + byte bits = 0; + if (field.isTokenized()) + bits |= 1; + fieldsStream.writeByte(bits); + + fieldsStream.writeString(field.stringValue()); + } + + previousStream = fieldsStream; + } + + // write data store terminator + previousStream.writeLong(-1); + } + } + + final Object getObjectFromList(List l, int index) { + if (l.size() <= index || index < 0) { + return null; + } else { + return l.get(index); + } + } + final void ensureListCapacity(List l, int size) { + while (l.size() <= size) { + l.add(null); + } } - } } Index: org/apache/lucene/index/IndexReader.java =================================================================== retrieving revision 1.28 diff -u -r1.28 IndexReader.java --- org/apache/lucene/index/IndexReader.java 26 Feb 2004 18:27:12 -0000 1.28 +++ org/apache/lucene/index/IndexReader.java 18 Mar 2004 11:52:37 -0000 @@ -280,6 +280,13 @@ Document in this index. */ public abstract Document document(int n) throws IOException; + /** Returns the stored fields of the nth + Document in this index, at least up to the kth + field data store. 
*/ + public Document document(int n, int k) throws IOException { + return document(n); + } + /** Returns true if document n has been deleted */ public abstract boolean isDeleted(int n); Index: org/apache/lucene/index/MultiReader.java =================================================================== retrieving revision 1.2 diff -u -r1.2 MultiReader.java --- org/apache/lucene/index/MultiReader.java 20 Feb 2004 20:14:55 -0000 1.2 +++ org/apache/lucene/index/MultiReader.java 18 Mar 2004 11:52:38 -0000 @@ -134,6 +134,10 @@ int i = readerIndex(n); // find segment num return readers[i].document(n - starts[i]); // dispatch to segment reader } + public Document document(int n, int k) throws IOException { + int i = readerIndex(n); // find segment num + return readers[i].document(n - starts[i], k); // dispatch to segment reader + } public boolean isDeleted(int n) { int i = readerIndex(n); // find segment num Index: org/apache/lucene/index/SegmentMerger.java =================================================================== retrieving revision 1.8 diff -u -r1.8 SegmentMerger.java --- org/apache/lucene/index/SegmentMerger.java 20 Feb 2004 20:14:55 -0000 1.8 +++ org/apache/lucene/index/SegmentMerger.java 18 Mar 2004 11:52:38 -0000 @@ -167,6 +167,16 @@ files.add(segment + ".f" + i); } } + + { + // add extra field data store files + int i = 1; + String name; + while((name = segment+".fd"+i) != null && directory.fileExists(name)) { + files.add(name); + i++; + } + } // Vector files if (fieldInfos.hasVectors()) { Index: org/apache/lucene/index/SegmentReader.java =================================================================== retrieving revision 1.19 diff -u -r1.19 SegmentReader.java --- org/apache/lucene/index/SegmentReader.java 20 Feb 2004 20:14:55 -0000 1.19 +++ org/apache/lucene/index/SegmentReader.java 18 Mar 2004 11:52:38 -0000 @@ -259,6 +259,17 @@ if (directory().fileExists(name)) files.addElement(name); } + + { + // add extra field data store files + int i = 1; + 
Directory d = directory(); + String name; + while((name = segment+".fd"+i) != null && d.fileExists(name)) { + files.addElement(name); + i++; + } + } for (int i = 0; i < fieldInfos.size(); i++) { FieldInfo fi = fieldInfos.fieldInfo(i); @@ -281,6 +292,12 @@ throw new IllegalArgumentException ("attempt to access a deleted document"); return fieldsReader.doc(n); + } + public final synchronized Document document(int n, int k) throws IOException { + if (isDeleted(n)) + throw new IllegalArgumentException + ("attempt to access a deleted document"); + return fieldsReader.doc(n, k); } public final synchronized boolean isDeleted(int n) { Index: org/apache/lucene/store/Directory.java =================================================================== retrieving revision 1.5 diff -u -r1.5 Directory.java --- org/apache/lucene/store/Directory.java 7 Nov 2002 05:55:40 -0000 1.5 +++ org/apache/lucene/store/Directory.java 18 Mar 2004 11:52:38 -0000 @@ -117,4 +117,28 @@ /** Closes the store. */ public abstract void close() throws IOException; + + /** Copies the specified file from the given source directory to this directory */ + public void copyFrom(Directory sourceDirectory, String name) throws IOException { + copyFrom(sourceDirectory, name, name); + } + /** Copies the specified file from the given source directory to this directory */ + public void copyFrom(Directory sourceDirectory, String sourceName, String targetName) throws IOException { + // read current file + InputStream is = sourceDirectory.openFile(sourceName); + // make place in our directory + OutputStream os = createFile(targetName); + // and copy to our directory + long filelen = is.length; + int buflen = Math.min((int) filelen, 16384); + byte[] buf = new byte[buflen]; + for(long p=0;pTestCachedDirectory + * testcase. 
+ * + * @return + */ + Directory getCacheDirectory() { + return cacheDir; + } + + /** + * Creates a new CachedDirectory where only those files are cacheable that + * are acceptable by the specified {@link FileFilter}. + * + * @param underlyingDir The underlying Directory used for persistent + * storage + * @param filter Determines if a file may be cached. + */ + public CachedDirectory(Directory underlyingDir, FileFilter filter) { + this.underlyingDir = underlyingDir; + this.filter = filter; + } + + private boolean isCacheable(String name) { + if(filter == null) { + return true; + } + + if(isCached(name)) { + return true; + } + + return filter.accept(underlyingDir, name); + } + + private final boolean isCached(String name) { + return cacheDir.fileExists(name); + } + + /* + * (non-Javadoc) + * + * @see org.apache.lucene.store.Directory#list() + */ + public String[] list() throws IOException { + Set files = new HashSet(); + String[] l = underlyingDir.list(); + for(int i = 0; i < l.length; i++) { + files.add(l[i]); + } + + String[] cachedFiles = cacheDir.list(); + for(int i = 0; i < cachedFiles.length; i++) { + files.add(cachedFiles[i]); + } + return (String[]) files.toArray(new String[files.size()]); + } + + /* + * (non-Javadoc) + * + * @see org.apache.lucene.store.Directory#fileExists(java.lang.String) + */ + public boolean fileExists(String name) throws IOException { + /** + * The cacheDir.fileExists() method call checks currently open, + * cacheable files + */ + return underlyingDir.fileExists(name) || cacheDir.fileExists(name); + } + + /* + * (non-Javadoc) + * + * @see org.apache.lucene.store.Directory#fileModified(java.lang.String) + */ + public long fileModified(String name) throws IOException { + if(isCached(name)) { + return cacheDir.fileModified(name); + } else { + return underlyingDir.fileModified(name); + } + } + + /* + * (non-Javadoc) + * + * @see org.apache.lucene.store.Directory#touchFile(java.lang.String) + */ + public void touchFile(String name) throws 
IOException {
+        underlyingDir.touchFile(name);
+        if(isCacheable(name)) {
+            cacheDir.touchFile(name);
+        }
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.lucene.store.Directory#deleteFile(java.lang.String)
+     */
+    public void deleteFile(String name) throws IOException {
+        underlyingDir.deleteFile(name);
+        if(isCached(name)) {
+            cacheDir.deleteFile(name);
+        }
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.lucene.store.Directory#renameFile(java.lang.String,
+     *      java.lang.String)
+     */
+    public void renameFile(String from, String to) throws IOException {
+        // Rename the persistent copy exactly once, whether or not the file
+        // is cached; a second rename would look for a source file that no
+        // longer exists.
+        underlyingDir.renameFile(from, to);
+        if(isCached(from)) {
+            if(isCacheable(to)) {
+                cacheDir.renameFile(from, to);
+            } else {
+                // remove file from cache
+                underlyingDir.copyFrom(cacheDir, from, to);
+                cacheDir.deleteFile(from);
+            }
+        }
+        // A file that was not cached is not pulled into the cache here,
+        // even if "to" would be cacheable.
+    }
+
+    /**
+     * Explicitly copy a file into cache, even if it does not match required
+     * criteria for automatic caching. Does nothing if the file is already
+     * cached.
+     *
+     * @param name name of the file to copy from the underlying directory
+     * @throws IOException if copying the file into the cache fails
+     */
+    public void cacheFile(String name) throws IOException {
+        if(isCached(name)) {
+            return;
+        }
+        cacheDir.copyFrom(underlyingDir, name);
+    }
+
+    /**
+     * Temporarily remove a file from cache. It may be cached again, when
+     * {@link #openFile(String)} or {@link #createFile(String)} is called.
+     *
+     */
+    public void uncacheFile(String name) throws IOException {
+        if(isCached(name)) {
+            cacheDir.deleteFile(name);
+        }
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.lucene.store.Directory#fileLength(java.lang.String)
+     */
+    public long fileLength(String name) throws IOException {
+        if(isCached(name)) {
+            return cacheDir.fileLength(name);
+        } else {
+            return underlyingDir.fileLength(name);
+        }
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.lucene.store.Directory#createFile(java.lang.String)
+     */
+    public OutputStream createFile(String name) throws IOException {
+        if(isCacheable(name)) {
+            /*
+             * Create the file in cacheDir. After closing the OutputStream copy
+             * it to the underlying stream
+             */
+            return new CachedOutputStream(name, cacheDir.createFile(name));
+        } else {
+            return underlyingDir.createFile(name);
+        }
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.lucene.store.Directory#openFile(java.lang.String)
+     */
+    public InputStream openFile(String name) throws IOException {
+        if(isCached(name)) {
+            return cacheDir.openFile(name);
+        } else {
+            if(isCacheable(name)) {
+                // copy to cache
+                cacheDir.copyFrom(underlyingDir, name);
+                return cacheDir.openFile(name);
+            } else {
+                return underlyingDir.openFile(name);
+            }
+        }
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.lucene.store.Directory#makeLock(java.lang.String)
+     */
+    public Lock makeLock(String name) {
+        // always make Locks in the underlying directory
+        return underlyingDir.makeLock(name);
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * Closes the cache and the wrapped underlying directory.
+     * NOTE(review): assumes this wrapper owns underlyingDir - confirm
+     * that callers do not close it themselves.
+     *
+     * @see org.apache.lucene.store.Directory#close()
+     */
+    public void close() throws IOException {
+        try {
+            cacheDir.close();
+        } finally {
+            // release the persistent store even if closing the cache failed
+            underlyingDir.close();
+        }
+    }
+
+    /**
+     * Copies the file to the underlying directory when close()
+     * is called.
+     *
+     * @author Christian Kohlschuetter
+     */
+    private class CachedOutputStream extends OutputStream {
+        private String name;
+        private OutputStream out;
+
+        CachedOutputStream(String name, OutputStream ramOutputStream) {
+            this.name = name;
+            this.out = ramOutputStream;
+        }
+
+        public void close() throws IOException {
+            super.close();
+            out.close();
+            // Now write this file back to the underlying directory
+            underlyingDir.copyFrom(cacheDir, name);
+        }
+
+        protected void flushBuffer(byte[] b, int len) throws IOException {
+            out.flushBuffer(b, len);
+        }
+
+        public long length() throws IOException {
+            return out.length();
+        }
+
+        public void seek(long pos) throws IOException {
+            // Writes are buffered in this stream, not in the delegate, so let
+            // the superclass handle the seek first (NOTE(review): expected to
+            // flush the pending buffer and record the new position - confirm
+            // against store.OutputStream), then reposition the delegate.
+            super.seek(pos);
+            out.seek(pos);
+        }
+    }
+}
Index: src/java/org/apache/lucene/store/FileFilter.java
===================================================================
RCS file: src/java/org/apache/lucene/store/FileFilter.java
diff -N src/java/org/apache/lucene/store/FileFilter.java
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/java/org/apache/lucene/store/FileFilter.java 1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,17 @@
+package org.apache.lucene.store;
+
+/**
+ * A filter for specific files.
+ *
+ * @author Christian Kohlschuetter
+ */
+public interface FileFilter {
+    /**
+     * Checks if the specified file(name) matches specific criteria.
+     *
+     * @param dir Directory containing this file
+     * @param name Name of this file
+     * @return true if the criteria are matched, false otherwise.
+     */
+    public boolean accept(Directory dir, String name);
+}