diff --git lucene/src/java/org/apache/lucene/index/CFSFileDirectory.java lucene/src/java/org/apache/lucene/index/CFSFileDirectory.java
new file mode 100755
index 0000000..0cca5fa
--- /dev/null
+++ lucene/src/java/org/apache/lucene/index/CFSFileDirectory.java
@@ -0,0 +1,215 @@
+package org.apache.lucene.index;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.standard.StandardCodec;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * Presents a single CFS file as a read-only Lucene Directory.
+ *
+ * Strategy: open a CompoundFileReader for the CFS file and fabricate
+ * the segment info files in a RAMDirectory. Requests for segments*
+ * files go to the RAMDirectory; everything else goes to the
+ * CompoundFileReader.
+ *
+ * TODO:
+ * Get the number of documents in the segment in a clean way.
+ * Get the codec type from the segment.
+ */
+public class CFSFileDirectory extends Directory {
+  final CompoundFileReader cfsDir;
+  final RAMDirectory ramdir;
+
+  /** Directory opened, and therefore closed, by this instance; null if the caller supplied it. */
+  private final Directory ownedDir;
+
+  /**
+   * Opens the file-system directory at path and exposes the compound file in it.
+   * The opened directory is closed again by {@link #close()}.
+   *
+   * @param path file-system directory containing the compound file
+   * @param filename name of the compound file inside that directory
+   * @throws IOException if the directory or the compound file cannot be read
+   */
+  public CFSFileDirectory(String path, String filename) throws IOException {
+    this(FSDirectory.open(new File(path)), filename, true);
+  }
+
+  /**
+   * Exposes a compound file stored in an existing directory. The caller keeps
+   * ownership of dir and remains responsible for closing it.
+   *
+   * @param dir directory containing the compound file
+   * @param filename name of the compound file inside dir
+   * @throws IOException if the compound file cannot be read
+   */
+  public CFSFileDirectory(Directory dir, String filename) throws IOException {
+    this(dir, filename, false);
+  }
+
+  /**
+   * Shared initialization. Anything opened here is released again if setup
+   * fails part-way, so a failed construction does not leak open files.
+   */
+  private CFSFileDirectory(Directory dir, String filename, boolean ownsDir) throws IOException {
+    RAMDirectory segmentsDir = new RAMDirectory();
+    CompoundFileReader reader = null;
+    boolean success = false;
+    try {
+      reader = new CompoundFileReader(dir, filename);
+      fabricate(getNDocuments(reader), segmentsDir);
+      success = true;
+    } finally {
+      if (!success) {
+        if (reader != null) {
+          closeDir(reader);
+        }
+        closeDir(segmentsDir);
+        if (ownsDir) {
+          closeDir(dir);
+        }
+      }
+    }
+    cfsDir = reader;
+    ramdir = segmentsDir;
+    ownedDir = ownsDir ? dir : null;
+  }
+
+  /**
+   * Derives the document count from the length of the ".fdx" entry, assuming
+   * it holds one 8-byte entry per document. Stop-gap approach taken from an
+   * old email exchange; see the class TODO about obtaining the count cleanly.
+   */
+  private static int getNDocuments(CompoundFileReader cfrReader) throws IOException {
+    IndexInput indexStream = cfrReader.openInput(".fdx");
+    try {
+      return (int) (indexStream.length() / 8);
+    } finally {
+      // close even when length() fails so the input is not leaked
+      indexStream.close();
+    }
+  }
+
+  /** Commits a SegmentInfos holding one fabricated segment of ndocs documents into dir. */
+  private static void fabricate(int ndocs, Directory dir) throws IOException {
+    SegmentInfos infos = new SegmentInfos();
+
+    // need to get codec name out of the CFS files
+    Codec codec = new StandardCodec();
+    SegmentInfo segmentInfo = new SegmentInfo("", ndocs, dir, false, -1, null, false, false, codec);
+    infos.addElement(segmentInfo);
+    infos.commit(dir);
+  }
+
+  /** Returns true for names starting with "segments", which are served from the RAMDirectory. */
+  private static boolean doSegments(String name) {
+    return name.startsWith("segments");
+  }
+
+  /** Best-effort close, so one failing close does not stop the remaining directories from closing. */
+  private static void closeDir(Directory directory) {
+    try {
+      directory.close();
+    } catch (IOException ignored) {
+      // deliberately ignored: this is a read-only view, nothing is left to flush or recover
+    }
+  }
+
+  /** Closes the compound file reader, the fabricated segments directory and any directory this instance opened. */
+  @Override
+  public void close() {
+    closeDir(cfsDir);
+    closeDir(ramdir);
+    if (ownedDir != null) {
+      closeDir(ownedDir);
+    }
+  }
+
+  @Override
+  public IndexOutput createOutput(String name) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void deleteFile(String name) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public boolean fileExists(String name) throws IOException {
+    if (doSegments(name)) {
+      return ramdir.fileExists(name);
+    } else {
+      return cfsDir.fileExists(name);
+    }
+  }
+
+  @Override
+  public long fileLength(String name) throws IOException {
+    if (doSegments(name)) {
+      return ramdir.fileLength(name);
+    } else {
+      return cfsDir.fileLength(name);
+    }
+  }
+
+  @Override
+  public long fileModified(String name) throws IOException {
+    if (doSegments(name)) {
+      return ramdir.fileModified(name);
+    } else {
+      return cfsDir.fileModified(name);
+    }
+  }
+
+  /** Lists the files of the compound file followed by every file of the fabricated segments directory. */
+  @Override
+  public String[] listAll() throws IOException {
+    String[] cfrFiles = cfsDir.listAll();
+    String[] ramFiles = ramdir.listAll();
+    // size from the actual listing rather than assuming exactly two segments files
+    String[] allFiles = new String[cfrFiles.length + ramFiles.length];
+    System.arraycopy(cfrFiles, 0, allFiles, 0, cfrFiles.length);
+    System.arraycopy(ramFiles, 0, allFiles, cfrFiles.length, ramFiles.length);
+    return allFiles;
+  }
+
+  @Override
+  public IndexInput openInput(String name) throws IOException {
+    if (doSegments(name)) {
+      return ramdir.openInput(name);
+    } else {
+      return cfsDir.openInput(name);
+    }
+  }
+
+  @Override
+  public IndexInput openInput(String name, int bufferSize) throws IOException {
+    if (doSegments(name)) {
+      return ramdir.openInput(name, bufferSize);
+    } else {
+      return cfsDir.openInput(name, bufferSize);
+    }
+  }
+
+  @Override
+  public void touchFile(String name) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * @param args
+   */
+  public static void main(String[] args) {
+    // TODO Auto-generated method stub
+
+  }
+
+}
diff --git lucene/src/test/org/apache/lucene/store/TestCFSFileDirectory.java lucene/src/test/org/apache/lucene/store/TestCFSFileDirectory.java
new file mode 100755
index 0000000..2269e9c
--- /dev/null
+++ lucene/src/test/org/apache/lucene/store/TestCFSFileDirectory.java
@@ -0,0 +1,87 @@
+package org.apache.lucene.store;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.CFSFileDirectory;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.LuceneTestCase;
+
+/** Indexes three documents with compound files enabled and searches "_0.cfs" through CFSFileDirectory. */
+public class TestCFSFileDirectory extends LuceneTestCase {
+
+  public void testBasic() throws Exception {
+    Directory ramdir = new RAMDirectory();
+    try {
+      fillDocs(ramdir);
+      Directory cfsdir = new CFSFileDirectory(ramdir, "_0.cfs");
+      try {
+        searchDocs(cfsdir);
+      } finally {
+        cfsdir.close();
+      }
+    } finally {
+      ramdir.close();
+    }
+  }
+
+  /** Writes three documents (id1..id3, all with keyword=test) with compound files enabled. */
+  void fillDocs(Directory dir) throws Exception {
+    Field field = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
+    Document doc = new Document();
+    doc.add(field);
+    doc.add(new Field("keyword", "test", Field.Store.YES, Field.Index.NOT_ANALYZED));
+
+    IndexWriterConfig conf = new IndexWriterConfig(
+        TEST_VERSION_CURRENT, new MockAnalyzer());
+    ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(true);
+    IndexWriter writer = new IndexWriter(dir, conf);
+    writer.addDocument(doc);
+    field.setValue("id2");
+    writer.addDocument(doc);
+    field.setValue("id3");
+    writer.addDocument(doc);
+    writer.close();
+  }
+
+  /** Asserts that a keyword search over dir returns exactly the documents id1, id2 and id3. */
+  public void searchDocs(Directory dir) throws CorruptIndexException, IOException {
+    Searcher searcher = new IndexSearcher(dir, true);
+    try {
+      Query query = new TermQuery(new Term("keyword", "test"));
+
+      // every indexed document carries keyword=test, so all three must match
+      ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+      assertEquals(3, hits.length);
+      int result = 0; // one bit per expected id
+      for (int i = 0; i < hits.length; i++) {
+        Document doc2 = searcher.doc(hits[i].doc);
+        Field f = doc2.getField("id");
+        if (f.stringValue().equals("id1")) {
+          result |= 1;
+        } else if (f.stringValue().equals("id2")) {
+          result |= 2;
+        } else if (f.stringValue().equals("id3")) {
+          result |= 4;
+        } else {
+          fail("unexpected id field");
+        }
+      }
+      assertEquals("did not see all IDs", 7, result);
+    } finally {
+      searcher.close();
+    }
+  }
+
+}