Index: src/test/org/apache/lucene/index/TestFieldDataStore.java
===================================================================
RCS file: src/test/org/apache/lucene/index/TestFieldDataStore.java
diff -N src/test/org/apache/lucene/index/TestFieldDataStore.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/test/org/apache/lucene/index/TestFieldDataStore.java	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,237 @@
package org.apache.lucene.index;

import java.io.IOException;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.TreeSet;

import junit.framework.TestCase;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

/**
 * Tests segmented field data storage (files *.fdt, *.fd1, *.fd2 etc.).
 *
 * @author Christian Kohlschuetter
 */
public class TestFieldDataStore extends TestCase {

    /** Whitespace tokenization is sufficient for these keyword-only documents. */
    private Analyzer analyzer = new WhitespaceAnalyzer();

    /*
     * @see TestCase#setUp()
     */
    protected void setUp() throws Exception {
        super.setUp();
    }

    /*
     * @see TestCase#tearDown()
     */
    protected void tearDown() throws Exception {
        super.tearDown();
    }

    /** Runs the scenario against a regular (multi-file) index. */
    public void test1_regular() throws IOException {
        internal1(false);
    }

    /** Runs the same scenario against a compound-file index. */
    public void test1_compound() throws IOException {
        internal1(true);
    }

    /**
     * Indexes three documents into a RAMDirectory and verifies that the
     * per-dataStore field files are written and read back correctly.
     *
     * Documents 1 and 3 are regular (dataStore is always == 0). Document 2
     * spreads nine fields over three stores: two in store 0, three in
     * store 1 and four in store 2. A fourth document with a non-contiguous
     * dataStore id must be rejected with an IOException.
     *
     * @param useCompound whether the index is written in compound-file format
     * @throws IOException on unexpected index errors
     */
    private void internal1(boolean useCompound) throws IOException {
        RAMDirectory dir = new RAMDirectory();

        IndexWriter writer = new IndexWriter(dir, analyzer, true);
        writer.setUseCompoundFile(useCompound);

        Document doc;

        // Document 1: regular, everything in the default store 0.
        doc = new Document();
        doc.add(Field.Keyword("regular", "doc1"));
        writer.addDocument(doc);

        // Document 2: nine fields spread over data stores 0, 1 and 2.
        doc = new Document();
        doc.add(Field.Keyword("FIELD_0_STORE_0", "0/0"));
        doc.add(Field.Keyword("FIELD_1_STORE_0", "1/0"));
        doc.add(Field.Keyword("FIELD_0_STORE_1", "0/1").setDataStore(1));
        doc.add(Field.Keyword("FIELD_1_STORE_1", "1/1").setDataStore(1));
        doc.add(Field.Keyword("FIELD_2_STORE_1", "2/1").setDataStore(1));
        doc.add(Field.Keyword("FIELD_0_STORE_2", "0/2").setDataStore(2));
        doc.add(Field.Keyword("FIELD_1_STORE_2", "1/2").setDataStore(2));
        doc.add(Field.Keyword("FIELD_2_STORE_2", "2/2").setDataStore(2));
        doc.add(Field.Keyword("FIELD_3_STORE_2", "3/2").setDataStore(2));
        writer.addDocument(doc);

        // This document must NOT be addable: dataStore id 99 is
        // non-contiguous, so addDocument(doc) has to throw an IOException.
        doc = new Document();
        doc.add(Field.Keyword("regular", "doc"));
        doc.add(Field.Keyword("BADFIELD", "99").setDataStore(99));

        boolean gotException = false;
        try {
            writer.addDocument(doc);
        } catch (IOException e) {
            gotException = true;
        }
        assertTrue("Did not receive expected IOException", gotException);

        // Document 3: a regular document again.
        doc = new Document();
        doc.add(Field.Keyword("regular", "doc2"));
        writer.addDocument(doc);

        writer.close();

        // Now verify the index contents.
        IndexReader ir = IndexReader.open(dir);
        assertEquals(3, ir.numDocs());

        TreeSet tsExpected = new TreeSet(Arrays.asList(new String[] {
            "", // NOTE(review): is the empty field name really expected here?
            "FIELD_0_STORE_0", "FIELD_1_STORE_0", "FIELD_0_STORE_1",
            "FIELD_1_STORE_1", "FIELD_2_STORE_1", "FIELD_0_STORE_2",
            "FIELD_1_STORE_2", "FIELD_2_STORE_2", "FIELD_3_STORE_2", "regular"
        }));

        TreeSet tsReal = new TreeSet(ir.getFieldNames());
        assertEquals(tsExpected, tsReal);

        // Document 0: regular, all field data in the standard .fdt file.
        assertRegularDocument(ir.document(0), "doc1");

        // Document 1 read back via ir.document(1, 0/1/2): each call must
        // return exactly the fields of stores 0..maxStore.
        for (int maxStore = 0; maxStore <= 2; maxStore++) {
            Document d = ir.document(1, maxStore);
            Enumeration fieldEnum = d.fields();
            assertStoreFields(fieldEnum, maxStore);

            // This assert will fail for maxStore==0 or maxStore==1 if a
            // dataStore-unaware IndexReader is used, because then ALL field
            // stores are read (see {@link IndexReader.document(int, int)}).
            // Solution: overload {@link IndexReader.document(int, int)} in
            // the specific subclass.
            assertFalse(fieldEnum.hasMoreElements());
        }

        // ir.document(1) is equivalent to ir.document(1, 2) in this scenario.
        Enumeration fieldEnum = ir.document(1).fields();
        assertStoreFields(fieldEnum, 2);
        assertFalse(fieldEnum.hasMoreElements());

        // Document 2: regular, all field data in the standard .fdt file.
        assertRegularDocument(ir.document(2), "doc2");

        // Check directory contents.
        // This sub-test will fail when new file types are introduced.
        String[] names;
        if (useCompound) {
            // this also exercises CompoundDirectoryWrapper's functionality
            Directory cd = new CompoundDirectoryWrapper(dir);
            names = cd.list();
        } else {
            names = dir.list();
        }

        checkExpectedSuffixes(getSuffixes(names));

        ir.close();
    }

    /**
     * Asserts that {@code doc} contains exactly one field named "regular"
     * with the given value, stored in data store 0.
     */
    private void assertRegularDocument(Document doc, String expectedValue) {
        Enumeration fieldEnum = doc.fields();
        Field field = (Field) fieldEnum.nextElement();
        assertEquals("regular", field.name());
        assertEquals(expectedValue, field.stringValue());
        assertEquals(0, field.getDataStore());
        assertFalse(fieldEnum.hasMoreElements());
    }

    /**
     * Consumes and checks the fields of stores 0..maxStore from the given
     * enumeration; store n is expected to hold 2+n fields named
     * FIELD_&lt;i&gt;_STORE_&lt;n&gt; with value "&lt;i&gt;/&lt;n&gt;".
     */
    private void assertStoreFields(Enumeration fieldEnum, int maxStore) {
        for (int store = 0; store <= maxStore; store++) {
            for (int fieldNum = 0; fieldNum < 2 + store; fieldNum++) {
                Field field = (Field) fieldEnum.nextElement();
                assertEquals("FIELD_" + fieldNum + "_STORE_" + store, field.name());
                assertEquals(fieldNum + "/" + store, field.stringValue());
                assertEquals(store, field.getDataStore());
            }
        }
    }

    /**
     * Strips everything up to and including the first '.' from each file
     * name; names without a dot are kept as-is.
     */
    private TreeSet getSuffixes(String[] filenames) {
        TreeSet files = new TreeSet();
        for (int i = 0; i < filenames.length; i++) {
            String name = filenames[i];
            int dot = name.indexOf('.');
            if (dot != -1) {
                name = name.substring(dot + 1);
            }
            files.add(name);
        }
        return files;
    }

    /** Compares the collected suffix set against the expected file types. */
    private void checkExpectedSuffixes(TreeSet files) {
        TreeSet expectedFiles = new TreeSet(Arrays.asList(new String[] {
            "deletable", "segments", "fdx", "fnm", "frq", "prx", "tii",
            "tis"
        }));

        for (int i = 1; i <= 10; i++) {
            expectedFiles.add("f" + i);
        }
        expectedFiles.add("fdt");
        expectedFiles.add("fd1");
        expectedFiles.add("fd2");

        assertEquals(expectedFiles, files);
    }
}
Index: src/test/org/apache/lucene/store/TestCachedDirectory.java
===================================================================
RCS file: src/test/org/apache/lucene/store/TestCachedDirectory.java
diff -N src/test/org/apache/lucene/store/TestCachedDirectory.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/test/org/apache/lucene/store/TestCachedDirectory.java	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,200 @@
package org.apache.lucene.store;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.TreeSet;

import junit.framework.TestCase;

/**
 * Tests {@link CachedDirectory}.
 *
 * @author Christian Kohlschuetter
 */
public class TestCachedDirectory extends TestCase {
    // Directory under test wraps this on-disk FSDirectory.
    private Directory underlyingDir;
    // Temporary directory backing underlyingDir; removed again in tearDown().
    private File tmpDir;

    /**
     * Creates a fresh temporary FSDirectory for each test.
     */
    public void setUp() throws IOException {
        // Create a temporary FSDirectory. createTempFile() gives us a unique
        // name; we replace the file with a directory of the same name.
        tmpDir = File.createTempFile("test", "lucene");
        // FIX: check the return values — a silently failed delete/mkdir would
        // otherwise surface later as a confusing FSDirectory error.
        assertTrue("could not delete temp file placeholder", tmpDir.delete());
        assertTrue("could not create temp directory", tmpDir.mkdir());
        underlyingDir = FSDirectory.getDirectory(tmpDir, true);
    }

    /**
     * Verifies that each test cleaned up after itself and removes the
     * temporary directory.
     */
    public void tearDown() throws IOException {
        // assertEquals gives a useful diagnostic (actual count) on failure,
        // unlike assertTrue(length == 0).
        assertEquals("underlying Directory should be empty",
            0, underlyingDir.list().length);
        // FIX: close the FSDirectory — it holds open file handles that were
        // previously leaked on every test run.
        underlyingDir.close();
        File[] files = tmpDir.listFiles();
        assertEquals("tmpDir should be empty", 0, files.length);
        assertTrue("could not delete temp directory", tmpDir.delete());
    }

    /**
     * Tests the creation of files directly with CachedDirectory.
     *
     * @throws IOException
     */
    public void testCreate() throws IOException {
        CachedDirectory cd = new CachedDirectory(underlyingDir, TestFileFilter
            .getInstance());
        Directory cacheRamDir = cd.getCacheDirectory();

        OutputStream out;
        InputStream in;

        /**
         * Check behaviour of uncached files: the file must exist in the
         * underlying directory only, and open as a plain FSInputStream.
         */
        out = cd.createFile("test.txt");
        out.writeString("this file is not cached");
        assertTrue(cd.fileExists("test.txt"));
        assertFalse(cacheRamDir.fileExists("test.txt"));
        assertTrue(underlyingDir.fileExists("test.txt"));
        out.close();
        assertTrue(cd.fileExists("test.txt"));
        assertFalse(cacheRamDir.fileExists("test.txt"));
        assertTrue(underlyingDir.fileExists("test.txt"));
        in = cd.openFile("test.txt");
        assertTrue(in instanceof FSInputStream);
        assertEquals("this file is not cached", in.readString());
        assertEquals(in.length(), in.getFilePointer());
        in.close();

        /**
         * Check behaviour of cached files: while open, the file lives only
         * in the RAM cache; closing it writes it through to disk.
         */
        out = cd.createFile("test.cac");
        out.writeString("this file IS cached");
        assertTrue(cd.fileExists("test.cac"));
        assertTrue(cacheRamDir.fileExists("test.cac"));
        assertFalse(underlyingDir.fileExists("test.cac"));
        out.close();

        assertTrue(cd.fileExists("test.cac"));
        assertTrue(cacheRamDir.fileExists("test.cac"));
        assertTrue(underlyingDir.fileExists("test.cac"));
        in = cd.openFile("test.cac");
        assertTrue(in instanceof RAMInputStream);
        assertEquals("this file IS cached", in.readString());
        assertEquals(in.length(), in.getFilePointer());
        in.close();

        cd.deleteFile("test.cac");
        cd.deleteFile("test.txt");
    }

    /**
     * Tests behaviour when using CachedDirectory as a wrapper on Directories
     * already containing files.
     *
     * @throws IOException
     */
    public void testExistingDirectories() throws IOException {

        OutputStream out;
        InputStream in;

        out = underlyingDir.createFile("test.txt");
        out.writeString("this file should not be cached");
        out.close();

        out = underlyingDir.createFile("test.cac");
        out.writeString("this file can be cached");
        out.close();

        CachedDirectory cd = new CachedDirectory(underlyingDir, TestFileFilter
            .getInstance());
        Directory cacheRamDir = cd.getCacheDirectory();

        assertTrue(cd.fileExists("test.txt"));
        assertTrue(cd.fileExists("test.cac"));

        // Nothing is cached right now
        assertFalse(cacheRamDir.fileExists("test.txt"));
        assertFalse(cacheRamDir.fileExists("test.cac"));

        // Opening an uncacheable file must not populate the cache.
        in = cd.openFile("test.txt");
        assertTrue(in instanceof FSInputStream);
        assertFalse(cacheRamDir.fileExists("test.txt"));
        assertEquals("this file should not be cached", in.readString());
        in.close();

        // Opening a cacheable file pulls it into the RAM cache.
        in = cd.openFile("test.cac");
        assertTrue(in instanceof RAMInputStream);
        assertTrue(cacheRamDir.fileExists("test.cac"));
        assertEquals(cacheRamDir.fileLength("test.cac"), underlyingDir
            .fileLength("test.cac"));
        assertEquals("this file can be cached", in.readString());
        in.close();

        // remove file from cache by renaming it to a filename marked as
        // uncacheable
        cd.renameFile("test.cac", "test2.txt");
        assertFalse(cacheRamDir.fileExists("test.cac"));
        assertFalse(cacheRamDir.fileExists("test2.txt"));
        assertFalse(underlyingDir.fileExists("test.cac"));
        assertTrue(underlyingDir.fileExists("test2.txt"));
        assertFalse(cd.fileExists("test.cac"));
        assertTrue(cd.fileExists("test2.txt"));

        // validate file contents
        in = cd.openFile("test2.txt");
        assertTrue(in instanceof FSInputStream);
        assertEquals("this file can be cached", in.readString());
        in.close();

        // now force caching of file test.txt
        cd.cacheFile("test.txt");
        assertTrue(underlyingDir.fileExists("test.txt"));
        assertTrue(cacheRamDir.fileExists("test.txt"));

        // validate its file contents
        in = cd.openFile("test.txt");
        assertTrue(in instanceof RAMInputStream);
        assertEquals("this file should not be cached", in.readString());
        in.close();

        // check directory contents
        assertEquals(new TreeSet(Arrays.asList(new String[] {
            "test.txt"
        })), new TreeSet(Arrays.asList(cacheRamDir.list())));
        assertEquals(new TreeSet(Arrays.asList(new String[] {
            "test2.txt", "test.txt"
        })), new TreeSet(Arrays.asList(underlyingDir.list())));
        assertEquals(new TreeSet(Arrays.asList(new String[] {
            "test2.txt", "test.txt"
        })), new TreeSet(Arrays.asList(cd.list())));

        // delete file "test2.txt"
        cd.deleteFile("test2.txt");
        assertFalse(cd.fileExists("test2.txt"));
        assertFalse(underlyingDir.fileExists("test2.txt"));
        assertFalse(cacheRamDir.fileExists("test2.txt"));

        // delete file "test.txt"
        cd.deleteFile("test.txt");

        // Now all directories should be empty
        assertEquals(0, cd.list().length);
        assertEquals(0, cacheRamDir.list().length);
    }

    /**
     * This FileFilter will only accept files with suffix ".cac".
     *
     * @author Christian Kohlschuetter
     */
    private static class TestFileFilter implements FileFilter {
        private static final TestFileFilter INSTANCE = new TestFileFilter();

        /** Returns the shared singleton instance. */
        public static TestFileFilter getInstance() {
            return INSTANCE;
        }

        /** Accepts only files whose name ends with ".cac". */
        public boolean accept(Directory dir, String name) {
            return name.endsWith(".cac");
        }
    }
}