Index: lucene/src/test/org/apache/lucene/index/TestCompoundFile.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestCompoundFile.java (revision 1126367) +++ lucene/src/test/org/apache/lucene/index/TestCompoundFile.java (working copy) @@ -648,4 +648,25 @@ } } + + public void testAddExternalFile() throws IOException { + createSequenceFile(dir, "d1", (byte) 0, 15); + + Directory newDir = newDirectory(); + CompoundFileWriter csw = new CompoundFileWriter(newDir, "d.csf"); + csw.addFile("d1", dir); + csw.close(); + + CompoundFileReader csr = new CompoundFileReader(newDir, "d.csf"); + IndexInput expected = dir.openInput("d1"); + IndexInput actual = csr.openInput("d1"); + assertSameStreams("d1", expected, actual); + assertSameSeekBehavior("d1", expected, actual); + expected.close(); + actual.close(); + csr.close(); + + newDir.close(); + } + } Index: lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java (revision 1126367) +++ lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java (working copy) @@ -20,7 +20,12 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.store.Directory; import org.apache.lucene.store.BufferedIndexInput; +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; import java.io.IOException; import java.util.Collection; @@ -123,5 +128,37 @@ TestSegmentReader.checkNorms(mergedReader); mergedReader.close(); - } + } + + // Tests that SM.createCompoundFile works with a SegmentInfo that comes from + // an external Directory. 
+ public void testCreateCompoundFileExternal() throws Exception { + // create a tiny index w/ as many files + Directory dir = newDirectory(); + LogMergePolicy lmp = newLogMergePolicy(false); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(lmp); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new Field("c", "some text", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + w.addDocument(doc); + w.commit(); + + Directory newDir = newDirectory(); + SegmentMerger sm = new SegmentMerger(newDir, 1, "newseg", null, null, null); + sm.createCompoundFile("new.cfs", w.segmentInfos.info(0)); + + CompoundFileReader cfr = new CompoundFileReader(newDir, "new.cfs"); + + // assert that all source files exist + for (String s : w.segmentInfos.info(0).files()) { + String ext = s.substring(s.indexOf('.')); + assertTrue("could not find file new." + ext, cfr.fileExists("new" + ext)); + } + cfr.close(); + + w.close(); + newDir.close(); + dir.close(); + } + } Index: lucene/src/test/org/apache/lucene/index/TestAddIndexes.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (revision 1126367) +++ lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (working copy) @@ -991,4 +991,48 @@ } + // LUCENE-3126: tests that if a non-CFS segment is copied, it is converted to + // a CFS, given MP preferences + public void testCopyIntoCFS() throws Exception { + // create an index, no CFS (so we can assert that existing segments are not affected) + Directory target = newDirectory(); + LogMergePolicy lmp = newLogMergePolicy(false); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null).setMergePolicy(lmp); + IndexWriter w = new IndexWriter(target, conf); + w.addDocument(new Document()); + w.commit(); + assertFalse(w.segmentInfos.info(0).getUseCompoundFile()); + + // prepare second 
index, no-CFS too. + Directory src = newDirectory(); + LogMergePolicy lmp2 = newLogMergePolicy(false); + IndexWriterConfig conf2 = newIndexWriterConfig(TEST_VERSION_CURRENT, null).setMergePolicy(lmp2); + IndexWriter w2 = new IndexWriter(src, conf2); + w2.addDocument(new Document()); + w2.close(); + + // Case 1: force 'CFS' on target + lmp.setUseCompoundFile(true); + lmp.setNoCFSRatio(1.0); + w.addIndexes(src); + w.commit(); + assertFalse("existing segments should not be modified by addIndexes", w.segmentInfos.info(0).getUseCompoundFile()); + assertTrue("segment should have been converted to a CFS by addIndexes", w.segmentInfos.info(1).getUseCompoundFile()); + + // Case 2: LMP disallows CFS + lmp.setUseCompoundFile(false); + w.addIndexes(src); + w.commit(); + assertFalse("segment should not have been converted to a CFS by addIndexes if MP disallows", w.segmentInfos.info(2).getUseCompoundFile()); + + w.close(); + + // cleanup + src.close(); + target.close(); + + // TODO: what about shared doc stores? + } + + // TODO add testUnrollCFS which splits open an incoming CFS segment if MP disallows that? } Index: lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java (revision 1126367) +++ lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java (working copy) @@ -60,6 +60,9 @@ /** temporary holder for the start of this file's data section */ long dataOffset; + + /** the directory which contains the file. */ + Directory dir; } // Before versioning started. @@ -119,6 +122,14 @@ * has been added already */ public void addFile(String file) { + addFile(file, directory); + } + + /** + * Same as {@link #addFile(String)}, only denotes that the given file is found + * in an external {@link Directory}. 
+ */ + public void addFile(String file, Directory dir) { if (merged) throw new IllegalStateException( "Can't add extensions after merge has been called"); @@ -133,6 +144,7 @@ FileEntry entry = new FileEntry(); entry.file = file; + entry.dir = dir; entries.add(entry); } @@ -170,7 +182,7 @@ fe.directoryOffset = os.getFilePointer(); os.writeLong(0); // for now os.writeString(IndexFileNames.stripSegmentName(fe.file)); - totalSize += directory.fileLength(fe.file); + totalSize += fe.dir.fileLength(fe.file); } // Pre-allocate size of file as optimization -- @@ -216,7 +228,7 @@ * output stream. */ private void copyFile(FileEntry source, IndexOutput os) throws IOException { - IndexInput is = directory.openInput(source.file); + IndexInput is = source.dir.openInput(source.file); try { long startPtr = os.getFilePointer(); long length = is.length(); Index: lucene/src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentMerger.java (revision 1126367) +++ lucene/src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -124,7 +124,7 @@ Collection files = info.files(); CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort); for (String file : files) { - cfsWriter.addFile(file); + cfsWriter.addFile(file, info.dir); } // Perform the merge Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexWriter.java (revision 1126367) +++ lucene/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -3023,10 +3023,10 @@ *

* NOTE: this method only copies the segments of the incomning indexes * and does not merge them. Therefore deleted documents are not removed and - * the new segments are not merged with the existing ones. Also, the segments - * are copied as-is, meaning they are not converted to CFS if they aren't, - * and vice-versa. If you wish to do that, you can call {@link #maybeMerge} - * or {@link #optimize} afterwards. + * the new segments are not merged with the existing ones. Also, if the merge + * policy allows compound files, then any segment that is not compound is + * converted to such. However, if the segment is compound, it is copied as-is + * even if the merge policy does not allow compound files. * *

*

This requires this index not be among those to be added. @@ -3069,13 +3069,17 @@ if (infoStream != null) { message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info); } + + // create CFS only if the source segment is not CFS, and MP agrees it + // should be CFS. + boolean createCFS = !info.getUseCompoundFile() && mergePolicy.useCompoundFile(segmentInfos, info); // Determine if the doc store of this segment needs to be copied. It's - // only relevant for segments who share doc store with others, because - // the DS might have been copied already, in which case we just want - // to update the DS name of this SegmentInfo. + // only relevant for segments that share doc store with others, + // because the DS might have been copied already, in which case we + // just want to update the DS name of this SegmentInfo. // NOTE: pre-3x segments include a null DSName if they don't share doc - // store. So the following code ensures we don't accidentally insert + // store. The following code ensures we don't accidentally insert // 'null' to the map. 
final String newDsName; if (dsName != null) { @@ -3101,14 +3105,27 @@ } else { newFileName = newSegName + IndexFileNames.stripSegmentName(file); } - assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists"; - dir.copy(directory, file, newFileName); + + if (!createCFS) { + assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists"; + dir.copy(directory, file, newFileName); + } } + if (createCFS) { + String segFileName = IndexFileNames.segmentFileName(newDsName, IndexFileNames.COMPOUND_FILE_EXTENSION); + SegmentMerger sm = new SegmentMerger(directory, + config.getTermIndexInterval(), segFileName, null, + payloadProcessorProvider, ((FieldInfos) docWriter + .getFieldInfos().clone())); + sm.createCompoundFile(segFileName, info); + } + // Update SI appropriately - info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile()); + info.setDocStore(info.getDocStoreOffset(), newDsName, createCFS); info.dir = directory; info.name = newSegName; + info.setUseCompoundFile(createCFS); infos.add(info); }