Index: contrib/CHANGES.txt =================================================================== --- contrib/CHANGES.txt (revision 823097) +++ contrib/CHANGES.txt (working copy) @@ -33,6 +33,11 @@ segment merges to give better search performance in a mixed indexing/searching environment. (John Wang via Mike McCandless) + * LUCENE-1959: Add IndexSplitter tool, to copy specific segments out + of the index into a new index. It can also list the segments in + the index, and delete specified segments. (Jason Rutherglen via + Mike McCandless) + Optimizations Documentation Index: contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java =================================================================== --- contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java (revision 0) +++ contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java (revision 0) @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.index; + +import java.io.File; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestIndexSplitter extends LuceneTestCase { + public void test() throws Exception { + String tmpDir = System.getProperty("java.io.tmpdir"); + File dir = new File(tmpDir, "testfilesplitter"); + _TestUtil.rmDir(dir); + dir.mkdirs(); + File destDir = new File(tmpDir, "testfilesplitterdest"); + _TestUtil.rmDir(destDir); + destDir.mkdirs(); + FSDirectory fsDir = FSDirectory.open(dir); + IndexWriter iw = new IndexWriter(fsDir, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED); + for (int x=0; x < 100; x++) { + Document doc = TestIndexWriterReader.createDocument(x, "index", 5); + iw.addDocument(doc); + } + iw.commit(); + for (int x=100; x < 150; x++) { + Document doc = TestIndexWriterReader.createDocument(x, "index2", 5); + iw.addDocument(doc); + } + iw.commit(); + for (int x=150; x < 200; x++) { + Document doc = TestIndexWriterReader.createDocument(x, "index3", 5); + iw.addDocument(doc); + } + iw.commit(); + assertEquals(3, iw.getReader().getSequentialSubReaders().length); + iw.close(); + // the index should contain 3 segments now, as asserted above; split out the second one + IndexSplitter is = new IndexSplitter(dir); + String splitSegName = is.infos.info(1).name; + is.split(destDir, new String[] {splitSegName}); + IndexReader r = IndexReader.open(FSDirectory.open(destDir), true); + assertEquals(50, r.maxDoc()); + + // now test the command-line entry point with the same segment + File destDir2 = new File(tmpDir, "testfilesplitterdest2"); + _TestUtil.rmDir(destDir2); + destDir2.mkdirs(); + IndexSplitter.main(new String[] {dir.getAbsolutePath(), destDir2.getAbsolutePath(), splitSegName}); + assertEquals(3, destDir2.listFiles().length); + r = IndexReader.open(FSDirectory.open(destDir2), true); 
+ assertEquals(50, r.maxDoc()); + + // now delete the copied segment from the source index via the -d option + IndexSplitter.main(new String[] {dir.getAbsolutePath(), "-d", splitSegName}); + r = IndexReader.open(FSDirectory.open(dir), true); + assertEquals(2, r.getSequentialSubReaders().length); + } +} Property changes on: contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java ___________________________________________________________________ Added: svn:eol-style + native Index: contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java =================================================================== --- contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java (revision 0) +++ contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java (revision 0) @@ -0,0 +1,163 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.index; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.store.FSDirectory; + +/** + * Command-line tool that enables listing segments in an + * index, copying specific segments to another index, and + * deleting segments from an index. + * + *
This tool does file-level copying of segment files. + * This means it's unable to split apart a single segment + * into multiple segments. For example, if your index is + * optimized, this tool won't help. Also, it does basic + * file-level copying (using simple + * File{In,Out}putStream) so it will not work with + * non-FSDirectory Directory impls.
+ * + *NOTE: The tool is experimental and might change
+ * in incompatible ways in the next release. You can easily
+ * accidentally remove segments from your index so be
+ * careful!
+ */
+public class IndexSplitter {
+ public SegmentInfos infos;
+
+ FSDirectory fsDir;
+
+ File dir;
+
+ /**
+ * @param args
+ */
+ public static void main(String[] args) throws Exception {
+ if (args.length < 2) {
+ System.err
+ .println("Usage: IndexSplitter