Index: contrib/miscellaneous/src/java/org/apache/lucene/index/CfsExtractor.java =================================================================== --- contrib/miscellaneous/src/java/org/apache/lucene/index/CfsExtractor.java (revision 0) +++ contrib/miscellaneous/src/java/org/apache/lucene/index/CfsExtractor.java (revision 0) @@ -0,0 +1,137 @@ +package org.apache.lucene.index; + +/** + * Copyright 2007 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.IndexInput; + + +/** + * Extracts the content of a the Lucene index CFS file into a given directory, + * or just prints the CFS file content. + */ +public class CfsExtractor { + + public static void main(String[] args) throws Exception { + String filename = null; + String outDir = null; + boolean extract = false; + + for (int i = 0; i < args.length; ++i) { + if (args[i].equals("-extract")) { + extract = true; + } else if (filename == null) { + filename = args[i]; + } else if (outDir == null) { + outDir = args[i]; + } + } + + if (filename == null) { + System.err.println("Usage: org.apache.lucene.index.CfsExtractor [-extract] "); + return; + } + if (extract) + (new CfsExtractor(filename)).extract(outDir); + else + (new CfsExtractor(filename)).print(); + } + + private String cfsFile; + private Directory dir = null; + private CompoundFileReader cfr = null; + private String[] files; + + /** + * Opens the given CFS file and reads in the files it contains. + * @param cfsFile the CFS file to read and later extract + */ + public CfsExtractor(String cfsFile) { + this.cfsFile = cfsFile; + + try { + File file = new File(cfsFile); + String dirname = file.getAbsoluteFile().getParent(); + cfsFile = file.getName(); + dir = FSDirectory.getDirectory(dirname, false); + cfr = new CompoundFileReader(dir, cfsFile); + + files = cfr.list(); + Arrays.sort(files); // sort the array of filename so that the output is more readable + + } catch (IOException ioe) { + ioe.printStackTrace(); + } + finally { + try { + if (dir != null) + dir.close(); + if (cfr != null) + cfr.close(); + } + catch (IOException ioe) { + ioe.printStackTrace(); + } + } + } + + /** + * Extracts files from CFS to the current working directory. + * Also prints the filename and size of each file within a given compound file. + * In order to make the extracted version of the index work, you have to copy + * the segments file from the compound index into the directory where the extracted files are stored. + */ + public void extract(String outDir) throws IOException { + for (int i = 0; i < files.length; ++i) { + long len = cfr.fileLength(files[i]); + System.out.println("extract " + files[i] + " with " + len + " bytes to local directory..."); + + IndexInput ii = cfr.openInput(files[i]); + FileOutputStream fos = new FileOutputStream(new File(outDir, files[i])); + + // read and write with a small buffer, which is more effective than reading byte by byte + byte[] buffer = new byte[1024]; + int chunk = buffer.length; + while(len > 0) { + final int bufLen = (int) Math.min(chunk, len); + ii.readBytes(buffer, 0, bufLen); + fos.write(buffer, 0, bufLen); + len -= bufLen; + } + fos.close(); + ii.close(); + } + } + + /** + * Prints the filename and size of each file within a given compound file. + * In order to make the extracted version of the index work, you have to copy + * the segments file from the compound index into the directory where the extracted files are stored. + */ + public void print() throws IOException { + for (int i = 0; i < files.length; ++i) { + long len = cfr.fileLength(files[i]); + System.out.println(files[i] + ": " + len + " bytes"); + } + } +}