CompoundFileWriter.java: 52a53,56 > /** Not final for testing purposes. */ > public static boolean WRITE_LENGTHS = true; > 61a66,67 > > InputStream in; 141a148,153 > // We use the first int as a version number. Because Lucene doesn't currently have > // a version number, we need a way to distinguish which is our version and which is > // theirs. Thus, we use an absurdly high number that would never be the number of files > // for a given segment. > if (WRITE_LENGTHS) os.writeVInt(Integer.MAX_VALUE); > 145,147d156 < // Write the directory with all offsets at 0. < // Remember the positions of directory entries so that we can < // adjust the offsets later 152c161 < os.writeLong(0); // for now --- > // write the file's length here instead of an offset; the Integer.MAX_VALUE version marker written above tells the reader which format to expect 152a162,170 > if (WRITE_LENGTHS) { > fe.in = directory.openFile(fe.file); > os.writeLong(fe.in.length()); > } else { > // Write the directory with all offsets at 0. > // Remember the positions of directory entries so that we can > // adjust the offsets later > os.writeLong(0); // for now > } 165a184 > if (!WRITE_LENGTHS) { 172a192 > } 198c218 < is = directory.openFile(source.file); --- > is = WRITE_LENGTHS ? source.in : directory.openFile(source.file); CompoundFileReader.java: 46,48d43 < // Reference count < private boolean open; 51c46 < private HashMap entries = new HashMap(); --- > private final HashMap entries; 62a58 > boolean headerHasOffsets; 64a61,76 > int count; > // We use the first int as a version number. Because Lucene doesn't currently have > // a version number, we need a way to distinguish which is our version and which is > // theirs. Thus, we use an absurdly high number that would never be the number of files > // for a given segment. 
> int firstInt = stream.readVInt(); > if (firstInt == Integer.MAX_VALUE) { > // this is the sfdc specific format > headerHasOffsets = false; > count = stream.readVInt(); > } else { > // this is the default Lucene CFS format > headerHasOffsets = true; > count = firstInt; > } > 66c78 < int count = stream.readVInt(); --- > entries = headerHasOffsets 66a79,93 > ? loadEntriesUsingOffsets(count, stream) > : loadEntriesUsingLengths(count, stream); > > success = true; > } finally { > if (! success && (stream != null)) { > try { > stream.close(); > } catch (IOException e) { } > } > } > } > > private HashMap loadEntriesUsingOffsets(int count, InputStream stream) throws IOException { > HashMap entries = new HashMap(count * 3 / 2); 81d107 < 87c113 < success = true; --- > return entries; 87a114,130 > } > > private HashMap loadEntriesUsingLengths(int count, InputStream stream) throws IOException { > ArrayList<FileEntry> entryList = new ArrayList<FileEntry>(count); > ArrayList<String> idList = new ArrayList<String>(count); > ArrayList<Long> offsetList = new ArrayList<Long>(count); > > long offset = 0; > for (int i = 0; i < count; i++) { > FileEntry entry = new FileEntry(); > entry.length = stream.readLong(); > String id = stream.readString(); > > entryList.add(entry); > idList.add(id); > if (i > 0) offset += entryList.get(i-1).length; > offsetList.add(offset); 89,93d131 < } finally { < if (!
success && (stream != null)) { < try { < stream.close(); < } catch (IOException e) { } 94a133,140 > > // all of our offsets that we've tracked are relative to the end of the header > long startOfData = stream.getFilePointer(); > HashMap entries = new HashMap(count * 3 / 2); > for (int i = 0; i < count; i++) { > FileEntry entry = entryList.get(i); > entry.offset = startOfData + offsetList.get(i); > entries.put(idList.get(i), entry); 95a142 > return entries; 97a145 > package org.apache.lucene.index; import java.util.Random; import junit.framework.TestCase; import org.apache.lucene.store.*; /** * Tests that compound files written using either the default Lucene * compound file format (offsets per file) and the modified file * format (lengths per file) are the same. * * @author koliver */ public class CompoundFileFormat extends TestCase { private static final Random RANDOM = new Random(5); // use a seed so tests are reproducible public CompoundFileFormat(String name) { super(name); } private RAMDirectory createRamDirectory(byte[]... 
 dataFiles) throws Exception { RAMDirectory ramDir = new RAMDirectory(); for (int i = 0; i < dataFiles.length; i++) { OutputStream os = ramDir.createFile("test"+(i+1)); os.writeBytes(dataFiles[i], dataFiles[i].length); os.close(); } return ramDir; } private void writeCompoundFile(boolean writeLengths, RAMDirectory ramDir) throws Exception { CompoundFileWriter.WRITE_LENGTHS = writeLengths; CompoundFileWriter offsets = new CompoundFileWriter(ramDir, "name"); offsets.addFile("test1"); offsets.addFile("test2"); offsets.addFile("test3"); offsets.close(); } public static void fill(byte[] b) { RANDOM.nextBytes(b); } public void testWritingLengthsVsOffsets() throws Exception { // setup byte[] data1 = new byte[123]; fill(data1); byte[] data2 = new byte[1025]; fill(data2); byte[] data3 = new byte[1]; fill(data3); RAMDirectory offsetRamDir = createRamDirectory(data1, data2, data3); writeCompoundFile(false, offsetRamDir); CompoundFileReader offsetReader = new CompoundFileReader(offsetRamDir, "name"); RAMDirectory lengthRamDir = createRamDirectory(data1, data2, data3); writeCompoundFile(true, lengthRamDir); CompoundFileReader lengthReader = new CompoundFileReader(lengthRamDir, "name"); // tests String[] offsetNames = offsetReader.list(); String[] lengthNames = lengthReader.list(); assertEquals(3, offsetNames.length); assertEquals(offsetNames.length, lengthNames.length); for (int i = 0; i < offsetNames.length; i++) { assertEquals(offsetNames[i], lengthNames[i]); } for (String file : offsetNames) { InputStream offsetIn = offsetReader.openFile(file); InputStream lengthIn = lengthReader.openFile(file); assertEquals("File: " + file, offsetIn.length(), lengthIn.length()); byte[] offsetData = new byte[(int)offsetIn.length()]; byte[] lengthData = new byte[(int)offsetIn.length()]; offsetIn.readBytes(offsetData, 0, offsetData.length); lengthIn.readBytes(lengthData, 0, lengthData.length); for (int b = 0; b < offsetData.length; b++) { assertEquals("Position: " + b, offsetData[b], lengthData[b]); } offsetIn.close(); lengthIn.close(); } // tear down offsetReader.close(); offsetRamDir.close(); 
lengthReader.close(); lengthRamDir.close(); } }