Index: src/test/org/apache/lucene/index/TestDoc.java =================================================================== --- src/test/org/apache/lucene/index/TestDoc.java (revision 496665) +++ src/test/org/apache/lucene/index/TestDoc.java (working copy) @@ -108,21 +108,21 @@ Directory directory = FSDirectory.getDirectory(indexDir, true); directory.close(); - indexDoc("one", "test.txt"); - printSegment(out, "one", 1); + SegmentInfo si1 = indexDoc("one", "test.txt"); + printSegment(out, si1); - indexDoc("two", "test2.txt"); - printSegment(out, "two", 1); + SegmentInfo si2 = indexDoc("two", "test2.txt"); + printSegment(out, si2); - merge("one", 1, "two", 1, "merge", false); - printSegment(out, "merge", 2); + SegmentInfo siMerge = merge(si1, si2, "merge", false); + printSegment(out, siMerge); - merge("one", 1, "two", 1, "merge2", false); - printSegment(out, "merge2", 2); + SegmentInfo siMerge2 = merge(si1, si2, "merge2", false); + printSegment(out, siMerge2); - merge("merge", 2, "merge2", 2, "merge3", false); - printSegment(out, "merge3", 4); - + SegmentInfo siMerge3 = merge(siMerge, siMerge2, "merge3", false); + printSegment(out, siMerge3); + out.close(); sw.close(); String multiFileOutput = sw.getBuffer().toString(); @@ -134,21 +134,21 @@ directory = FSDirectory.getDirectory(indexDir, true); directory.close(); - indexDoc("one", "test.txt"); - printSegment(out, "one", 1); + si1 = indexDoc("one", "test.txt"); + printSegment(out, si1); - indexDoc("two", "test2.txt"); - printSegment(out, "two", 1); + si2 = indexDoc("two", "test2.txt"); + printSegment(out, si2); - merge("one", 1, "two", 1, "merge", true); - printSegment(out, "merge", 2); + siMerge = merge(si1, si2, "merge", true); + printSegment(out, siMerge); - merge("one", 1, "two", 1, "merge2", true); - printSegment(out, "merge2", 2); + siMerge2 = merge(si1, si2, "merge2", true); + printSegment(out, siMerge2); - merge("merge", 2, "merge2", 2, "merge3", true); - printSegment(out, "merge3", 4); - + siMerge3 = 
merge(siMerge, siMerge2, "merge3", true); + printSegment(out, siMerge3); + out.close(); sw.close(); String singleFileOutput = sw.getBuffer().toString(); @@ -157,7 +157,7 @@ } - private void indexDoc(String segment, String fileName) + private SegmentInfo indexDoc(String segment, String fileName) throws Exception { Directory directory = FSDirectory.getDirectory(indexDir, false); @@ -171,18 +171,18 @@ writer.addDocument(segment, doc); directory.close(); + return new SegmentInfo(segment, 1, directory, false, false); } - private void merge(String seg1, int docCount1, String seg2, int docCount2, String merged, boolean useCompoundFile) + private SegmentInfo merge(SegmentInfo si1, SegmentInfo si2, String merged, boolean useCompoundFile) throws Exception { Directory directory = FSDirectory.getDirectory(indexDir, false); - SegmentReader r1 = SegmentReader.get(new SegmentInfo(seg1, docCount1, directory)); - SegmentReader r2 = SegmentReader.get(new SegmentInfo(seg2, docCount2, directory)); + SegmentReader r1 = SegmentReader.get(si1); + SegmentReader r2 = SegmentReader.get(si2); - SegmentMerger merger = - new SegmentMerger(directory, merged); + SegmentMerger merger = new SegmentMerger(directory, merged); merger.add(r1); merger.add(r2); @@ -196,14 +196,14 @@ } directory.close(); + return new SegmentInfo(merged, si1.docCount + si2.docCount, directory, useCompoundFile, true); } - private void printSegment(PrintWriter out, String segment, int docCount) + private void printSegment(PrintWriter out, SegmentInfo si) throws Exception { Directory directory = FSDirectory.getDirectory(indexDir, false); - SegmentReader reader = - SegmentReader.get(new SegmentInfo(segment, docCount, directory)); + SegmentReader reader = SegmentReader.get(si); for (int i = 0; i < reader.numDocs(); i++) out.println(reader.document(i)); Index: src/test/org/apache/lucene/index/TestSegmentMerger.java =================================================================== --- 
src/test/org/apache/lucene/index/TestSegmentMerger.java (revision 496665) +++ src/test/org/apache/lucene/index/TestSegmentMerger.java (working copy) @@ -70,7 +70,7 @@ merger.closeReaders(); assertTrue(docsMerged == 2); //Should be able to open a new SegmentReader against the new directory - SegmentReader mergedReader = SegmentReader.get(new SegmentInfo(mergedSegment, docsMerged, mergedDir)); + SegmentReader mergedReader = SegmentReader.get(new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, true)); assertTrue(mergedReader != null); assertTrue(mergedReader.numDocs() == 2); Document newDoc1 = mergedReader.document(0); Index: src/test/org/apache/lucene/index/TestBackwardsCompatibility.java =================================================================== --- src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 496723) +++ src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy) @@ -85,48 +85,36 @@ rmDir(dirName); } - public void testSearchOldIndexCFS() throws IOException { - String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs"; - unzip(dirName); - searchIndex(dirName); - rmDir(dirName); + public void testSearchOldIndex() throws IOException { + String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"}; + for(int i=0;i= 0 */ public static final int FORMAT = -1; - /** This is the current file format written. It differs + /** This format adds details used for lockless commits. It differs * slightly from the previous format in that file names * are never re-used (write once). Instead, each file is * written to the next generation. For example, @@ -44,6 +44,13 @@ */ public static final int FORMAT_LOCKLESS = -2; + /** This is the current file format written. It adds a + * "mergedNorms" flag into each segment info. + * See LUCENE-756 + * for details. 
+ */ + public static final int FORMAT_MERGED_NORMS = -3; + public int counter = 0; // used to name new segments /** * counts how often the index has been changed by adding or deleting docs. @@ -184,7 +191,7 @@ int format = input.readInt(); if(format < 0){ // file contains explicit format info // check that it is a format we can understand - if (format < FORMAT_LOCKLESS) + if (format < FORMAT_MERGED_NORMS) throw new IOException("Unknown format version: " + format); version = input.readLong(); // read version counter = input.readInt(); // read counter @@ -245,7 +252,7 @@ IndexOutput output = directory.createOutput(segmentFileName); try { - output.writeInt(FORMAT_LOCKLESS); // write FORMAT + output.writeInt(FORMAT_MERGED_NORMS); // write FORMAT output.writeLong(++version); // every write changes // the index output.writeInt(counter); // write counter @@ -311,7 +318,7 @@ try { format = input.readInt(); if(format < 0){ - if (format < FORMAT_LOCKLESS) + if (format < FORMAT_MERGED_NORMS) throw new IOException("Unknown format version: " + format); version = input.readLong(); // read version } Index: src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfo.java (revision 496665) +++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -44,10 +44,11 @@ // pre-2.1 (ie, must check file system to see // if .cfs and .nrm exist) - private byte withNrm; // 1 if this segment maintains norms in a single file; - // -1 if not; 0 if check file is required to tell. - // would be -1 for segments populated by DocumentWriter. - // would be 1 for (newly created) merge resulted segments (both compound and non compound). 
+ private boolean hasMergedNorms; // true if this segment maintains norms in a single file; + // false otherwise + // this is currently false for segments populated by DocumentWriter + // and true for newly created merged segments (both + // compound and non compound). public SegmentInfo(String name, int docCount, Directory dir) { this.name = name; @@ -56,13 +57,13 @@ delGen = -1; isCompoundFile = 0; preLockless = true; - withNrm = 0; + hasMergedNorms = false; } - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean withNrm) { + public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasMergedNorms) { this(name, docCount, dir); this.isCompoundFile = (byte) (isCompoundFile ? 1 : -1); - this.withNrm = (byte) (withNrm ? 1 : -1); + this.hasMergedNorms = hasMergedNorms; preLockless = false; } @@ -82,7 +83,7 @@ System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length); } isCompoundFile = src.isCompoundFile; - withNrm = src.withNrm; + hasMergedNorms = src.hasMergedNorms; } /** @@ -99,6 +100,11 @@ docCount = input.readInt(); if (format <= SegmentInfos.FORMAT_LOCKLESS) { delGen = input.readLong(); + if (format <= SegmentInfos.FORMAT_MERGED_NORMS) { + hasMergedNorms = (1 == input.readByte()); + } else { + hasMergedNorms = false; + } int numNormGen = input.readInt(); if (numNormGen == -1) { normGen = null; @@ -115,8 +121,8 @@ normGen = null; isCompoundFile = 0; preLockless = true; + hasMergedNorms = false; } - withNrm = 0; } void setNumFields(int numFields) { @@ -179,7 +185,7 @@ si.isCompoundFile = isCompoundFile; si.delGen = delGen; si.preLockless = preLockless; - si.withNrm = withNrm; + si.hasMergedNorms = hasMergedNorms; if (normGen != null) { si.normGen = (long[]) normGen.clone(); } @@ -297,7 +303,7 @@ return IndexFileNames.fileNameFromGeneration(name, prefix + number, gen); } - if (withNrm()) { + if (getHasMergedNorms()) { // case 2: lockless (or nrm file exists) - single file for all 
norms prefix = "." + IndexFileNames.NORMS_EXTENSION; return IndexFileNames.fileNameFromGeneration(name, prefix, 0); @@ -339,26 +345,8 @@ /** * Returns true iff this segment stores field norms in a single .nrm file. */ - private boolean withNrm () throws IOException { - if (withNrm == -1) { - return false; - } - if (withNrm == 1) { - return true; - } - Directory d = dir; - try { - if (getUseCompoundFile()) { - d = new CompoundFileReader(dir, name + ".cfs"); - } - boolean res = d.fileExists(name + "." + IndexFileNames.NORMS_EXTENSION); - withNrm = (byte) (res ? 1 : -1); // avoid more file tests like this - return res; - } finally { - if (d!=dir && d!=null) { - d.close(); - } - } + private boolean getHasMergedNorms() throws IOException { + return hasMergedNorms; } /** @@ -369,6 +357,7 @@ output.writeString(name); output.writeInt(docCount); output.writeLong(delGen); + output.writeByte((byte) (hasMergedNorms ? 1:0)); if (normGen == null) { output.writeInt(-1); } else { Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 496665) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -321,7 +321,7 @@ if (addedNrm) continue; // add .nrm just once addedNrm = true; } - files.addElement(name); + files.addElement(name); } } return files; Index: src/site/src/documentation/content/xdocs/fileformats.xml =================================================================== --- src/site/src/documentation/content/xdocs/fileformats.xml (revision 496665) +++ src/site/src/documentation/content/xdocs/fileformats.xml (working copy) @@ -762,8 +762,8 @@

The active segments in the index are stored in the segment info file, - segments_N - . There may + segments_N. + There may be one or more segments_N files in the @@ -779,13 +779,13 @@

As of 2.1, there is also a file - segments.gen - . This file contains the + segments.gen. + This file contains the current generation (the _N in - segments_N - ) of the index. This is + segments_N) + of the index. This is used only as a fallback in case the current generation cannot be accurately determined by directory listing alone (as is the case for some @@ -803,11 +803,9 @@

2.1 and above: - Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, NumField, NormGen - NumField - > - SegCount - , IsCompoundFile + Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, HasMergedNorms, NumField, + NormGenNumField, + IsCompoundFile>SegCount

@@ -823,11 +821,11 @@

- IsCompoundFile --> Int8 + IsCompoundFile, HasMergedNorms --> Int8

- Format is -1 as of Lucene 1.4 and -2 as of Lucene 2.1. + Format is -1 as of Lucene 1.4 and -3 (SegmentInfos.FORMAT_MERGED_NORMS) as of Lucene 2.1.

@@ -881,6 +879,13 @@ exists.

+

+ If HasMergedNorms is 1, then the field norms are + written as a single joined file (with extension + .nrm); if it is 0 then each field's norms + are stored as separate .fN files. See + "Normalization Factors" below for details. +