Index: src/java/org/apache/lucene/index/IndexFileNames.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileNames.java (revision 489254) +++ src/java/org/apache/lucene/index/IndexFileNames.java (working copy) @@ -35,6 +35,10 @@ * pre-lockless indices) */ static final String DELETABLE = "deletable"; + /** Extension of norms file */ + static final String NORMS_EXT = "nrm"; + static final String NORMS_DOT_EXT = "." + NORMS_EXT; + /** * This array contains all filename extensions used by * Lucene's index files, with two exceptions, namely the @@ -45,7 +49,9 @@ */ static final String INDEX_EXTENSIONS[] = new String[] { "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", - "tvx", "tvd", "tvf", "tvp", "gen"}; + "tvx", "tvd", "tvf", "tvp", "gen", + NORMS_EXT + }; /** File extensions of old-style index files */ static final String COMPOUND_EXTENSIONS[] = new String[] { Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 489254) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -617,7 +617,7 @@ String segmentName = newRAMSegmentName(); dw.addDocument(segmentName, doc); synchronized (this) { - ramSegmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory, false)); + ramSegmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory, false, false)); maybeFlushRamSegments(); } } @@ -710,10 +710,10 @@ while (segmentInfos.size() > 1 || (segmentInfos.size() == 1 && (SegmentReader.hasDeletions(segmentInfos.info(0)) || + SegmentReader.hasSeparateNorms(segmentInfos.info(0)) || segmentInfos.info(0).dir != directory || (useCompoundFile && - (!SegmentReader.usesCompoundFile(segmentInfos.info(0)) || - SegmentReader.hasSeparateNorms(segmentInfos.info(0))))))) { + (!SegmentReader.usesCompoundFile(segmentInfos.info(0))))))) { int minSegment = 
segmentInfos.size() - mergeFactor; mergeSegments(segmentInfos, minSegment < 0 ? 0 : minSegment, segmentInfos.size()); } @@ -1056,7 +1056,7 @@ int docCount = merger.merge(); // merge 'em segmentInfos.setSize(0); // pop old infos & add new - info = new SegmentInfo(mergedName, docCount, directory, false); + info = new SegmentInfo(mergedName, docCount, directory, false, true); segmentInfos.addElement(info); commitPending = true; @@ -1276,7 +1276,7 @@ } newSegment = new SegmentInfo(mergedName, mergedDocCount, - directory, false); + directory, false, true); if (sourceSegments == ramSegmentInfos) { Index: src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- src/java/org/apache/lucene/index/SegmentMerger.java (revision 489254) +++ src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -40,6 +40,10 @@ * @see #add */ final class SegmentMerger { + + /** norms header placeholder */ + static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; + private Directory directory; private String segment; private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL; @@ -116,7 +120,7 @@ new CompoundFileWriter(directory, fileName); Vector files = - new Vector(IndexFileNames.COMPOUND_EXTENSIONS.length + fieldInfos.size()); + new Vector(IndexFileNames.COMPOUND_EXTENSIONS.length + 1); // Basic files for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.length; i++) { @@ -127,7 +131,8 @@ for (int i = 0; i < fieldInfos.size(); i++) { FieldInfo fi = fieldInfos.fieldInfo(i); if (fi.isIndexed && !fi.omitNorms) { - files.add(segment + ".f" + i); + files.add(segment + IndexFileNames.NORMS_DOT_EXT); + break; } } @@ -408,11 +413,15 @@ private void mergeNorms() throws IOException { byte[] normBuffer = null; - for (int i = 0; i < fieldInfos.size(); i++) { - FieldInfo fi = fieldInfos.fieldInfo(i); - if (fi.isIndexed && !fi.omitNorms) { - IndexOutput output = directory.createOutput(segment + ".f" + i); - 
try { + IndexOutput output = null; + try { + for (int i = 0; i < fieldInfos.size(); i++) { + FieldInfo fi = fieldInfos.fieldInfo(i); + if (fi.isIndexed && !fi.omitNorms) { + if (output == null) { + output = directory.createOutput(segment + IndexFileNames.NORMS_DOT_EXT); + output.writeBytes(NORMS_HEADER,NORMS_HEADER.length); + } for (int j = 0; j < readers.size(); j++) { IndexReader reader = (IndexReader) readers.elementAt(j); int maxDoc = reader.maxDoc(); @@ -434,10 +443,12 @@ } } } - } finally { - output.close(); } } + } finally { + if (output != null) { + output.close(); + } } } Index: src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfo.java (revision 489254) +++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -42,8 +42,13 @@ private byte isCompoundFile; // -1 if it is not; 1 if it is; 0 if it's // pre-2.1 (ie, must check file system to see - // if .cfs exists) + // if .cfs and .nrm exist) + private byte withNrm; // 1 if this segment maintains norms in a single file; + // -1 if not; 0 if check file is required to tell. + // would be -1 for segments populated by DocumentWriter. + // would be 1 for (newly created) merge resulted segments (both compound and non compound). + public SegmentInfo(String name, int docCount, Directory dir) { this.name = name; this.docCount = docCount; @@ -51,14 +56,13 @@ delGen = -1; isCompoundFile = 0; preLockless = true; + withNrm = 0; } - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile) { + + public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean withNrm) { this(name, docCount, dir); - if (isCompoundFile) { - this.isCompoundFile = 1; - } else { - this.isCompoundFile = -1; - } + this.isCompoundFile = (byte) (isCompoundFile ? 1 : -1); + this.withNrm = (byte) (withNrm ? 
1 : -1); preLockless = false; } @@ -78,6 +82,7 @@ System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length); } isCompoundFile = src.isCompoundFile; + withNrm = src.withNrm; } /** @@ -111,19 +116,20 @@ isCompoundFile = 0; preLockless = true; } + withNrm = 0; } - void setNumField(int numField) { + void setNumFields(int numFields) { if (normGen == null) { // normGen is null if we loaded a pre-2.1 segment // file, or, if this segments file hasn't had any // norms set against it yet: - normGen = new long[numField]; + normGen = new long[numFields]; if (!preLockless) { // This is a FORMAT_LOCKLESS segment, which means // there are no norms: - for(int i=0;i
Normalization Factors -

There's a norm file for each indexed field with a byte for +

+ Pre-2.1: + There's a norm file for each indexed field with a byte for each document. The .f[0-9]* file contains, for each document, a byte that encodes a value that is multiplied into the score for hits on that field: @@ -1406,6 +1408,27 @@ (.f[0-9]*) --> <Byte> SegSize

+

+ 2.1 and above: + There's a single .nrm file containing all norms: +

+

AllNorms + (.nrm) --> NormsHeader,<Norms> + NumFieldsWithNorms +

+

Norms + --> <Byte> + SegSize +

+

NormsHeader + --> 'N','R','M',Version +

+

Version + --> Byte +

+

NormsHeader + has 4 bytes, last of which is the format version for this file, currently -1. +

Each byte encodes a floating point value. Bits 0-2 contain the 3-bit mantissa, and bits 3-8 contain the 5-bit exponent. @@ -1441,6 +1464,18 @@

+

A separate norm file is created when the norm values of an existing segment are modified. + When field N is modified, a separate norm file .sN + is created to maintain the norm values for that field. +

+

+ Pre-2.1: + Separate norm files are created only for compound segments. +

+

+ 2.1 and above: + Separate norm files are created (when needed) for both compound and non-compound segments. +

Term Vectors