Index: src/java/org/apache/lucene/index/FieldInfo.java
===================================================================
--- src/java/org/apache/lucene/index/FieldInfo.java (revision 307170)
+++ src/java/org/apache/lucene/index/FieldInfo.java (working copy)
@@ -26,13 +26,16 @@
   boolean storeOffsetWithTermVector;
   boolean storePositionWithTermVector;
 
+  boolean omitNorms; // omit norms associated with indexed fields
+
   FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
-            boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
+            boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
     name = na;
     isIndexed = tk;
     number = nu;
     this.storeTermVector = storeTermVector;
     this.storeOffsetWithTermVector = storeOffsetWithTermVector;
     this.storePositionWithTermVector = storePositionWithTermVector;
+    this.omitNorms = omitNorms;
   }
 }
Index: src/java/org/apache/lucene/index/MultiReader.java
===================================================================
--- src/java/org/apache/lucene/index/MultiReader.java (revision 307170)
+++ src/java/org/apache/lucene/index/MultiReader.java (working copy)
@@ -145,10 +145,19 @@
     return hi;
   }
 
+  public boolean hasNorms(String field) throws IOException {
+    for (int i = 0; i < subReaders.length; i++) {
+      if (subReaders[i].hasNorms(field)) return true;
+    }
+    return false;
+  }
+
   public synchronized byte[] norms(String field) throws IOException {
     byte[] bytes = (byte[])normsCache.get(field);
     if (bytes != null)
       return bytes;                               // cache hit
+    if (!hasNorms(field))
+      return fakeNorms();
 
     bytes = new byte[maxDoc()];
     for (int i = 0; i < subReaders.length; i++)
@@ -160,6 +169,7 @@
 
   public synchronized void norms(String field, byte[] result, int offset)
     throws IOException {
     byte[] bytes = (byte[])normsCache.get(field);
+    if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
     if (bytes != null)                            // cache hit
       System.arraycopy(bytes, 0, result, offset, maxDoc());
Index: src/java/org/apache/lucene/index/FieldInfos.java
===================================================================
--- src/java/org/apache/lucene/index/FieldInfos.java (revision 307170)
+++ src/java/org/apache/lucene/index/FieldInfos.java (working copy)
@@ -38,6 +38,7 @@
   static final byte STORE_TERMVECTOR = 0x2;
   static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
   static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
+  static final byte OMIT_NORMS = 0x10;
 
   private ArrayList byNumber = new ArrayList();
   private HashMap byName = new HashMap();
@@ -66,7 +67,7 @@
     while (fields.hasMoreElements()) {
       Field field = (Field) fields.nextElement();
       add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
-          field.isStoreOffsetWithTermVector());
+          field.isStoreOffsetWithTermVector(), field.isOmitNorms());
     }
   }
 
@@ -109,7 +110,7 @@
    * @see #add(String, boolean, boolean, boolean, boolean)
    */
   public void add(String name, boolean isIndexed) {
-    add(name, isIndexed, false, false, false);
+    add(name, isIndexed, false, false, false, false);
   }
 
   /**
@@ -120,7 +121,7 @@
    * @param storeTermVector true if the term vector should be stored
    */
   public void add(String name, boolean isIndexed, boolean storeTermVector){
-    add(name, isIndexed, storeTermVector, false, false);
+    add(name, isIndexed, storeTermVector, false, false, false);
   }
 
   /** If the field is not yet known, adds it. If it is known, checks to make
@@ -136,9 +137,27 @@
    */
   public void add(String name, boolean isIndexed, boolean storeTermVector,
                   boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
+
+    add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+  }
+
+  /** If the field is not yet known, adds it. If it is known, checks to make
+   *  sure that the isIndexed flag is the same as was given previously for this
+   *  field. If not - marks it as being indexed. Same goes for the TermVector
+   *  parameters.
+   *
+   * @param name The name of the field
+   * @param isIndexed true if the field is indexed
+   * @param storeTermVector true if the term vector should be stored
+   * @param storePositionWithTermVector true if the term vector with positions should be stored
+   * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
+   * @param omitNorms true if the norms for the indexed field should be omitted
+   */
+  public void add(String name, boolean isIndexed, boolean storeTermVector,
+                  boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
     FieldInfo fi = fieldInfo(name);
     if (fi == null) {
-      addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector);
+      addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms);
     } else {
       if (fi.isIndexed != isIndexed) {
         fi.isIndexed = true;                      // once indexed, always index
@@ -152,15 +171,20 @@
       if (fi.storeOffsetWithTermVector != storeOffsetWithTermVector) {
         fi.storeOffsetWithTermVector = true;      // once vector, always vector
       }
+      if (fi.omitNorms != omitNorms) {
+        fi.omitNorms = false;                     // once norms are stored, always store
+      }
+
     }
   }
 
+
   private void addInternal(String name, boolean isIndexed,
                            boolean storeTermVector, boolean storePositionWithTermVector,
-                           boolean storeOffsetWithTermVector) {
+                           boolean storeOffsetWithTermVector, boolean omitNorms) {
     FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
-                                 storeOffsetWithTermVector);
+                                 storeOffsetWithTermVector, omitNorms);
     byNumber.add(fi);
     byName.put(name, fi);
   }
@@ -245,6 +269,7 @@
       if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
       if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
       if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
+      if (fi.omitNorms) bits |= OMIT_NORMS;
       output.writeString(fi.name);
       output.writeByte(bits);
     }
@@ -259,7 +284,9 @@
       boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
       boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
       boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
-      addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector);
+      boolean omitNorms = (bits & OMIT_NORMS) != 0;
+
+      addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms);
     }
   }
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/IndexReader.java (revision 307170)
+++ src/java/org/apache/lucene/index/IndexReader.java (working copy)
@@ -19,6 +19,7 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.IndexInput;
@@ -338,6 +339,23 @@
   /** Returns true if any documents have been deleted */
   public abstract boolean hasDeletions();
 
+  /** Returns true if there are norms stored for this field. */
+  public boolean hasNorms(String field) throws IOException {
+    // backward compatible implementation.
+    // SegmentReader has an efficient implementation.
+    return norms(field) != null;
+  }
+
+  protected byte[] ones;
+  protected synchronized byte[] fakeNorms() {
+    if (ones==null) {
+      ones = new byte[maxDoc()];
+      Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
+      return ones;
+    }
+    return ones;
+  }
+
   /** Returns the byte-encoded normalization factor for the named field of
    *  every document.  This is used by the search code to score documents.
    *
Index: src/java/org/apache/lucene/index/FilterIndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/FilterIndexReader.java (revision 307170)
+++ src/java/org/apache/lucene/index/FilterIndexReader.java (working copy)
@@ -107,6 +107,10 @@
   public boolean hasDeletions() { return in.hasDeletions(); }
   protected void doUndeleteAll() throws IOException { in.undeleteAll(); }
 
+  public boolean hasNorms(String field) throws IOException {
+    return in.hasNorms(field);
+  }
+
   public byte[] norms(String f) throws IOException { return in.norms(f); }
   public void norms(String f, byte[] bytes, int offset) throws IOException {
     in.norms(f, bytes, offset);
Index: src/java/org/apache/lucene/index/SegmentMerger.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentMerger.java (revision 307170)
+++ src/java/org/apache/lucene/index/SegmentMerger.java (working copy)
@@ -18,6 +18,7 @@
 
 import java.util.Vector;
 import java.util.Iterator;
+import java.util.Collection;
 import java.io.IOException;
 
 import org.apache.lucene.store.Directory;
@@ -122,7 +123,7 @@
     // Field norm files
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed) {
+      if (fi.isIndexed && !fi.omitNorms) {
         files.add(segment + ".f" + i);
       }
     }
@@ -146,6 +147,15 @@
     return files;
   }
 
+  private void addIndexed(IndexReader reader, FieldInfos fieldInfos, Collection names, boolean storeTermVectors, boolean storePositionWithTermVector,
+                          boolean storeOffsetWithTermVector) throws IOException {
+    Iterator i = names.iterator();
+    while (i.hasNext()) {
+      String field = (String)i.next();
+      fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.hasNorms(field));
+    }
+  }
+
   /**
    *
    * @return The number of documents in all of the readers
@@ -156,11 +166,11 @@
     int docCount = 0;
     for (int i = 0; i < readers.size(); i++) {
       IndexReader reader = (IndexReader) readers.elementAt(i);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
       fieldInfos.add(reader.getFieldNames(IndexReader.FieldOption.UNINDEXED), false);
     }
     fieldInfos.write(directory, segment + ".fnm");
@@ -386,7 +396,7 @@
   private void mergeNorms() throws IOException {
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed) {
+      if (fi.isIndexed && !fi.omitNorms) {
         IndexOutput output = directory.createOutput(segment + ".f" + i);
         try {
           for (int j = 0; j < readers.size(); j++) {
Index: src/java/org/apache/lucene/index/DocumentWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DocumentWriter.java (revision 307170)
+++ src/java/org/apache/lucene/index/DocumentWriter.java (working copy)
@@ -371,7 +371,7 @@
   private final void writeNorms(String segment) throws IOException {
     for(int n = 0; n < fieldInfos.size(); n++){
       FieldInfo fi = fieldInfos.fieldInfo(n);
-      if(fi.isIndexed){
+      if(fi.isIndexed && !fi.omitNorms){
         float norm = fieldBoosts[n] * similarity.lengthNorm(fi.name, fieldLengths[n]);
         IndexOutput norms = directory.createOutput(segment + ".f" + n);
         try {
Index: src/java/org/apache/lucene/index/ParallelReader.java
===================================================================
--- src/java/org/apache/lucene/index/ParallelReader.java (revision 307170)
+++ src/java/org/apache/lucene/index/ParallelReader.java (working copy)
@@ -165,6 +165,10 @@
     return ((IndexReader)fieldToReader.get(field)).getTermFreqVector(n, field);
   }
 
+  public boolean hasNorms(String field) throws IOException {
+    return ((IndexReader)fieldToReader.get(field)).hasNorms(field);
+  }
+
   public byte[] norms(String field) throws IOException {
     return ((IndexReader)fieldToReader.get(field)).norms(field);
   }
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentReader.java (revision 307170)
+++ src/java/org/apache/lucene/index/SegmentReader.java (working copy)
@@ -17,12 +17,7 @@
  */
 
 import java.io.IOException;
-import java.util.Collection;
-import java.util.Enumeration;
-import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.Set;
-import java.util.Vector;
+import java.util.*;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -30,6 +25,7 @@
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitVector;
+import org.apache.lucene.search.DefaultSimilarity;
 
 /**
  * @version $Id$
 */
@@ -260,7 +256,7 @@
 
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed){
+      if (fi.isIndexed && !fi.omitNorms){
         String name;
         if(cfsReader == null)
           name = segment + ".f" + i;
@@ -442,10 +438,16 @@
     return fieldSet;
   }
 
-  public synchronized byte[] norms(String field) throws IOException {
+
+  public synchronized boolean hasNorms(String field) {
+    return norms.containsKey(field);
+  }
+
+  // can return null if norms aren't stored
+  protected synchronized byte[] getNorms(String field) throws IOException {
     Norm norm = (Norm) norms.get(field);
-    if (norm == null)                             // not an indexed field
-      return null;
+    if (norm == null) return null;                // not indexed, or norms not stored
+
     if (norm.bytes == null) {                     // value not yet read
       byte[] bytes = new byte[maxDoc()];
       norms(field, bytes, 0);
@@ -454,6 +456,13 @@
     return norm.bytes;
   }
 
+  // returns fake norms if norms aren't available
+  public synchronized byte[] norms(String field) throws IOException {
+    byte[] bytes = getNorms(field);
+    if (bytes==null) bytes=fakeNorms();
+    return bytes;
+  }
+
   protected void doSetNorm(int doc, String field, byte value)
           throws IOException {
     Norm norm = (Norm) norms.get(field);
@@ -470,8 +479,10 @@
     throws IOException {
 
     Norm norm = (Norm) norms.get(field);
-    if (norm == null)
-      return;                                     // use zeros in array
+    if (norm == null) {
+      System.arraycopy(fakeNorms(), 0, bytes, offset, maxDoc());
+      return;
+    }
 
     if (norm.bytes != null) {                     // can copy from cache
       System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
@@ -487,10 +498,11 @@
     }
   }
 
+
   private void openNorms(Directory cfsDir) throws IOException {
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed) {
+      if (fi.isIndexed && !fi.omitNorms) {
        // look first if there are separate norms in compound format
         String fileName = segment + ".s" + fi.number;
         Directory d = directory();
Index: src/java/org/apache/lucene/document/Field.java
===================================================================
--- src/java/org/apache/lucene/document/Field.java (revision 307170)
+++ src/java/org/apache/lucene/document/Field.java (working copy)
@@ -42,6 +42,7 @@
   private boolean storeTermVector = false;
   private boolean storeOffsetWithTermVector = false;
   private boolean storePositionWithTermVector = false;
+  private boolean omitNorms = false;
   private boolean isStored = false;
   private boolean isIndexed = true;
   private boolean isTokenized = true;
@@ -540,6 +541,16 @@
   /** True iff the value of the filed is stored as binary */
   public final boolean isBinary()      { return isBinary; }
 
+  /** True if norms are omitted for this field */
+  public boolean isOmitNorms() { return omitNorms; }
+
+  /** Expert:
+   *
+   * If set, omit normalization factors associated with this indexed field.
+   * This effectively disables indexing boosts and length normalization for this field.
+   */
+  public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }
+
   /** Prints a Field for human consumption. */
   public final String toString() {
     StringBuffer result = new StringBuffer();
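
Not part of the patch itself, but for orientation, a minimal sketch of how the new flag could be exercised once the patch is applied. It assumes the 1.9-era Field(String, String, Field.Store, Field.Index) constructors plus StandardAnalyzer and RAMDirectory; the class name, field names, and values are illustrative only.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.RAMDirectory;

public class OmitNormsExample {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);

    Document doc = new Document();
    // Hypothetical single-token key field: index-time boosts and length
    // normalization add nothing here, so omitting norms saves one byte
    // per document for this field.
    Field id = new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED);
    id.setOmitNorms(true);                           // setter added by this patch
    doc.add(id);
    doc.add(new Field("body", "some example text",
                      Field.Store.NO, Field.Index.TOKENIZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    System.out.println(reader.hasNorms("id"));       // expected: false
    System.out.println(reader.hasNorms("body"));     // expected: true
    // norms() still returns a full array (fake norms encoding 1.0) for the
    // omitted field, so existing scoring code needs no null checks.
    System.out.println(reader.norms("id").length == reader.maxDoc());
    reader.close();
  }
}

Note that FieldInfos treats the flag conservatively during merges: if any document stores norms for a field, the merged field keeps its norms ("once norms are stored, always store").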