Index: src/java/org/apache/lucene/index/FieldInfos.java =================================================================== --- src/java/org/apache/lucene/index/FieldInfos.java (revision 678121) +++ src/java/org/apache/lucene/index/FieldInfos.java (working copy) @@ -40,6 +40,7 @@ static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8; static final byte OMIT_NORMS = 0x10; static final byte STORE_PAYLOADS = 0x20; + static final byte OMIT_TF = 0x40; private ArrayList byNumber = new ArrayList(); private HashMap byName = new HashMap(); @@ -172,7 +173,7 @@ synchronized public void add(String name, boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) { add(name, isIndexed, storeTermVector, storePositionWithTermVector, - storeOffsetWithTermVector, omitNorms, false); + storeOffsetWithTermVector, omitNorms, false, false); } /** If the field is not yet known, adds it. If it is known, checks to make @@ -187,15 +188,16 @@ * @param storeOffsetWithTermVector true if the term vector with offsets should be stored * @param omitNorms true if the norms for the indexed field should be omitted * @param storePayloads true if payloads should be stored for this field + * @param omitTf true if term freqs should be omitted for this field */ synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, - boolean omitNorms, boolean storePayloads) { + boolean omitNorms, boolean storePayloads, boolean omitTf) { FieldInfo fi = fieldInfo(name); if (fi == null) { - return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads); + return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); } else { - fi.update(isIndexed, storeTermVector, storePositionWithTermVector, 
storeOffsetWithTermVector, omitNorms, storePayloads); + fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); } return fi; } @@ -205,7 +207,7 @@ if (fi == null) { return addInternal(fieldInfo.name, fieldInfo.isIndexed, fieldInfo.storeTermVector, fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector, - fieldInfo.omitNorms, fieldInfo.storePayloads); + fieldInfo.omitNorms, fieldInfo.storePayloads, fieldInfo.omitTf); } else { fi.update(fieldInfo); } @@ -214,10 +216,10 @@ private FieldInfo addInternal(String name, boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, - boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads) { + boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTf) { FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector, - storeOffsetWithTermVector, omitNorms, storePayloads); + storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); byNumber.add(fi); byName.put(name, fi); return fi; @@ -289,6 +291,8 @@ if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR; if (fi.omitNorms) bits |= OMIT_NORMS; if (fi.storePayloads) bits |= STORE_PAYLOADS; + if (fi.omitTf) bits |= OMIT_TF; + output.writeString(fi.name); output.writeByte(bits); } @@ -305,8 +309,9 @@ boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; boolean omitNorms = (bits & OMIT_NORMS) != 0; boolean storePayloads = (bits & STORE_PAYLOADS) != 0; + boolean omitTf = (bits & OMIT_TF) != 0; - addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads); + addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); } } Index: src/java/org/apache/lucene/index/SegmentMerger.java 
=================================================================== --- src/java/org/apache/lucene/index/SegmentMerger.java (revision 678121) +++ src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -198,11 +198,11 @@ } private void addIndexed(IndexReader reader, FieldInfos fieldInfos, Collection names, boolean storeTermVectors, boolean storePositionWithTermVector, - boolean storeOffsetWithTermVector, boolean storePayloads) throws IOException { + boolean storeOffsetWithTermVector, boolean storePayloads, boolean omitTf) throws IOException { Iterator i = names.iterator(); while (i.hasNext()) { String field = (String)i.next(); - fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.hasNorms(field), storePayloads); + fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.hasNorms(field), storePayloads, omitTf); } } @@ -265,15 +265,16 @@ SegmentReader segmentReader = (SegmentReader) reader; for (int j = 0; j < segmentReader.getFieldInfos().size(); j++) { FieldInfo fi = segmentReader.getFieldInfos().fieldInfo(j); - fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.hasNorms(fi.name), fi.storePayloads); + fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.hasNorms(fi.name), fi.storePayloads, fi.omitTf); } } else { - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), 
true, false, false, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false); fieldInfos.add(reader.getFieldNames(IndexReader.FieldOption.UNINDEXED), false); } } Index: src/java/org/apache/lucene/index/FreqProxTermsWriter.java =================================================================== --- src/java/org/apache/lucene/index/FreqProxTermsWriter.java (revision 678121) +++ src/java/org/apache/lucene/index/FreqProxTermsWriter.java (working copy) @@ -205,8 +205,12 @@ } final int skipInterval = termsOut.skipInterval; - final boolean currentFieldStorePayloads = fields[0].fieldInfo.storePayloads; + final boolean currentFieldOmitTf = fields[0].fieldInfo.omitTf; + // If current field omits tf then it cannot store + // payloads. We silently drop the payloads in this case: + final boolean currentFieldStorePayloads = currentFieldOmitTf ? 
false : fields[0].fieldInfo.storePayloads; + FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields]; while(numFields > 0) { @@ -261,7 +265,7 @@ assert doc < flushState.numDocsInRAM; assert doc > lastDoc || df == 1; - final int newDocCode = (doc-lastDoc)<<1; + final int newDocCode = currentFieldOmitTf ? (doc-lastDoc) : (doc-lastDoc)<<1; lastDoc = doc; @@ -293,7 +297,10 @@ } } - if (1 == termDocFreq) { + if (currentFieldOmitTf) { + freqOut.writeVInt(newDocCode); + } + else if (1 == termDocFreq) { freqOut.writeVInt(newDocCode|1); } else { freqOut.writeVInt(newDocCode); Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 678121) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -728,6 +728,9 @@ else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) { fieldSet.add(fi.name); } + else if (fi.omitTf && fieldOption == IndexReader.FieldOption.OMIT_TF) { + fieldSet.add(fi.name); + } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) { fieldSet.add(fi.name); } Index: src/java/org/apache/lucene/index/FieldInfo.java =================================================================== --- src/java/org/apache/lucene/index/FieldInfo.java (revision 678121) +++ src/java/org/apache/lucene/index/FieldInfo.java (working copy) @@ -27,13 +27,14 @@ boolean storeOffsetWithTermVector; boolean storePositionWithTermVector; - boolean omitNorms; // omit norms associated with indexed fields + boolean omitNorms; // omit norms associated with indexed fields + boolean omitTf; // omit tf boolean storePayloads; // whether this field stores payloads together with term positions FieldInfo(String na, boolean tk, int nu, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, - boolean omitNorms, boolean storePayloads) { + 
boolean omitNorms, boolean storePayloads, boolean omitTf) { name = na; isIndexed = tk; number = nu; @@ -42,15 +43,16 @@ this.storePositionWithTermVector = storePositionWithTermVector; this.omitNorms = omitNorms; this.storePayloads = storePayloads; + this.omitTf = omitTf; } public Object clone() { return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector, - storeOffsetWithTermVector, omitNorms, storePayloads); + storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); } void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, - boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads) { + boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTf) { if (this.isIndexed != isIndexed) { this.isIndexed = true; // once indexed, always index } @@ -66,6 +68,9 @@ if (this.omitNorms != omitNorms) { this.omitNorms = false; // once norms are stored, always store } + if (this.omitTf != omitTf) { + this.omitTf = false; // once tf are stored, always store TODO: is this ok? + } if (this.storePayloads != storePayloads) { this.storePayloads = true; } @@ -87,6 +92,9 @@ if (omitNorms != other.omitNorms) { omitNorms = false; // once norms are stored, always store } + if (this.omitTf != other.omitTf) { + this.omitTf = false; // the two infos disagree, so tf is stored: once tf-s are stored, always store them TODO: is this ok? 
+ } if (storePayloads != other.storePayloads) { storePayloads = true; } Index: src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java =================================================================== --- src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (revision 678121) +++ src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (working copy) @@ -183,7 +183,7 @@ // easily add it FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), - field.getOmitNorms(), false); + field.getOmitNorms(), false, field.getOmitTf()); fp = new DocFieldProcessorPerField(this, fi); fp.next = fieldHash[hashPos]; @@ -195,7 +195,7 @@ } else fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), - field.getOmitNorms(), false); + field.getOmitNorms(), false, field.getOmitTf()); if (thisFieldGen != fp.lastGen) { Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- src/java/org/apache/lucene/index/IndexReader.java (revision 678121) +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -75,6 +75,8 @@ public static final FieldOption INDEXED = new FieldOption ("INDEXED"); /** All fields that store payloads */ public static final FieldOption STORES_PAYLOADS = new FieldOption ("STORES_PAYLOADS"); + /** All fields that omit tf */ + public static final FieldOption OMIT_TF = new FieldOption ("OMIT_TF"); /** All fields which are not indexed */ public static final FieldOption UNINDEXED = new FieldOption ("UNINDEXED"); /** All fields which are indexed with termvectors enabled */ Index: src/java/org/apache/lucene/index/SegmentTermDocs.java =================================================================== --- src/java/org/apache/lucene/index/SegmentTermDocs.java (revision 678121) 
+++ src/java/org/apache/lucene/index/SegmentTermDocs.java (working copy) @@ -41,7 +41,8 @@ private boolean haveSkipped; protected boolean currentFieldStoresPayloads; - + protected boolean currentFieldOmitTf; + protected SegmentTermDocs(SegmentReader parent) { this.parent = parent; this.freqStream = (IndexInput) parent.freqStream.clone(); @@ -75,6 +76,7 @@ void seek(TermInfo ti, Term term) throws IOException { count = 0; FieldInfo fi = parent.fieldInfos.fieldInfo(term.field); + currentFieldOmitTf = (fi != null) ? fi.omitTf : false; currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false; if (ti == null) { df = 0; @@ -105,14 +107,19 @@ while (true) { if (count == df) return false; - - int docCode = freqStream.readVInt(); - doc += docCode >>> 1; // shift off low bit - if ((docCode & 1) != 0) // if low bit is set - freq = 1; // freq is one - else - freq = freqStream.readVInt(); // else read freq - + + if (currentFieldOmitTf) { + doc += freqStream.readVInt(); // value is a doc delta (unshifted when tf is omitted), so accumulate + freq = 1; // tf was not stored; report 1 + } else { + int docCode = freqStream.readVInt(); + doc += docCode >>> 1; // shift off low bit + if ((docCode & 1) != 0) // if low bit is set + freq = 1; // freq is one + else + freq = freqStream.readVInt(); // else read freq + } + count++; if (deletedDocs == null || !deletedDocs.get(doc)) @@ -126,27 +133,49 @@ public int read(final int[] docs, final int[] freqs) throws IOException { final int length = docs.length; + if (currentFieldOmitTf) { + return readNoTf(docs, freqs, length); + } else { + int i = 0; + while (i < length && count < df) { + // manually inlined call to next() for speed + final int docCode = freqStream.readVInt(); + doc += docCode >>> 1; // shift off low bit + if ((docCode & 1) != 0) // if low bit is set + freq = 1; // freq is one + else + freq = freqStream.readVInt(); // else read freq + count++; + + if (deletedDocs == null || !deletedDocs.get(doc)) { + docs[i] = doc; + freqs[i] = freq; + ++i; + } + } + return i; + } + } + + private final int readNoTf(final int[] docs, 
final int[] freqs, final int length) throws IOException{ int i = 0; while (i < length && count < df) { - // manually inlined call to next() for speed - final int docCode = freqStream.readVInt(); - doc += docCode >>> 1; // shift off low bit - if ((docCode & 1) != 0) // if low bit is set - freq = 1; // freq is one - else - freq = freqStream.readVInt(); // else read freq + doc += freqStream.readVInt(); count++; if (deletedDocs == null || !deletedDocs.get(doc)) { docs[i] = doc; - freqs[i] = freq; + // Hardwire freq to 1 when term freqs were not + // stored in the index + freqs[i] = 1; ++i; } } return i; } - + + /** Overridden by SegmentTermPositions to skip in prox stream. */ protected void skipProx(long proxPointer, int payloadLength) throws IOException {} Index: src/java/org/apache/lucene/document/AbstractField.java =================================================================== --- src/java/org/apache/lucene/document/AbstractField.java (revision 678121) +++ src/java/org/apache/lucene/document/AbstractField.java (working copy) @@ -33,6 +33,7 @@ protected boolean isBinary = false; protected boolean isCompressed = false; protected boolean lazy = false; + protected boolean omitTf = false; protected float boost = 1.0f; // the one and only data object for all different kind of field values protected Object fieldsData = null; @@ -203,6 +204,9 @@ /** True if norms are omitted for this indexed field */ public boolean getOmitNorms() { return omitNorms; } + /** True if tf is omitted for this indexed field */ + public boolean getOmitTf() { return omitTf; } + /** Expert: * * If set, omit normalization factors associated with this indexed field. @@ -210,6 +214,12 @@ */ public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; } + /** Expert: + * + * If set, omit term frequencies (tf) from the postings of this indexed field. 
+ */ + public void setOmitTf(boolean omitTf) { this.omitTf=omitTf; } + public boolean isLazy() { return lazy; } @@ -257,6 +267,9 @@ if (omitNorms) { result.append(",omitNorms"); } + if (omitTf) { + result.append(",omitTf"); + } if (lazy){ result.append(",lazy"); } Index: src/java/org/apache/lucene/document/Fieldable.java =================================================================== --- src/java/org/apache/lucene/document/Fieldable.java (revision 678121) +++ src/java/org/apache/lucene/document/Fieldable.java (working copy) @@ -133,6 +133,15 @@ */ void setOmitNorms(boolean omitNorms); + /** Expert: + * + * If set, omit term freq, positions and payloads from postings for this field. + */ + void setOmitTf(boolean omitTf); + + /** True if tf is omitted for this indexed field */ + boolean getOmitTf(); + /** * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving * it's values via {@link #stringValue()} or {@link #binaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that