Index: src/java/org/apache/lucene/document/AbstractField.java =================================================================== --- src/java/org/apache/lucene/document/AbstractField.java (revision 678321) +++ src/java/org/apache/lucene/document/AbstractField.java (working copy) @@ -33,6 +33,7 @@ protected boolean isBinary = false; protected boolean isCompressed = false; protected boolean lazy = false; + protected boolean omitTf = false; protected float boost = 1.0f; // the one and only data object for all different kind of field values protected Object fieldsData = null; @@ -203,6 +204,9 @@ /** True if norms are omitted for this indexed field */ public boolean getOmitNorms() { return omitNorms; } + /** True if tf is omitted for this indexed field */ + public boolean getOmitTf() { return omitTf; } + /** Expert: * * If set, omit normalization factors associated with this indexed field. @@ -210,6 +214,12 @@ */ public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; } + /** Expert: + * + * If set, omit tf from postings of this indexed field. + */ + public void setOmitTf(boolean omitTf) { this.omitTf=omitTf; } + public boolean isLazy() { return lazy; } @@ -257,6 +267,9 @@ if (omitNorms) { result.append(",omitNorms"); } + if (omitTf) { + result.append(",omitTf"); + } if (lazy){ result.append(",lazy"); } Index: src/java/org/apache/lucene/document/Fieldable.java =================================================================== --- src/java/org/apache/lucene/document/Fieldable.java (revision 678321) +++ src/java/org/apache/lucene/document/Fieldable.java (working copy) @@ -133,6 +133,15 @@ */ void setOmitNorms(boolean omitNorms); + /** Expert: + * + * If set, omit term freq, positions and payloads from postings for this field. + */ + void setOmitTf(boolean omitTf); + + /** True if tf is omitted for this indexed field */ + boolean getOmitTf(); + /** * Indicates whether a Field is Lazy or not. 
The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving * it's values via {@link #stringValue()} or {@link #binaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that Index: src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java =================================================================== --- src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (revision 678321) +++ src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (working copy) @@ -183,7 +183,7 @@ // easily add it FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), - field.getOmitNorms(), false); + field.getOmitNorms(), false, field.getOmitTf()); fp = new DocFieldProcessorPerField(this, fi); fp.next = fieldHash[hashPos]; @@ -195,7 +195,7 @@ } else fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), - field.getOmitNorms(), false); + field.getOmitNorms(), false, field.getOmitTf()); if (thisFieldGen != fp.lastGen) { Index: src/java/org/apache/lucene/index/FieldInfo.java =================================================================== --- src/java/org/apache/lucene/index/FieldInfo.java (revision 678321) +++ src/java/org/apache/lucene/index/FieldInfo.java (working copy) @@ -27,13 +27,14 @@ boolean storeOffsetWithTermVector; boolean storePositionWithTermVector; - boolean omitNorms; // omit norms associated with indexed fields + boolean omitNorms; // omit norms associated with indexed fields + boolean omitTf; // omit tf boolean storePayloads; // whether this field stores payloads together with term positions FieldInfo(String na, boolean tk, int nu, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, - boolean omitNorms, boolean storePayloads) { + boolean omitNorms, 
boolean storePayloads, boolean omitTf) { name = na; isIndexed = tk; number = nu; @@ -42,15 +43,16 @@ this.storePositionWithTermVector = storePositionWithTermVector; this.omitNorms = omitNorms; this.storePayloads = storePayloads; + this.omitTf = omitTf; } public Object clone() { return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector, - storeOffsetWithTermVector, omitNorms, storePayloads); + storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); } void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, - boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads) { + boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTf) { if (this.isIndexed != isIndexed) { this.isIndexed = true; // once indexed, always index } @@ -66,6 +68,9 @@ if (this.omitNorms != omitNorms) { this.omitNorms = false; // once norms are stored, always store } + if (this.omitTf != omitTf) { + this.omitTf = true; // if omitTf was required at least once, tf remains omitted for life + } if (this.storePayloads != storePayloads) { this.storePayloads = true; } @@ -87,6 +92,9 @@ if (omitNorms != other.omitNorms) { omitNorms = false; // once norms are stored, always store } + if (omitTf != other.omitTf) { + omitTf = true; // if omitTf was required at least once, tf remains omitted for life + } if (storePayloads != other.storePayloads) { storePayloads = true; } Index: src/java/org/apache/lucene/index/FieldInfos.java =================================================================== --- src/java/org/apache/lucene/index/FieldInfos.java (revision 678321) +++ src/java/org/apache/lucene/index/FieldInfos.java (working copy) @@ -40,6 +40,7 @@ static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8; static final byte OMIT_NORMS = 0x10; static final byte STORE_PAYLOADS = 0x20; + static final byte OMIT_TF = 0x40; private ArrayList byNumber = new ArrayList(); private HashMap byName = new
HashMap(); @@ -172,7 +173,7 @@ synchronized public void add(String name, boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) { add(name, isIndexed, storeTermVector, storePositionWithTermVector, - storeOffsetWithTermVector, omitNorms, false); + storeOffsetWithTermVector, omitNorms, false, false); } /** If the field is not yet known, adds it. If it is known, checks to make @@ -187,15 +188,16 @@ * @param storeOffsetWithTermVector true if the term vector with offsets should be stored * @param omitNorms true if the norms for the indexed field should be omitted * @param storePayloads true if payloads should be stored for this field + * @param omitTf true if term freqs should be omitted for this field */ synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, - boolean omitNorms, boolean storePayloads) { + boolean omitNorms, boolean storePayloads, boolean omitTf) { FieldInfo fi = fieldInfo(name); if (fi == null) { - return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads); + return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); } else { - fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads); + fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); } return fi; } @@ -205,7 +207,7 @@ if (fi == null) { return addInternal(fieldInfo.name, fieldInfo.isIndexed, fieldInfo.storeTermVector, fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector, - fieldInfo.omitNorms, fieldInfo.storePayloads); + fieldInfo.omitNorms, fieldInfo.storePayloads, fieldInfo.omitTf); } else { 
fi.update(fieldInfo); } @@ -214,10 +216,10 @@ private FieldInfo addInternal(String name, boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, - boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads) { + boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTf) { FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector, - storeOffsetWithTermVector, omitNorms, storePayloads); + storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); byNumber.add(fi); byName.put(name, fi); return fi; @@ -289,6 +291,8 @@ if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR; if (fi.omitNorms) bits |= OMIT_NORMS; if (fi.storePayloads) bits |= STORE_PAYLOADS; + if (fi.omitTf) bits |= OMIT_TF; + output.writeString(fi.name); output.writeByte(bits); } @@ -305,8 +309,9 @@ boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; boolean omitNorms = (bits & OMIT_NORMS) != 0; boolean storePayloads = (bits & STORE_PAYLOADS) != 0; + boolean omitTf = (bits & OMIT_TF) != 0; - addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads); + addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); } } Index: src/java/org/apache/lucene/index/FreqProxTermsWriter.java =================================================================== --- src/java/org/apache/lucene/index/FreqProxTermsWriter.java (revision 678321) +++ src/java/org/apache/lucene/index/FreqProxTermsWriter.java (working copy) @@ -205,8 +205,12 @@ } final int skipInterval = termsOut.skipInterval; - final boolean currentFieldStorePayloads = fields[0].fieldInfo.storePayloads; + final boolean currentFieldOmitTf = fields[0].fieldInfo.omitTf; + // If current field omits tf then it cannot store + // payloads. 
We silently drop the payloads in this case: + final boolean currentFieldStorePayloads = currentFieldOmitTf ? false : fields[0].fieldInfo.storePayloads; + FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields]; while(numFields > 0) { @@ -261,7 +265,7 @@ assert doc < flushState.numDocsInRAM; assert doc > lastDoc || df == 1; - final int newDocCode = (doc-lastDoc)<<1; + final int newDocCode = currentFieldOmitTf ? (doc-lastDoc) : (doc-lastDoc)<<1; lastDoc = doc; @@ -270,35 +274,40 @@ // Carefully copy over the prox + payload info, // changing the format to match Lucene's segment // format. - for(int j=0;j<termDocFreq;j++) { - final int code = prox.readVInt(); - if (currentFieldStorePayloads) { - final int payloadLength; - if ((code & 1) != 0) - // This position has a payload - payloadLength = prox.readVInt(); - else - payloadLength = 0; - if (payloadLength != lastPayloadLength) { - proxOut.writeVInt(code|1); - proxOut.writeVInt(payloadLength); - lastPayloadLength = payloadLength; - } else - proxOut.writeVInt(code & (~1)); - if (payloadLength > 0) - copyBytes(prox, proxOut, payloadLength); - } else { - assert 0 == (code & 1); - proxOut.writeVInt(code>>1); + if(!currentFieldOmitTf){ //we do not write anything to the proxOut without tf + for(int j=0;j<termDocFreq;j++) { + final int code = prox.readVInt(); + if (currentFieldStorePayloads) { + final int payloadLength; + if ((code & 1) != 0) + // This position has a payload + payloadLength = prox.readVInt(); + else + payloadLength = 0; + if (payloadLength != lastPayloadLength) { + proxOut.writeVInt(code|1); + proxOut.writeVInt(payloadLength); + lastPayloadLength = payloadLength; + } else + proxOut.writeVInt(code & (~1)); + if (payloadLength > 0) + copyBytes(prox, proxOut, payloadLength); + } else { + assert 0 == (code & 1); + proxOut.writeVInt(code>>1); + } + }//End for + + if (1 == termDocFreq) { //omitTf == false + freqOut.writeVInt(newDocCode|1); + } else { + freqOut.writeVInt(newDocCode); + freqOut.writeVInt(termDocFreq); } - } - - if (1 == termDocFreq) { - freqOut.writeVInt(newDocCode|1); - } else { + } else{ //omitTf==true freqOut.writeVInt(newDocCode); - freqOut.writeVInt(termDocFreq); } + if (!minState.nextDoc()) { Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- src/java/org/apache/lucene/index/IndexReader.java (revision 678321) +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -75,6 +75,8 @@ public static final FieldOption INDEXED = new FieldOption ("INDEXED"); /** All fields that store payloads */ public static final FieldOption STORES_PAYLOADS = new FieldOption ("STORES_PAYLOADS"); + /** All fields that omit tf */ + public static final FieldOption OMIT_TF = new FieldOption ("OMIT_TF"); /** All fields which are not indexed */ public static final FieldOption UNINDEXED =
new FieldOption ("UNINDEXED"); /** All fields which are indexed with termvectors enabled */ Index: src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- src/java/org/apache/lucene/index/SegmentMerger.java (revision 678321) +++ src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -198,11 +198,11 @@ } private void addIndexed(IndexReader reader, FieldInfos fieldInfos, Collection names, boolean storeTermVectors, boolean storePositionWithTermVector, - boolean storeOffsetWithTermVector, boolean storePayloads) throws IOException { + boolean storeOffsetWithTermVector, boolean storePayloads, boolean omitTf) throws IOException { Iterator i = names.iterator(); while (i.hasNext()) { String field = (String)i.next(); - fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.hasNorms(field), storePayloads); + fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.hasNorms(field), storePayloads, omitTf); } } @@ -265,15 +265,16 @@ SegmentReader segmentReader = (SegmentReader) reader; for (int j = 0; j < segmentReader.getFieldInfos().size(); j++) { FieldInfo fi = segmentReader.getFieldInfos().fieldInfo(j); - fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.hasNorms(fi.name), fi.storePayloads); + fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.hasNorms(fi.name), fi.storePayloads, fi.omitTf); } } else { - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false); - addIndexed(reader, fieldInfos, 
reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false); fieldInfos.add(reader.getFieldNames(IndexReader.FieldOption.UNINDEXED), false); } } @@ -564,8 +565,13 @@ long freqPointer = freqOutput.getFilePointer(); long proxPointer = proxOutput.getFilePointer(); - int df = appendPostings(smis, n); // append posting data - + int df; + if(fieldInfos.fieldInfo(smis[0].term.field).omitTf){ // append posting data + df = appendPostingsNoTf(smis, n); + } else{ + df = appendPostings(smis, n); + } + long skipPointer = skipListWriter.writeSkip(freqOutput); if (df > 0) { @@ -672,6 +678,54 @@ return df; } + /** Process postings from multiple segments without tf, all positioned on 
the + * same term. Writes out merged entries only into freqOutput, proxOut is not written. + * + * @param smis array of segments + * @param n number of cells in the array actually occupied + * @return number of documents across all segments where this term was found + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + private final int appendPostingsNoTf(SegmentMergeInfo[] smis, int n) + throws CorruptIndexException, IOException { + int lastDoc = 0; + int df = 0; // number of docs w/ term + skipListWriter.resetSkip(); + int lastPayloadLength = -1; // ensures that we write the first length + for (int i = 0; i < n; i++) { + SegmentMergeInfo smi = smis[i]; + TermPositions postings = smi.getPositions(); + assert postings != null; + int base = smi.base; + int[] docMap = smi.getDocMap(); + postings.seek(smi.termEnum); + while (postings.next()) { + int doc = postings.doc(); + if (docMap != null) + doc = docMap[doc]; // map around deletions + doc += base; // convert to merged space + + if (doc < 0 || (df > 0 && doc <= lastDoc)) + throw new CorruptIndexException("docs out of order (" + doc + + " <= " + lastDoc + " )"); + + df++; + + if ((df % skipInterval) == 0) { + skipListWriter.setSkipData(lastDoc, false, lastPayloadLength); + skipListWriter.bufferSkip(df); + } + + int docCode = (doc - lastDoc); + lastDoc = doc; + freqOutput.writeVInt(docCode); // write doc & freq=1 + + } + } + return df; + } + private void mergeNorms() throws IOException { byte[] normBuffer = null; IndexOutput output = null; Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 678321) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -728,6 +728,9 @@ else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) { fieldSet.add(fi.name); } + else if (fi.omitTf && 
fieldOption == IndexReader.FieldOption.OMIT_TF) { + fieldSet.add(fi.name); + } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) { fieldSet.add(fi.name); } Index: src/java/org/apache/lucene/index/SegmentTermDocs.java =================================================================== --- src/java/org/apache/lucene/index/SegmentTermDocs.java (revision 678321) +++ src/java/org/apache/lucene/index/SegmentTermDocs.java (working copy) @@ -41,7 +41,8 @@ private boolean haveSkipped; protected boolean currentFieldStoresPayloads; - + protected boolean currentFieldOmitTf; + protected SegmentTermDocs(SegmentReader parent) { this.parent = parent; this.freqStream = (IndexInput) parent.freqStream.clone(); @@ -75,6 +76,7 @@ void seek(TermInfo ti, Term term) throws IOException { count = 0; FieldInfo fi = parent.fieldInfos.fieldInfo(term.field); + currentFieldOmitTf = (fi != null) ? fi.omitTf : false; currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false; if (ti == null) { df = 0; @@ -105,14 +107,19 @@ while (true) { if (count == df) return false; - int docCode = freqStream.readVInt(); - doc += docCode >>> 1; // shift off low bit - if ((docCode & 1) != 0) // if low bit is set - freq = 1; // freq is one - else - freq = freqStream.readVInt(); // else read freq - + final int docCode = freqStream.readVInt(); + + if (currentFieldOmitTf) { + doc += docCode; + freq = 1; + } else { + doc += docCode >>> 1; // shift off low bit + if ((docCode & 1) != 0) // if low bit is set + freq = 1; // freq is one + else + freq = freqStream.readVInt(); // else read freq + } + count++; if (deletedDocs == null || !deletedDocs.get(doc)) @@ -126,27 +133,49 @@ public int read(final int[] docs, final int[] freqs) throws IOException { final int length = docs.length; + if (currentFieldOmitTf) { + return readNoTf(docs, freqs, length); + } else { + int i = 0; + while (i < length && count < df) { + // manually inlined call to next() for speed + final int docCode = freqStream.readVInt(); + doc += docCode >>> 1;
// shift off low bit + if ((docCode & 1) != 0) // if low bit is set + freq = 1; // freq is one + else + freq = freqStream.readVInt(); // else read freq + count++; + + if (deletedDocs == null || !deletedDocs.get(doc)) { + docs[i] = doc; + freqs[i] = freq; + ++i; + } + } + return i; + } + } + + private final int readNoTf(final int[] docs, final int[] freqs, final int length) throws IOException{ int i = 0; while (i < length && count < df) { - // manually inlined call to next() for speed - final int docCode = freqStream.readVInt(); - doc += docCode >>> 1; // shift off low bit - if ((docCode & 1) != 0) // if low bit is set - freq = 1; // freq is one - else - freq = freqStream.readVInt(); // else read freq + doc += freqStream.readVInt(); count++; if (deletedDocs == null || !deletedDocs.get(doc)) { docs[i] = doc; - freqs[i] = freq; + // Hardware freq to 1 when term freqs were not + // stored in the index + freqs[i] = 1; ++i; } } return i; } - + + /** Overridden by SegmentTermPositions to skip in prox stream. */ protected void skipProx(long proxPointer, int payloadLength) throws IOException {} Index: src/java/org/apache/lucene/index/SegmentTermPositions.java =================================================================== --- src/java/org/apache/lucene/index/SegmentTermPositions.java (revision 678321) +++ src/java/org/apache/lucene/index/SegmentTermPositions.java (working copy) @@ -67,6 +67,7 @@ } private final int readDeltaPosition() throws IOException { + if(currentFieldOmitTf) return 0;//TODO: ???? 
int delta = proxStream.readVInt(); if (currentFieldStoresPayloads) { // if the current field stores payloads then Index: src/test/org/apache/lucene/index/TestOmitTf.java =================================================================== --- src/test/org/apache/lucene/index/TestOmitTf.java (revision 0) +++ src/test/org/apache/lucene/index/TestOmitTf.java (revision 0) @@ -0,0 +1,228 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +import java.util.Collection; + +import org.apache.lucene.util.LuceneTestCase; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.HitCollector; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; + + +public class TestOmitTf extends LuceneTestCase { + + public static class SimpleSimilarity extends Similarity { + public float lengthNorm(String field, int numTerms) { return 1.0f; } + public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } + + public float tf(float freq) { return freq; } + + public float sloppyFreq(int distance) { return 2.0f; } + public float idf(Collection terms, Searcher searcher) { return 1.0f; } + public float idf(int docFreq, int numDocs) { return 1.0f; } + public float coord(int overlap, int maxOverlap) { return 1.0f; } + } + + + // Tests whether the DocumentWriter correctly enable the + // omitTf bit in the FieldInfo + public void testOmitTf() throws Exception { + Directory ram = new RAMDirectory(); + Analyzer analyzer = new StandardAnalyzer(); + IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); + Document d = new Document(); + + // this field will have Tf + Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.TOKENIZED); + d.add(f1); + + // this field will NOT have Tf + Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.TOKENIZED); + f2.setOmitTf(true); + d.add(f2); + + writer.addDocument(d); + writer.optimize(); + // now 
we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger + // keept things constant + d = new Document(); + + //Reverese + f1.setOmitTf(true); + d.add(f1); + + f2.setOmitTf(false); + d.add(f2); + + writer.addDocument(d); + // force merge + writer.optimize(); + // flush + writer.close(); + + // only one segment in the index, so we can cast to SegmentReader + SegmentReader reader = (SegmentReader) IndexReader.open(ram); + FieldInfos fi = reader.fieldInfos(); + assertTrue("OmitTf field bit should be set.", fi.fieldInfo("f1").omitTf); + assertTrue("OmitTf field bit should be set.", fi.fieldInfo("f2").omitTf); + + reader.close(); + + } + + + + //Test scores with one field with Term Freqs and one without, otherwise with equal content + public void testBasic() throws Exception { + Directory dir = new RAMDirectory(); + Analyzer analyzer = new StandardAnalyzer(); + IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); + writer.setSimilarity(new SimpleSimilarity()); + + + StringBuffer sb = new StringBuffer(265); + String term = "term"; + for(int i = 0; i<30; i++){ + Document d = new Document(); + sb.append(term).append(" "); + String content = sb.toString(); + Field noTf = new Field("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.TOKENIZED); + noTf.setOmitTf(true); + d.add(noTf); + + Field tf = new Field("tf", content + (i%2==0 ? 
" tf" : ""), Field.Store.NO, Field.Index.TOKENIZED); + d.add(tf); + + writer.addDocument(d); + //System.out.println(d); + } + + writer.optimize(); + // flush + writer.close(); + + /* + * Verify the index + */ + Searcher searcher = new IndexSearcher(dir); + searcher.setSimilarity(new SimpleSimilarity()); + + Term a = new Term("noTf", term); + Term b = new Term("tf", term); + Term c = new Term("noTf", "notf"); + Term d = new Term("tf", "tf"); + TermQuery q1 = new TermQuery(a); + TermQuery q2 = new TermQuery(b); + TermQuery q3 = new TermQuery(c); + TermQuery q4 = new TermQuery(d); + + + searcher.search(q1, + new CountingHitCollector() { + public final void collect(int doc, float score) { + //System.out.println("Q1: Doc=" + doc + " score=" + score); + assertTrue(score==1.0f); + super.collect(doc, score); + } + }); + System.out.println(CountingHitCollector.getCount()); + + + searcher.search(q2, + new CountingHitCollector() { + public final void collect(int doc, float score) { + //System.out.println("Q2: Doc=" + doc + " score=" + score); + assertTrue(score==1.0f+doc); + super.collect(doc, score); + } + }); + System.out.println(CountingHitCollector.getCount()); + + + + + + searcher.search(q3, + new CountingHitCollector() { + public final void collect(int doc, float score) { + //System.out.println("Q1: Doc=" + doc + " score=" + score); + assertTrue(score==1.0f); + assertFalse(doc%2==0); + super.collect(doc, score); + } + }); + System.out.println(CountingHitCollector.getCount()); + + + searcher.search(q4, + new CountingHitCollector() { + public final void collect(int doc, float score) { + //System.out.println("Q1: Doc=" + doc + " score=" + score); + assertTrue(score==1.0f); + assertTrue(doc%2==0); + super.collect(doc, score); + } + }); + System.out.println(CountingHitCollector.getCount()); + + + + BooleanQuery bq = new BooleanQuery(); + bq.add(q1,Occur.MUST); + bq.add(q4,Occur.MUST); + + searcher.search(bq, + new CountingHitCollector() { + public final void collect(int doc, 
float score) { + //System.out.println("BQ: Doc=" + doc + " score=" + score); + super.collect(doc, score); + } + }); + assertTrue(15 == CountingHitCollector.getCount()); + + searcher.close(); + + } + + public static class CountingHitCollector extends HitCollector { + static int count=0; + static int sum=0; + CountingHitCollector(){count=0;sum=0;} + public void collect(int doc, float score) { + count++; + sum += doc; // use it to avoid any possibility of being optimized away + } + + public static int getCount() { return count; } + public static int getSum() { return sum; } + } + +}