Index: lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java (revision 1235565)
+++ lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java (working copy)
@@ -336,7 +336,7 @@
}
@Override
- public float idf(int docFreq, int numDocs) {
+ public float idf(long docFreq, long numDocs) {
return 1;
}
Index: lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java (revision 1235565)
+++ lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java (working copy)
@@ -73,7 +73,7 @@
}
@Override
- public float idf(int docFreq, int numDocs) {
+ public float idf(long docFreq, long numDocs) {
return 1.0f;
}
}
Index: lucene/src/test/org/apache/lucene/search/TestSimilarity.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestSimilarity.java (revision 1235565)
+++ lucene/src/test/org/apache/lucene/search/TestSimilarity.java (working copy)
@@ -49,7 +49,7 @@
@Override public void computeNorm(FieldInvertState state, Norm norm) { norm.setByte(encodeNormValue(state.getBoost())); }
@Override public float tf(float freq) { return freq; }
@Override public float sloppyFreq(int distance) { return 2.0f; }
- @Override public float idf(int docFreq, int numDocs) { return 1.0f; }
+ @Override public float idf(long docFreq, long numDocs) { return 1.0f; }
@Override public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics[] stats) {
return new Explanation(1.0f, "Inexplicable");
}
Index: lucene/src/test/org/apache/lucene/search/TestSimilarityProvider.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestSimilarityProvider.java (revision 1235565)
+++ lucene/src/test/org/apache/lucene/search/TestSimilarityProvider.java (working copy)
@@ -129,7 +129,7 @@
}
@Override
- public float idf(int docFreq, int numDocs) {
+ public float idf(long docFreq, long numDocs) {
return 1f;
}
@@ -157,7 +157,7 @@
}
@Override
- public float idf(int docFreq, int numDocs) {
+ public float idf(long docFreq, long numDocs) {
return 10f;
}
Index: lucene/src/test/org/apache/lucene/index/TestOmitTf.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestOmitTf.java (revision 1235565)
+++ lucene/src/test/org/apache/lucene/index/TestOmitTf.java (working copy)
@@ -47,7 +47,7 @@
@Override public void computeNorm(FieldInvertState state, Norm norm) { norm.setByte(encodeNormValue(state.getBoost())); }
@Override public float tf(float freq) { return freq; }
@Override public float sloppyFreq(int distance) { return 2.0f; }
- @Override public float idf(int docFreq, int numDocs) { return 1.0f; }
+ @Override public float idf(long docFreq, long numDocs) { return 1.0f; }
@Override public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats) {
return new Explanation(1.0f, "Inexplicable");
}
Index: lucene/src/java/org/apache/lucene/search/TermStatistics.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermStatistics.java (revision 1235565)
+++ lucene/src/java/org/apache/lucene/search/TermStatistics.java (working copy)
@@ -25,10 +25,10 @@
*/
public class TermStatistics {
private final BytesRef term;
- private final int docFreq;
+ private final long docFreq;
private final long totalTermFreq;
- public TermStatistics(BytesRef term, int docFreq, long totalTermFreq) {
+ public TermStatistics(BytesRef term, long docFreq, long totalTermFreq) {
this.term = term;
this.docFreq = docFreq;
this.totalTermFreq = totalTermFreq;
@@ -41,7 +41,7 @@
/** returns the number of documents this term occurs in
* @see IndexReader#docFreq(String, BytesRef) */
- public final int docFreq() {
+ public final long docFreq() {
return docFreq;
}
Index: lucene/src/java/org/apache/lucene/search/CollectionStatistics.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/CollectionStatistics.java (revision 1235565)
+++ lucene/src/java/org/apache/lucene/search/CollectionStatistics.java (working copy)
@@ -26,12 +26,12 @@
*/
public class CollectionStatistics {
private final String field;
- private final int maxDoc;
- private final int docCount;
+ private final long maxDoc;
+ private final long docCount;
private final long sumTotalTermFreq;
private final long sumDocFreq;
- public CollectionStatistics(String field, int maxDoc, int docCount, long sumTotalTermFreq, long sumDocFreq) {
+ public CollectionStatistics(String field, long maxDoc, long docCount, long sumTotalTermFreq, long sumDocFreq) {
this.field = field;
this.maxDoc = maxDoc;
this.docCount = docCount;
@@ -47,14 +47,14 @@
/** returns the total number of documents, regardless of
* whether they all contain values for this field.
* @see IndexReader#maxDoc() */
- public final int maxDoc() {
+ public final long maxDoc() {
return maxDoc;
}
/** returns the total number of documents that
* have at least one term for this field.
* @see Terms#getDocCount() */
- public final int docCount() {
+ public final long docCount() {
return docCount;
}
Index: lucene/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/BM25Similarity.java (revision 1235565)
+++ lucene/src/java/org/apache/lucene/search/similarities/BM25Similarity.java (working copy)
@@ -58,7 +58,7 @@
}
/** Implemented as log(1 + (numDocs - docFreq + 0.5)/(docFreq + 0.5)). */
- protected float idf(int docFreq, int numDocs) {
+ protected float idf(long docFreq, long numDocs) {
return (float) Math.log(1 + (numDocs - docFreq + 0.5D)/(docFreq + 0.5D));
}
@@ -131,19 +131,19 @@
}
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
- final int df = termStats.docFreq();
- final int max = collectionStats.maxDoc();
+ final long df = termStats.docFreq();
+ final long max = collectionStats.maxDoc();
final float idf = idf(df, max);
return new Explanation(idf, "idf(docFreq=" + df + ", maxDocs=" + max + ")");
}
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
- final int max = collectionStats.maxDoc();
+ final long max = collectionStats.maxDoc();
float idf = 0.0f;
final Explanation exp = new Explanation();
exp.setDescription("idf(), sum of:");
for (final TermStatistics stat : termStats ) {
- final int df = stat.docFreq();
+ final long df = stat.docFreq();
final float termIdf = idf(df, max);
exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
idf += termIdf;
Index: lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java (revision 1235565)
+++ lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java (working copy)
@@ -26,7 +26,7 @@
public class BasicModelIF extends BasicModel {
@Override
public final float score(BasicStats stats, float tfn) {
- int N = stats.getNumberOfDocuments();
+ long N = stats.getNumberOfDocuments();
long F = stats.getTotalTermFreq();
return tfn * (float)(log2(1 + (N + 1) / (F + 0.5)));
}
Index: lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java (revision 1235565)
+++ lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java (working copy)
@@ -87,9 +87,9 @@
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
* Subclasses can override this method to fill additional stats. */
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
- int numberOfDocuments = collectionStats.maxDoc();
+ long numberOfDocuments = collectionStats.maxDoc();
- int docFreq = termStats.docFreq();
+ long docFreq = termStats.docFreq();
long totalTermFreq = termStats.totalTermFreq();
// codec does not supply totalTermFreq: substitute docFreq
Index: lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java (revision 1235565)
+++ lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java (working copy)
@@ -25,13 +25,13 @@
*/
public class BasicStats extends Similarity.Stats {
/** The number of documents. */
- protected int numberOfDocuments;
+ protected long numberOfDocuments;
/** The total number of tokens in the field. */
protected long numberOfFieldTokens;
/** The average field length. */
protected float avgFieldLength;
/** The document frequency. */
- protected int docFreq;
+ protected long docFreq;
/** The total number of occurrences of this term across all documents. */
protected long totalTermFreq;
@@ -55,12 +55,12 @@
// ------------------------- Getter/setter methods -------------------------
/** Returns the number of documents. */
- public int getNumberOfDocuments() {
+ public long getNumberOfDocuments() {
return numberOfDocuments;
}
/** Sets the number of documents. */
- public void setNumberOfDocuments(int numberOfDocuments) {
+ public void setNumberOfDocuments(long numberOfDocuments) {
this.numberOfDocuments = numberOfDocuments;
}
@@ -91,12 +91,12 @@
}
/** Returns the document frequency. */
- public int getDocFreq() {
+ public long getDocFreq() {
return docFreq;
}
/** Sets the document frequency. */
- public void setDocFreq(int docFreq) {
+ public void setDocFreq(long docFreq) {
this.docFreq = docFreq;
}
Index: lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java (revision 1235565)
+++ lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java (working copy)
@@ -333,13 +333,13 @@
* idf(t) appears for t in both the query and the document,
* hence it is squared in the equation.
* The default computation for idf(t) in
- * {@link org.apache.lucene.search.similarities.DefaultSimilarity#idf(int, int) DefaultSimilarity} is:
+ * {@link org.apache.lucene.search.similarities.DefaultSimilarity#idf(long, long) DefaultSimilarity} is:
*
*
*
- * {@link org.apache.lucene.search.similarities.DefaultSimilarity#idf(int, int) idf(t)} =
+ * {@link org.apache.lucene.search.similarities.DefaultSimilarity#idf(long, long) idf(t)} =
*
*
* 1 + log (
@@ -526,7 +526,7 @@
public abstract class TFIDFSimilarity extends Similarity {
/** Computes a score factor based on a term or phrase's frequency in a
- * document. This value is multiplied by the {@link #idf(int, int)}
+ * document. This value is multiplied by the {@link #idf(long, long)}
* factor for each term in the query and these products are then summed to
* form the initial score for a document.
*
@@ -545,7 +545,7 @@
}
/** Computes a score factor based on a term or phrase's frequency in a
- * document. This value is multiplied by the {@link #idf(int, int)}
+ * document. This value is multiplied by the {@link #idf(long, long)}
* factor for each term in the query and these products are then summed to
* form the initial score for a document.
*
@@ -583,8 +583,8 @@
* @throws IOException
*/
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
- final int df = termStats.docFreq();
- final int max = collectionStats.maxDoc();
+ final long df = termStats.docFreq();
+ final long max = collectionStats.maxDoc();
final float idf = idf(df, max);
return new Explanation(idf, "idf(docFreq=" + df + ", maxDocs=" + max + ")");
}
@@ -604,12 +604,12 @@
* @throws IOException
*/
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
- final int max = collectionStats.maxDoc();
+ final long max = collectionStats.maxDoc();
float idf = 0.0f;
final Explanation exp = new Explanation();
exp.setDescription("idf(), sum of:");
for (final TermStatistics stat : termStats ) {
- final int df = stat.docFreq();
+ final long df = stat.docFreq();
final float termIdf = idf(df, max);
exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
idf += termIdf;
@@ -631,7 +631,7 @@
* @param numDocs the total number of documents in the collection
* @return a score factor based on the term's document frequency
*/
- public abstract float idf(int docFreq, int numDocs);
+ public abstract float idf(long docFreq, long numDocs);
/** Cache of decoded bytes. */
private static final float[] NORM_TABLE = new float[256];
Index: lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java (revision 1235565)
+++ lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java (working copy)
@@ -27,7 +27,7 @@
@Override
public final float score(BasicStats stats, float tfn) {
long F = stats.getTotalTermFreq()+1;
- int n = stats.getDocFreq()+1;
+ long n = stats.getDocFreq()+1;
return (F + 1) / (n * (tfn + 1));
}
Index: lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java (revision 1235565)
+++ lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java (working copy)
@@ -27,8 +27,8 @@
public class BasicModelIn extends BasicModel {
@Override
public final float score(BasicStats stats, float tfn) {
- int N = stats.getNumberOfDocuments();
- int n = stats.getDocFreq();
+ long N = stats.getNumberOfDocuments();
+ long n = stats.getDocFreq();
return tfn * (float)(log2((N + 1) / (n + 0.5)));
}
Index: lucene/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java (revision 1235565)
+++ lucene/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java (working copy)
@@ -62,7 +62,7 @@
/** Implemented as log(numDocs/(docFreq+1)) + 1. */
@Override
- public float idf(int docFreq, int numDocs) {
+ public float idf(long docFreq, long numDocs) {
return (float)(Math.log(numDocs/(double)(docFreq+1)) + 1.0);
}
Index: lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java (revision 1235565)
+++ lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java (working copy)
@@ -27,7 +27,7 @@
public class BasicModelIne extends BasicModel {
@Override
public final float score(BasicStats stats, float tfn) {
- int N = stats.getNumberOfDocuments();
+ long N = stats.getNumberOfDocuments();
long F = stats.getTotalTermFreq();
double ne = N * (1 - Math.pow((N - 1) / (double)N, F));
return tfn * (float)(log2((N + 1) / (ne + 0.5)));
|