Index: MoreLikeThis.java
===================================================================
--- MoreLikeThis.java	(revision 798477)
+++ MoreLikeThis.java	(working copy)
@@ -36,6 +36,8 @@
 import java.util.Set;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.WeakHashMap;
+import java.util.LinkedHashMap;
 import java.util.Collection;
 import java.util.Iterator;
 import java.io.IOException;
@@ -287,6 +289,18 @@
     private float boostFactor = 1;
 
     /**
+     * Cache of document frequencies for recently accessed terms: maps an
+     * IndexReader to a map of field name to a size-bounded map of term to
+     * document frequency
+     */
+    private WeakHashMap termFreqCache = null;
+
+    /**
+     * Maximum number of terms cached per field
+     */
+    private int termFreqCacheSize = 0;
+
+    /**
      * Returns the boost factor used when boosting terms
      * @return the boost factor used when boosting terms
      */
@@ -301,6 +315,21 @@
     public void setBoostFactor(float boostFactor) {
         this.boostFactor = boostFactor;
     }
+
+    /**
+     * Sets the maximum number of terms cached per field by the document
+     * frequency cache; a size of zero or less disables caching.
+     * @param size the maximum number of cached terms per field
+     */
+    public void setTermCacheSize(int size) {
+        this.termFreqCacheSize = size;
+        // the per-field caches were bounded by the old limit; drop them so
+        // they are rebuilt lazily with the new one
+        Map fields = (Map) termFreqCache.get(ir);
+        if (fields != null) {
+            fields.clear();
+        }
+    }
 
     /**
      * Constructor requiring an IndexReader.
@@ -310,9 +339,29 @@
     }
 
     public MoreLikeThis(IndexReader ir, Similarity sim){
-        this.ir = ir;
-        this.similarity = sim;
+        this(ir, sim, 1000);
     }
+
+    /**
+     * Constructor requiring an IndexReader and a term cache size.
+     * @param termCacheSize maximum number of terms cached per field
+     */
+    public MoreLikeThis(IndexReader ir, int termCacheSize ){
+        this(ir, new DefaultSimilarity(), termCacheSize);
+    }
+
+    /**
+     * Constructor requiring an IndexReader, a Similarity and a term cache size.
+     * @param termCacheSize maximum number of terms cached per field
+     */
+    public MoreLikeThis(IndexReader ir, Similarity sim, int termCacheSize ){
+        this.termFreqCacheSize = termCacheSize;
+        this.ir = ir;
+        this.similarity = sim;
+        // reader -> (field name -> LRU map of term -> document frequency)
+        termFreqCache = new WeakHashMap();
+        termFreqCache.put(ir, new HashMap());
+    }
 
 
     public Similarity getSimilarity() {
@@ -648,8 +697,9 @@
         // go through all the fields and find the largest document frequency
         String topField = fieldNames[0];
         int docFreq = 0;
+        Map cache = (Map)this.termFreqCache.get(this.ir);
         for (int i = 0; i < fieldNames.length; i++) {
-            int freq = ir.docFreq(new Term(fieldNames[i], word));
+            int freq = this.getTermFrequency( cache, word, fieldNames[i] );
             topField = (freq > docFreq) ? fieldNames[i] : topField;
             docFreq = (freq > docFreq) ? freq : docFreq;
         }
@@ -678,6 +728,44 @@
     }
 
     /**
+     * Returns the document frequency of a term in a field, consulting the
+     * per-field LRU cache before falling back to the index. Values read from
+     * the index are stored in the cache for later lookups.
+     *
+     * @param cache map of field name to per-field term cache for the current
+     *        reader; may be null, in which case the index is queried directly
+     * @param term the term text
+     * @param field the field name
+     * @return the value of IndexReader.docFreq for the term in the field
+     * @throws IOException if the index cannot be read
+     */
+    private int getTermFrequency( Map cache, String term, String field ) throws IOException
+    {
+        if ( cache == null ) {
+            return ir.docFreq(new Term(field, term));
+        }
+
+        // get (or lazily create) the size-bounded cache for this field
+        Map fieldCache = (Map)cache.get(field);
+        if ( fieldCache == null ) {
+            LRULinkedHashMap lru = new LRULinkedHashMap(16, 0.75f, true);
+            lru.setMaxTerms(termFreqCacheSize);
+            fieldCache = lru;
+            cache.put( field, fieldCache );
+        }
+
+        Integer cached = (Integer)fieldCache.get(term);
+        if ( cached != null ) {
+            return cached.intValue();
+        }
+
+        // cache miss: ask the index and remember the real value
+        int freq = ir.docFreq(new Term(field, term));
+        fieldCache.put(term, new Integer(freq));
+        return freq;
+    }
+
+    /**
      * Describe the parameters that control how the "more like this" query is formed.
      */
     public String describeParams() {
@@ -973,5 +1061,26 @@
         }
     }
 
+    /**
+     * LinkedHashMap that evicts its eldest entry once it holds more than a
+     * configurable number of entries; when constructed with accessOrder set
+     * to true it behaves as an LRU cache.
+     */
+    protected class LRULinkedHashMap extends LinkedHashMap
+    {
+        /** Maximum number of entries retained */
+        private int maxTerms = 0;
+
+        public LRULinkedHashMap(int initialCapacity, float loadFactor, boolean accessOrder) {
+            super(initialCapacity, loadFactor, accessOrder);
+        }
+
+        public void setMaxTerms(int terms) {
+            maxTerms = terms;
+        }
+
+        public boolean removeEldestEntry(Map.Entry eldest) {
+            return size() > maxTerms;
+        }
+    }
 
 }
Index: MoreLikeThisQuery.java
===================================================================
--- MoreLikeThisQuery.java	(revision 798477)
+++ MoreLikeThisQuery.java	(working copy)
@@ -43,6 +43,7 @@
     private String likeText;
     private String[] moreLikeFields;
     private Analyzer analyzer;
+    private int termCacheSize = -1;
     float percentTermsToMatch=0.3f;
     int minTermFrequency=1;
     int maxQueryTerms=5;
@@ -55,15 +56,28 @@
      */
     public MoreLikeThisQuery(String likeText, String[] moreLikeFields, Analyzer analyzer)
     {
+        this(likeText, moreLikeFields, analyzer, -1 );
+    }
+
+    /**
+     * @param termCacheSize maximum number of terms cached per field by
+     *        MoreLikeThis; a negative value keeps the MoreLikeThis default
+     */
+    public MoreLikeThisQuery(String likeText, String[] moreLikeFields, Analyzer analyzer, int termCacheSize )
+    {
         this.likeText=likeText;
         this.moreLikeFields=moreLikeFields;
         this.analyzer=analyzer;
+        this.termCacheSize=termCacheSize;
     }
 
     public Query rewrite(IndexReader reader) throws IOException
     {
         MoreLikeThis mlt=new MoreLikeThis(reader);
-
+        // a negative size means "not configured": leave the default in place
+        if ( this.termCacheSize > -1 ) {
+            mlt.setTermCacheSize(this.termCacheSize);
+        }
         mlt.setFieldNames(moreLikeFields);
         mlt.setAnalyzer(analyzer);
         mlt.setMinTermFreq(minTermFrequency);