Index: MoreLikeThis.java
===================================================================
--- MoreLikeThis.java	(revision 766250)
+++ MoreLikeThis.java	(working copy)
@@ -148,6 +148,12 @@
 public final class MoreLikeThis {
 
     /**
+     * Default setting for the document-frequency cache: disabled.
+     * @see #setCache
+     */
+    public static final boolean DEFAULT_CACHE = false;
+
+    /**
      * Default maximum number of tokens to parse in each example doc field that is not stored with TermVector support.
      * @see #getMaxNumTokensParsed
      */
@@ -254,8 +260,22 @@
      */
     private int maxNumTokensParsed=DEFAULT_MAX_NUM_TOKENS_PARSED;
 
+    /**
+     * Whether or not to cache document frequencies.
+     * @see #setCache
+     */
+    private boolean _useCache = DEFAULT_CACHE;
+
+    /**
+     * Cached document frequencies, keyed by field name and then by word.
+     * Held per instance rather than in a static field: the cache key does not
+     * identify the index, so sharing it would mix frequencies from different
+     * indexes. Entries are never evicted or refreshed, and the maps are plain
+     * (unsynchronized) HashMaps.
+     */
+    private final Map<String, Map<String, Integer>> _cache =
+            new HashMap<String, Map<String, Integer>>();
 
-
     /**
      * Ignore words if less than this len.
      */
@@ -507,6 +527,16 @@
     }
 
 
+    /**
+     * Sets whether document frequencies read from the index are cached and
+     * reused by this instance. The default is {@link #DEFAULT_CACHE}.
+     *
+     * @param useCache Whether or not to cache document frequencies
+     */
+    public void setCache( boolean useCache )
+    {
+        _useCache = useCache;
+    }
 
 
     /**
@@ -627,10 +657,30 @@
             // go through all the fields and find the largest document frequency
             String topField = fieldNames[0];
             int docFreq = 0;
             for (int i = 0; i < fieldNames.length; i++) {
-                int freq = ir.docFreq(new Term(fieldNames[i], word));
-                topField = (freq > docFreq) ? fieldNames[i] : topField;
-                docFreq = (freq > docFreq) ? freq : docFreq;
+                final String fieldName = fieldNames[i];
+                int freq;
+                if (_useCache) {
+                    // Fetch (or lazily create) this field's cache once, instead of re-querying _cache for every access.
+                    Map<String, Integer> fieldCache = _cache.get(fieldName);
+                    if (fieldCache == null) {
+                        fieldCache = new HashMap<String, Integer>();
+                        _cache.put(fieldName, fieldCache);
+                    }
+                    Integer cached = fieldCache.get(word);
+                    if (cached == null) {
+                        // Cache miss: ask the index and remember the answer.
+                        cached = Integer.valueOf(ir.docFreq(new Term(fieldName, word)));
+                        fieldCache.put(word, cached);
+                    }
+                    freq = cached.intValue();
+                } else {
+                    freq = ir.docFreq(new Term(fieldName, word));
+                }
+                if (freq > docFreq) {
+                    topField = fieldName;
+                    docFreq = freq;
+                }
             }
 
             if (minDocFreq > 0 && docFreq < minDocFreq) {