Index: lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java
===================================================================
--- lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java	(revision 948097)
+++ lucene/contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java	(working copy)
@@ -24,6 +24,7 @@
 import java.util.Map;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
@@ -76,6 +77,7 @@
 
     MoreLikeThis mlt = new MoreLikeThis(
         reader);
+    mlt.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
     mlt.setMinDocFreq(1);
     mlt.setMinTermFreq(1);
     mlt.setMinWordLen(1);
@@ -110,6 +112,7 @@
   private Map getOriginalValues() throws IOException {
     Map originalValues = new HashMap();
     MoreLikeThis mlt = new MoreLikeThis(reader);
+    mlt.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
     mlt.setMinDocFreq(1);
     mlt.setMinTermFreq(1);
     mlt.setMinWordLen(1);
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java	(revision 948097)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java	(working copy)
@@ -32,7 +32,6 @@
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
@@ -49,7 +48,6 @@
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.PriorityQueue;
-import org.apache.lucene.util.Version;
 
 
 /**
@@ -158,13 +156,6 @@
      */
     public static final int DEFAULT_MAX_NUM_TOKENS_PARSED=5000;
 
-
-    /**
-     * Default analyzer to parse source doc with.
-     * @see #getAnalyzer
-     */
-    public static final Analyzer DEFAULT_ANALYZER = new StandardAnalyzer(Version.LUCENE_CURRENT);
-
     /**
      * Ignore terms with less than this frequency in the source doc.
      * @see #getMinTermFreq
@@ -240,7 +231,7 @@
     /**
      * Analyzer that will be used to parse the doc.
      */
-    private Analyzer analyzer = DEFAULT_ANALYZER;
+    private Analyzer analyzer = null;
 
     /**
      * Ignore words less frequent that this.
@@ -343,10 +334,9 @@
 
     /**
      * Returns an analyzer that will be used to parse source doc with. The default analyzer
-     * is the {@link #DEFAULT_ANALYZER}.
+     * is not set.
      *
      * @return the analyzer that will be used to parse source doc with.
-     * @see #DEFAULT_ANALYZER
      */
     public Analyzer getAnalyzer() {
         return analyzer;
@@ -887,6 +877,10 @@
     private void addTermFrequencies(Reader r, Map termFreqMap, String fieldName)
         throws IOException
     {
+        if (analyzer == null) {
+            throw new UnsupportedOperationException("To use MoreLikeThis without " +
+                "term vectors, you must provide an Analyzer");
+        }
         TokenStream ts = analyzer.tokenStream(fieldName, r);
         int tokenCount=0;
         // for every token
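
For reference, a minimal caller-side sketch of the new contract (not part of the patch): with the implicit StandardAnalyzer default removed, any caller that asks MoreLikeThis to tokenize raw text itself, i.e. to build a query over fields indexed without term vectors, must now set an analyzer explicitly, or addTermFrequencies throws UnsupportedOperationException. The index directory, the "contents" field name, and the choice of WhitespaceAnalyzer below are illustrative assumptions, not anything the patch prescribes.

import java.io.File;
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class MoreLikeThisUsage {
  public static Query similarTo(File indexDir, String text) throws IOException {
    IndexReader reader = IndexReader.open(FSDirectory.open(indexDir));
    MoreLikeThis mlt = new MoreLikeThis(reader);
    // After this patch there is no StandardAnalyzer fallback: an analyzer is
    // mandatory whenever MoreLikeThis must tokenize text itself (fields
    // indexed without term vectors). Omitting this call now results in an
    // UnsupportedOperationException from addTermFrequencies.
    mlt.setAnalyzer(new WhitespaceAnalyzer(Version.LUCENE_CURRENT));
    mlt.setFieldNames(new String[] { "contents" }); // hypothetical field name
    mlt.setMinTermFreq(1);
    mlt.setMinDocFreq(1);
    return mlt.like(new StringReader(text));
  }
}

Making the caller choose keeps query-time analysis consistent with whatever analyzer built the index, rather than silently falling back to StandardAnalyzer.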