cvs diff -Nu ? build ? dist ? META-INF ? prj cvs server: Diffing . cvs server: Diffing docs cvs server: Diffing docs/images cvs server: Diffing docs/lucene-sandbox cvs server: Diffing docs/lucene-sandbox/indyo cvs server: Diffing docs/lucene-sandbox/larm cvs server: Diffing lib cvs server: Diffing metadata cvs server: Diffing src cvs server: Diffing src/demo cvs server: Diffing src/demo/org cvs server: Diffing src/demo/org/apache cvs server: Diffing src/demo/org/apache/lucene cvs server: Diffing src/demo/org/apache/lucene/HTMLParser cvs server: Diffing src/demo/org/apache/lucene/demo cvs server: Diffing src/demo/org/apache/lucene/demo/html cvs server: Diffing src/java cvs server: Diffing src/java/org cvs server: Diffing src/java/org/apache cvs server: Diffing src/java/org/apache/lucene cvs server: Diffing src/java/org/apache/lucene/analysis Index: src/java/org/apache/lucene/analysis/StopFilter.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/analysis/StopFilter.java,v retrieving revision 1.12 diff -u -r1.12 StopFilter.java --- src/java/org/apache/lucene/analysis/StopFilter.java 29 Mar 2004 22:48:00 -0000 1.12 +++ src/java/org/apache/lucene/analysis/StopFilter.java 26 Jul 2004 18:10:58 -0000 @@ -28,13 +28,23 @@ public final class StopFilter extends TokenFilter { private Set stopWords; + private boolean ignoreCase; + + /** + * Construct a token stream filtering the given input. + */ + public StopFilter(TokenStream input, String [] stopWords) + { + this(input, stopWords, false); + } /** * Constructs a filter which removes words from the input * TokenStream that are named in the array of words. */ - public StopFilter(TokenStream in, String[] stopWords) { + public StopFilter(TokenStream in, String[] stopWords, boolean ignoreCase) { super(in); + this.ignoreCase = ignoreCase; this.stopWords = makeStopSet(stopWords); } @@ -43,12 +53,35 @@ * TokenStream that are named in the Hashtable. * * @deprecated Use {@link #StopFilter(TokenStream, Set)} instead - */ + */ public StopFilter(TokenStream in, Hashtable stopTable) { + this(in, stopTable, false); + } + /** + * Constructs a filter which removes words from the input + * TokenStream that are named in the Hashtable. + * + * @deprecated Use {@link #StopFilter(TokenStream, Set)} instead + */ + public StopFilter(TokenStream in, Hashtable stopTable, boolean ignoreCase) { super(in); + this.ignoreCase = ignoreCase; stopWords = new HashSet(stopTable.keySet()); } + /** + * Construct a token stream filtering the given input. + * @param input + * @param stopWords The set of Stop Words, as Strings. If ignoreCase is true, all strings should be lower cased + * @param ignoreCase -Ignore case when stopping. The stopWords set must be setup to contain only lower case words + */ + public StopFilter(TokenStream input, Set stopWords, boolean ignoreCase) + { + super(input); + this.ignoreCase = ignoreCase; + this.stopWords = stopWords; + } + /** * Constructs a filter which removes words from the input * TokenStream that are named in the Set. @@ -58,10 +91,8 @@ * @see #makeStopSet(java.lang.String[]) */ public StopFilter(TokenStream in, Set stopWords) { - super(in); - this.stopWords = stopWords; + this(in, stopWords, false); } - /** * Builds a Hashtable from an array of stop words, * appropriate for passing into the StopFilter constructor. @@ -71,9 +102,23 @@ * @deprecated Use {@link #makeStopSet(String[])} instead. */ public static final Hashtable makeStopTable(String[] stopWords) { - Hashtable stopTable = new Hashtable(stopWords.length); - for (int i = 0; i < stopWords.length; i++) - stopTable.put(stopWords[i], stopWords[i]); + return makeStopTable(stopWords, false); + } + + /** + * Builds a Hashtable from an array of stop words, + * appropriate for passing into the StopFilter constructor. + * This permits this table construction to be cached once when + * an Analyzer is constructed. + * @deprecated Use {@link #makeStopSet(java.lang.String[], boolean)} instead. + */ + public static final Hashtable makeStopTable(String [] stopWords, boolean ignoreCase) { + Hashtable stopTable = new Hashtable(stopWords.length); + for (int i = 0; i < stopWords.length; i++) + { + String stopWord = ignoreCase == false ? stopWords[i] : stopWords[i].toLowerCase(); + stopTable.put(stopWord, stopWord); + } return stopTable; } @@ -82,13 +127,25 @@ * appropriate for passing into the StopFilter constructor. * This permits this stopWords construction to be cached once when * an Analyzer is constructed. + * + * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase */ public static final Set makeStopSet(String[] stopWords) { + return makeStopSet(stopWords, false); + } + + /** + * + * @param stopWords + * @param ignoreCase If true, all words are lower cased first. + * @return + */ + public static final Set makeStopSet(String[] stopWords, boolean ignoreCase) { HashSet stopTable = new HashSet(stopWords.length); for (int i = 0; i < stopWords.length; i++) - stopTable.add(stopWords[i]); + stopTable.add(ignoreCase == false ? stopWords[i] : stopWords[i].toLowerCase()); return stopTable; - } + } /** * Returns the next input Token whose termText() is not a stop word. @@ -96,8 +153,11 @@ public final Token next() throws IOException { // return the first non-stop word found for (Token token = input.next(); token != null; token = input.next()) - if (!stopWords.contains(token.termText)) - return token; + { + String termText = ignoreCase == false ? token.termText : token.termText.toLowerCase(); + if (!stopWords.contains(termText)) + return token; + } // reached EOS -- return null return null; } cvs server: Diffing src/java/org/apache/lucene/analysis/de cvs server: Diffing src/java/org/apache/lucene/analysis/ru cvs server: Diffing src/java/org/apache/lucene/analysis/standard cvs server: Diffing src/java/org/apache/lucene/document cvs server: Diffing src/java/org/apache/lucene/index cvs server: Diffing src/java/org/apache/lucene/queryParser cvs server: Diffing src/java/org/apache/lucene/search cvs server: Diffing src/java/org/apache/lucene/search/spans cvs server: Diffing src/java/org/apache/lucene/store cvs server: Diffing src/java/org/apache/lucene/util cvs server: Diffing src/jsp cvs server: Diffing src/jsp/WEB-INF cvs server: Diffing src/jsp/WEB-INF/lib cvs server: Diffing src/test cvs server: Diffing src/test/org cvs server: Diffing src/test/org/apache cvs server: Diffing src/test/org/apache/lucene cvs server: Diffing src/test/org/apache/lucene/analysis cvs server: Diffing src/test/org/apache/lucene/analysis/ru cvs server: Diffing src/test/org/apache/lucene/document cvs server: Diffing src/test/org/apache/lucene/index cvs server: Diffing src/test/org/apache/lucene/index/store cvs server: Diffing src/test/org/apache/lucene/queryParser cvs server: Diffing src/test/org/apache/lucene/search cvs server: Diffing src/test/org/apache/lucene/search/spans cvs server: Diffing src/test/org/apache/lucene/store cvs server: Diffing src/test/org/apache/lucene/util cvs server: Diffing xdocs cvs server: Diffing xdocs/images cvs server: Diffing xdocs/lucene-sandbox cvs server: Diffing xdocs/lucene-sandbox/indyo cvs server: Diffing xdocs/lucene-sandbox/larm cvs server: Diffing xdocs/stylesheets