Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 828807) +++ CHANGES.txt (working copy) @@ -42,6 +42,11 @@ settings in StandardAnalyzer to be compatible with the coming Lucene 3.0. (Uwe Schindler) + * LUCENE-2002: Add required Version matchVersion argument when + constructing QueryParser or MultiFieldQueryParser and, default (as + of 2.9) enablePositionIncrements to true to match + StandardAnalyzer's 2.9 default (Uwe Schindler, Mike McCandless) + Documentation * LUCENE-1955: Fix Hits deprecation notice to point users in right Index: src/test/org/apache/lucene/queryParser/TestQueryParser.java =================================================================== --- src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 828807) +++ src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy) @@ -47,6 +47,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.FuzzyQuery; @@ -60,7 +61,10 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockRAMDirectory; import org.apache.lucene.util.LocalizedTestCase; +import org.apache.lucene.util.Version; /** * Tests QueryParser. 
@@ -1014,4 +1018,46 @@ BooleanQuery.setMaxClauseCount(originalMaxClauses); } + // LUCENE-2002: make sure defaults for StandardAnalyzer's + // enableStopPositionIncr & QueryParser's enablePosIncr + // "match" + public void testPositionIncrements() throws Exception { + Directory dir = new MockRAMDirectory(); + Analyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT); + IndexWriter w = new IndexWriter(dir, a, IndexWriter.MaxFieldLength.UNLIMITED); + Document doc = new Document(); + doc.add(new Field("f", "the wizard of ozzy", Field.Store.NO, Field.Index.ANALYZED)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + IndexSearcher s = new IndexSearcher(r); + QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "f", a); + Query q = qp.parse("\"wizard of ozzy\""); + assertEquals(1, s.search(q, 1).totalHits); + r.close(); + dir.close(); + } + + // LUCENE-2002: unfortunately, whenever you regen + // QueryParser (with javacc), you have to manually edit + // these constructors to make them protected not public: + // + // protected QueryParser(CharStream stream) + // + // protected QueryParser(QueryParserTokenManager tm) + public void testProtectedCtors() throws Exception { + try { + QueryParser.class.getConstructor(new Class[] {CharStream.class}); + fail("please switch public QueryParser(CharStream) to be protected"); + } catch (NoSuchMethodException nsme) { + // expected + } + try { + QueryParser.class.getConstructor(new Class[] {QueryParserTokenManager.class}); + fail("please switch public QueryParser(QueryParserTokenManager) to be protected"); + } catch (NoSuchMethodException nsme) { + // expected + } + } + } Index: src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java =================================================================== --- src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java (revision 828807) +++ src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java (working copy) @@ -5,6 +5,7 @@ import 
org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.util.Version; import java.io.StringReader; @@ -108,15 +109,24 @@ } public void testDomainNames() throws Exception { - // Don't reuse a because we alter its state (setReplaceInvalidAcronym) - StandardAnalyzer a2 = new StandardAnalyzer(); + // Don't reuse a because we alter its state + // (setReplaceInvalidAcronym) + + // Current lucene should not show the bug + StandardAnalyzer a2 = new StandardAnalyzer(Version.LUCENE_CURRENT); // domain names assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"}); //Notice the trailing . See https://issues.apache.org/jira/browse/LUCENE-1068. // the following should be recognized as HOST: assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "" }); - a2.setReplaceInvalidAcronym(false); + + // 2.3 should show the bug + a2 = new StandardAnalyzer(Version.LUCENE_23); assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "" }); + + // 2.4 should not show the bug + a2 = new StandardAnalyzer(Version.LUCENE_24); + assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "" }); } public void testEMailAddresses() throws Exception { Index: src/java/org/apache/lucene/queryParser/MultiFieldQueryParser.java =================================================================== --- src/java/org/apache/lucene/queryParser/MultiFieldQueryParser.java (revision 828807) +++ src/java/org/apache/lucene/queryParser/MultiFieldQueryParser.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.Version; /** * A QueryParser which constructs queries to search multiple fields. 
@@ -64,13 +65,47 @@ * *

In other words, all the query's terms must appear, but it doesn't matter in * what fields they appear.

+ * + * @deprecated Please use {@link #MultiFieldQueryParser(Version, String[], Analyzer, Map)} instead */ public MultiFieldQueryParser(String[] fields, Analyzer analyzer, Map boosts) { - this(fields,analyzer); + this(Version.LUCENE_24, fields, analyzer); this.boosts = boosts; } /** + * Creates a MultiFieldQueryParser. + * Allows passing of a map with term to Boost, and the boost to apply to each term. + * + *

It will, when parse(String query) + * is called, construct a query like this (assuming the query consists of + * two terms and you specify the two fields title and body):

+ * + * + * (title:term1 body:term1) (title:term2 body:term2) + * + * + *

When setDefaultOperator(AND_OPERATOR) is set, the result will be:

+ * + * + * +(title:term1 body:term1) +(title:term2 body:term2) + * + * + *

When you pass a boost (title=>5 body=>10) you can get

+ * + * + * +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0) + * + * + *

In other words, all the query's terms must appear, but it doesn't matter in + * what fields they appear.

+ */ + public MultiFieldQueryParser(Version matchVersion, String[] fields, Analyzer analyzer, Map boosts) { + this(matchVersion, fields, analyzer); + this.boosts = boosts; + } + + /** * Creates a MultiFieldQueryParser. * *

It will, when parse(String query) @@ -89,9 +124,35 @@ * *

In other words, all the query's terms must appear, but it doesn't matter in * what fields they appear.

+ * + * @deprecated Please use {@link #MultiFieldQueryParser(Version, String[], Analyzer)} instead */ public MultiFieldQueryParser(String[] fields, Analyzer analyzer) { - super(null, analyzer); + this(Version.LUCENE_24, fields, analyzer); + } + + /** + * Creates a MultiFieldQueryParser. + * + *

It will, when parse(String query) + * is called, construct a query like this (assuming the query consists of + * two terms and you specify the two fields title and body):

+ * + * + * (title:term1 body:term1) (title:term2 body:term2) + * + * + *

When setDefaultOperator(AND_OPERATOR) is set, the result will be:

+ * + * + * +(title:term1 body:term1) +(title:term2 body:term2) + * + * + *

In other words, all the query's terms must appear, but it doesn't matter in + * what fields they appear.

+ */ + public MultiFieldQueryParser(Version matchVersion, String[] fields, Analyzer analyzer) { + super(matchVersion, null, analyzer); this.fields = fields; } @@ -202,16 +263,40 @@ * @throws ParseException if query parsing fails * @throws IllegalArgumentException if the length of the queries array differs * from the length of the fields array + * @deprecated Use {@link #parse(Version,String[],String[],Analyzer)} instead */ public static Query parse(String[] queries, String[] fields, Analyzer analyzer) throws ParseException { + return parse(Version.LUCENE_24, queries, fields, analyzer); + } + + /** + * Parses a query which searches on the fields specified. + *

+ * If x fields are specified, this effectively constructs: + *

+   * 
+   * (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
+   * 
+   * 
+ * @param matchVersion Lucene version to match; this is passed through to QueryParser. + * @param queries Queries strings to parse + * @param fields Fields to search on + * @param analyzer Analyzer to use + * @throws ParseException if query parsing fails + * @throws IllegalArgumentException if the length of the queries array differs + * from the length of the fields array + */ + public static Query parse(Version matchVersion, String[] queries, String[] fields, + Analyzer analyzer) throws ParseException + { if (queries.length != fields.length) throw new IllegalArgumentException("queries.length != fields.length"); BooleanQuery bQuery = new BooleanQuery(); for (int i = 0; i < fields.length; i++) { - QueryParser qp = new QueryParser(fields[i], analyzer); + QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer); Query q = qp.parse(queries[i]); if (q!=null && // q never null, just being defensive (!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) { @@ -250,14 +335,51 @@ * @throws ParseException if query parsing fails * @throws IllegalArgumentException if the length of the fields array differs * from the length of the flags array + * @deprecated Use {@link #parse(Version, String, String[], BooleanClause.Occur[], Analyzer)} instead */ public static Query parse(String query, String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException { + return parse(Version.LUCENE_24, query, fields, flags, analyzer); + } + + /** + * Parses a query, searching on the fields specified. + * Use this if you need to specify certain fields as required, + * and others as prohibited. + *

+   * Usage:
+   * 
+   * String[] fields = {"filename", "contents", "description"};
+   * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+   *                BooleanClause.Occur.MUST,
+   *                BooleanClause.Occur.MUST_NOT};
+   * MultiFieldQueryParser.parse("query", fields, flags, analyzer);
+   * 
+   * 
+ *

+ * The code above would construct a query: + *

+   * 
+   * (filename:query) +(contents:query) -(description:query)
+   * 
+   * 
+ * + * @param matchVersion Lucene version to match; this is passed through to QueryParser. + * @param query Query string to parse + * @param fields Fields to search on + * @param flags Flags describing the fields + * @param analyzer Analyzer to use + * @throws ParseException if query parsing fails + * @throws IllegalArgumentException if the length of the fields array differs + * from the length of the flags array + */ + public static Query parse(Version matchVersion, String query, String[] fields, + BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException { if (fields.length != flags.length) throw new IllegalArgumentException("fields.length != flags.length"); BooleanQuery bQuery = new BooleanQuery(); for (int i = 0; i < fields.length; i++) { - QueryParser qp = new QueryParser(fields[i], analyzer); + QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer); Query q = qp.parse(query); if (q!=null && // q never null, just being defensive (!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) { @@ -297,16 +419,55 @@ * @throws ParseException if query parsing fails * @throws IllegalArgumentException if the length of the queries, fields, * and flags array differ + * @deprecated Use {@link #parse(Version, String[], String[], BooleanClause.Occur[], Analyzer)} instead */ public static Query parse(String[] queries, String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException { + return parse(Version.LUCENE_24, queries, fields, flags, analyzer); + } + + /** + * Parses a query, searching on the fields specified. + * Use this if you need to specify certain fields as required, + * and others as prohibited. + *

+   * Usage:
+   * 
+   * String[] query = {"query1", "query2", "query3"};
+   * String[] fields = {"filename", "contents", "description"};
+   * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+   *                BooleanClause.Occur.MUST,
+   *                BooleanClause.Occur.MUST_NOT};
+   * MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+   * 
+   * 
+ *

+ * The code above would construct a query: + *

+   * 
+   * (filename:query1) +(contents:query2) -(description:query3)
+   * 
+   * 
+ * + * @param matchVersion Lucene version to match; this is passed through to QueryParser. + * @param queries Queries string to parse + * @param fields Fields to search on + * @param flags Flags describing the fields + * @param analyzer Analyzer to use + * @throws ParseException if query parsing fails + * @throws IllegalArgumentException if the length of the queries, fields, + * and flags array differ + */ + public static Query parse(Version matchVersion, String[] queries, String[] fields, BooleanClause.Occur[] flags, + Analyzer analyzer) throws ParseException + { if (!(queries.length == fields.length && queries.length == flags.length)) throw new IllegalArgumentException("queries, fields, and flags array have have different length"); BooleanQuery bQuery = new BooleanQuery(); for (int i = 0; i < fields.length; i++) { - QueryParser qp = new QueryParser(fields[i], analyzer); + QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer); Query q = qp.parse(queries[i]); if (q!=null && // q never null, just being defensive (!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) { Index: src/java/org/apache/lucene/queryParser/QueryParser.java =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParser.java (revision 828807) +++ src/java/org/apache/lucene/queryParser/QueryParser.java (working copy) @@ -35,6 +35,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.Parameter; +import org.apache.lucene.util.Version; /** * This class is generated by JavaCC. The most important method is @@ -101,6 +102,14 @@ *

NOTE: there is a new QueryParser in contrib, which matches * the same syntax as this class, but is more modular, * enabling substantial customization to how a query is created. + * + * + *

NOTE: You must specify the required {@link Version} + * compatibility when creating QueryParser: + *

*/ public class QueryParser implements QueryParserConstants { @@ -125,7 +134,7 @@ boolean lowercaseExpandedTerms = true; MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; boolean allowLeadingWildcard = false; - boolean enablePositionIncrements = false; + boolean enablePositionIncrements = true; Analyzer analyzer; String field; @@ -158,11 +167,27 @@ /** Constructs a query parser. * @param f the default field for query terms. * @param a used to find terms in the query text. + * @deprecated Use {@link #QueryParser(Version, + * String, Analyzer)} instead. */ public QueryParser(String f, Analyzer a) { + this(Version.LUCENE_24, f, a); + } + + /** Constructs a query parser. + * @param matchVersion Lucene version to match. See above. + * @param f the default field for query terms. + * @param a used to find terms in the query text. + */ + public QueryParser(Version matchVersion, String f, Analyzer a) { this(new FastCharStream(new StringReader(""))); analyzer = a; field = f; + if (matchVersion.onOrAfter(Version.LUCENE_29)) { + enablePositionIncrements = true; + } else { + enablePositionIncrements = false; + } } /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. @@ -759,7 +784,7 @@ DateTools.Resolution resolution = getDateResolution(field); if (resolution == null) { // no default or field specific date resolution has been set, - // use deprecated DateField to maintain compatibilty with + // use deprecated DateField to maintain compatibility with // pre-1.9 Lucene versions. 
part1 = DateField.dateToString(d1); part2 = DateField.dateToString(d2); @@ -1155,7 +1180,7 @@ System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser "); System.exit(0); } - QueryParser qp = new QueryParser("field", + QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new org.apache.lucene.analysis.SimpleAnalyzer()); Query q = qp.parse(args[0]); System.out.println(q.toString("field")); @@ -1591,6 +1616,12 @@ finally { jj_save(0, xla); } } + private boolean jj_3R_2() { + if (jj_scan_token(TERM)) return true; + if (jj_scan_token(COLON)) return true; + return false; + } + private boolean jj_3_1() { Token xsp; xsp = jj_scanpos; @@ -1607,12 +1638,6 @@ return false; } - private boolean jj_3R_2() { - if (jj_scan_token(TERM)) return true; - if (jj_scan_token(COLON)) return true; - return false; - } - /** Generated Token Manager. */ public QueryParserTokenManager token_source; /** Current token. */ @@ -1641,7 +1666,7 @@ private int jj_gc = 0; /** Constructor with user supplied CharStream. */ - public QueryParser(CharStream stream) { + protected QueryParser(CharStream stream) { token_source = new QueryParserTokenManager(stream); token = new Token(); jj_ntk = -1; @@ -1661,7 +1686,7 @@ } /** Constructor with generated Token Manager. */ - public QueryParser(QueryParserTokenManager tm) { + protected QueryParser(QueryParserTokenManager tm) { token_source = tm; token = new Token(); jj_ntk = -1; Index: src/java/org/apache/lucene/queryParser/QueryParser.jj =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 828807) +++ src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy) @@ -59,6 +59,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.Parameter; +import org.apache.lucene.util.Version; /** * This class is generated by JavaCC. The most important method is @@ -125,6 +126,14 @@ *

NOTE: there is a new QueryParser in contrib, which matches * the same syntax as this class, but is more modular, * enabling substantial customization to how a query is created. + * + * + *

NOTE: You must specify the required {@link Version} + * compatibility when creating QueryParser: + *

*/ public class QueryParser { @@ -149,7 +158,7 @@ boolean lowercaseExpandedTerms = true; MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; boolean allowLeadingWildcard = false; - boolean enablePositionIncrements = false; + boolean enablePositionIncrements = true; Analyzer analyzer; String field; @@ -182,11 +191,27 @@ /** Constructs a query parser. * @param f the default field for query terms. * @param a used to find terms in the query text. + * @deprecated Use {@link #QueryParser(Version, + * String, Analyzer)} instead. */ public QueryParser(String f, Analyzer a) { + this(Version.LUCENE_24, f, a); + } + + /** Constructs a query parser. + * @param matchVersion Lucene version to match. See above. + * @param f the default field for query terms. + * @param a used to find terms in the query text. + */ + public QueryParser(Version matchVersion, String f, Analyzer a) { this(new FastCharStream(new StringReader(""))); analyzer = a; field = f; + if (matchVersion.onOrAfter(Version.LUCENE_29)) { + enablePositionIncrements = true; + } else { + enablePositionIncrements = false; + } } /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. 
@@ -1179,7 +1204,7 @@ System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser "); System.exit(0); } - QueryParser qp = new QueryParser("field", + QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new org.apache.lucene.analysis.SimpleAnalyzer()); Query q = qp.parse(args[0]); System.out.println(q.toString("field")); Index: src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 828807) +++ src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy) @@ -33,6 +33,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.Parameter; +import org.apache.lucene.util.Version; /** Token Manager. */ public class QueryParserTokenManager implements QueryParserConstants Index: src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java =================================================================== --- src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (revision 828807) +++ src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (working copy) @@ -35,7 +35,7 @@ * compatibility when creating StandardAnalyzer: *
    *
  • As of 2.9, StopFilter preserves position - * increments by default + * increments *
  • As of 2.4, Tokens incorrectly identified as acronyms * are corrected (see LUCENE-1608 *
Index: src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (revision 828807) +++ src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.Version; /** A grammar-based tokenizer constructed with JFlex * @@ -43,6 +44,14 @@ *

Many applications have specific tokenizer needs. If this tokenizer does * not suit your application, please consider copying this source code * directory to your project and maintaining your own grammar-based tokenizer. + * + * + *

You must specify the required {@link Version} + * compatibility when creating StandardAnalyzer: + *

*/ public class StandardTokenizer extends Tokenizer { @@ -107,9 +116,12 @@ /** * Creates a new instance of the {@link StandardTokenizer}. Attaches the * input to a newly created JFlex scanner. + * + * @deprecated Use {@link #StandardTokenizer(Version, + * Reader)} instead */ public StandardTokenizer(Reader input) { - this(input, false); + this(Version.LUCENE_24, input); } /** @@ -120,6 +132,8 @@ * @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms with HOST. * * See http://issues.apache.org/jira/browse/LUCENE-1068 + * + * @deprecated Use {@link #StandardTokenizer(Version, Reader)} instead */ public StandardTokenizer(Reader input, boolean replaceInvalidAcronym) { super(); @@ -128,7 +142,23 @@ } /** + * Creates a new instance of the {@link org.apache.lucene.analysis.standard.StandardTokenizer}. Attaches + * the input to the newly created JFlex scanner. + * + * @param input The input reader + * + * See http://issues.apache.org/jira/browse/LUCENE-1068 + */ + public StandardTokenizer(Version matchVersion, Reader input) { + super(); + this.scanner = new StandardTokenizerImpl(input); + init(input, matchVersion); + } + + /** * Creates a new StandardTokenizer with a given {@link AttributeSource}. + * + * @deprecated Use {@link #StandardTokenizer(Version, AttributeSource, Reader)} instead */ public StandardTokenizer(AttributeSource source, Reader input, boolean replaceInvalidAcronym) { super(source); @@ -137,7 +167,18 @@ } /** + * Creates a new StandardTokenizer with a given {@link AttributeSource}. 
+ */ + public StandardTokenizer(Version matchVersion, AttributeSource source, Reader input) { + super(source); + this.scanner = new StandardTokenizerImpl(input); + init(input, matchVersion); + } + + /** * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory} + * + * @deprecated Use {@link #StandardTokenizer(Version, AttributeFactory, Reader)} instead */ public StandardTokenizer(AttributeFactory factory, Reader input, boolean replaceInvalidAcronym) { super(factory); @@ -145,6 +186,15 @@ init(input, replaceInvalidAcronym); } + /** + * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory} + */ + public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input) { + super(factory); + this.scanner = new StandardTokenizerImpl(input); + init(input, matchVersion); + } + private void init(Reader input, boolean replaceInvalidAcronym) { this.replaceInvalidAcronym = replaceInvalidAcronym; this.input = input; @@ -153,6 +203,14 @@ posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class); typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class); } + + private void init(Reader input, Version matchVersion) { + if (matchVersion.onOrAfter(Version.LUCENE_24)) { + init(input, true); + } else { + init(input, false); + } + } // this tokenizer generates three attributes: // offset, positionIncrement and type Index: src/java/org/apache/lucene/analysis/StopFilter.java =================================================================== --- src/java/org/apache/lucene/analysis/StopFilter.java (revision 828807) +++ src/java/org/apache/lucene/analysis/StopFilter.java (working copy) @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.queryParser.QueryParser; // for javadoc +import 
org.apache.lucene.util.Version; /** * Removes stop words from a token stream. @@ -243,6 +244,21 @@ } /** + * Returns version-dependent default for + * enablePositionIncrements. Analyzers that embed + * StopFilter use this method when creating the + * StopFilter. Prior to 2.9, this returns false. On 2.9 + * or later, it returns true. + */ + public static boolean getEnablePositionIncrementsVersionDefault(Version matchVersion) { + if (matchVersion.onOrAfter(Version.LUCENE_29)) { + return true; + } else { + return false; + } + } + + /** * Set the default position increments behavior of every StopFilter created from now on. *

* Note: behavior of a single StopFilter instance can be modified Index: build.xml =================================================================== --- build.xml (revision 828807) +++ build.xml (working copy) @@ -587,9 +587,21 @@ - + + + + + + + + Index: contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java =================================================================== --- contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java (revision 828807) +++ contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.standard.*; +import org.apache.lucene.util.Version; import java.io.IOException; import java.io.Reader; @@ -30,20 +31,39 @@ * Available stemmers are listed in org.tartarus.snowball.ext. The name of a * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in * {@link org.tartarus.snowball.ext.EnglishStemmer} is named "English". + * + *

NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.

*/ public class SnowballAnalyzer extends Analyzer { private String name; private Set stopSet; + private final Version matchVersion; + /** Builds the named analyzer with no stop words. + * + * @deprecated Use {@link #SnowballAnalyzer(Version, String)} instead*/ + public SnowballAnalyzer(String name) { + this(Version.LUCENE_23, name); + } + /** Builds the named analyzer with no stop words. */ - public SnowballAnalyzer(String name) { + public SnowballAnalyzer(Version matchVersion, String name) { this.name = name; setOverridesTokenStreamMethod(SnowballAnalyzer.class); + this.matchVersion = matchVersion; } + /** Builds the named analyzer with the given stop words. + * + * @deprecated Use {@link #SnowballAnalyzer(Version, String, String[])} instead*/ + public SnowballAnalyzer(String name, String[] stopWords) { + this(Version.LUCENE_23, name, stopWords); + } + /** Builds the named analyzer with the given stop words. */ - public SnowballAnalyzer(String name, String[] stopWords) { - this(name); + public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) { + this(matchVersion, name); stopSet = StopFilter.makeStopSet(stopWords); } @@ -51,11 +71,12 @@ StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link SnowballFilter} */ public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(reader); + TokenStream result = new StandardTokenizer(matchVersion, reader); result = new StandardFilter(result); result = new LowerCaseFilter(result); if (stopSet != null) - result = new StopFilter(result, stopSet); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stopSet); result = new SnowballFilter(result, name); return result; } @@ -80,11 +101,12 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new
StandardTokenizer(matchVersion, reader); streams.result = new StandardFilter(streams.source); streams.result = new LowerCaseFilter(streams.result); if (stopSet != null) - streams.result = new StopFilter(streams.result, stopSet); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stopSet); streams.result = new SnowballFilter(streams.result, name); setPreviousTokenStream(streams); } else { Index: contrib/CHANGES.txt =================================================================== --- contrib/CHANGES.txt (revision 828807) +++ contrib/CHANGES.txt (working copy) @@ -10,6 +10,15 @@ list. This has no effect on Arabic text, but if you are using a custom stopword list that contains some non-Arabic words, you'll need to fully reindex. (DM Smith via Robert Muir) + +API Changes: + + * LUCENE-2002: Add required Version matchVersion argument when + constructing ComplexPhraseQueryParser and default (as of 2.9) + enablePositionIncrements to true to match StandardAnalyzer's + default. 
Also added required matchVersion to most of the analyzers + (Uwe Schindler, Mike McCandless) + Bug fixes Index: contrib/misc/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java =================================================================== --- contrib/misc/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java (revision 828807) +++ contrib/misc/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java (working copy) @@ -38,6 +38,7 @@ import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.util.Version; /** * QueryParser which permits complex phrase query syntax eg "(john jon @@ -67,10 +68,17 @@ private ComplexPhraseQuery currentPhraseQuery = null; + /** @deprecated Use {@link + #ComplexPhraseQueryParser(Version, String, Analyzer)} + instead.*/ public ComplexPhraseQueryParser(String f, Analyzer a) { - super(f, a); + this(Version.LUCENE_24, f, a); } + public ComplexPhraseQueryParser(Version matchVersion, String f, Analyzer a) { + super(matchVersion, f, a); + } + protected Query getFieldQuery(String field, String queryText, int slop) { ComplexPhraseQuery cpq = new ComplexPhraseQuery(field, queryText, slop); complexPhrases.add(cpq); // add to list of phrases to be parsed once
Lucene analyzer/tokenizer that preferably operates on a String rather than a @@ -139,6 +140,8 @@ private final Pattern pattern; private final boolean toLowerCase; private final Set stopWords; + + private final Version matchVersion; /** * Constructs a new instance with the given parameters. @@ -157,8 +160,33 @@ * WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt") * or other stop words * lists . + * + * @deprecated Use {@link #PatternAnalyzer(Version, Pattern, boolean, Set)} instead */ public PatternAnalyzer(Pattern pattern, boolean toLowerCase, Set stopWords) { + this(Version.LUCENE_24, pattern, toLowerCase, stopWords); + } + + /** + * Constructs a new instance with the given parameters. + * + * @param matchVersion If >= {@link Version#LUCENE_29}, StopFilter.enablePositionIncrement is set to true + * @param pattern + * a regular expression delimiting tokens + * @param toLowerCase + * if true returns tokens after applying + * String.toLowerCase() + * @param stopWords + * if non-null, ignores all tokens that are contained in the + * given stop set (after previously having applied toLowerCase() + * if applicable). For example, created via + * {@link StopFilter#makeStopSet(String[])}and/or + * {@link org.apache.lucene.analysis.WordlistLoader}as in + * WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt") + * or other stop words + * lists . 
+ */ + public PatternAnalyzer(Version matchVersion, Pattern pattern, boolean toLowerCase, Set stopWords) { if (pattern == null) throw new IllegalArgumentException("pattern must not be null"); @@ -170,6 +198,7 @@ this.pattern = pattern; this.toLowerCase = toLowerCase; this.stopWords = stopWords; + this.matchVersion = matchVersion; } /** @@ -197,7 +226,7 @@ } else { stream = new PatternTokenizer(text, pattern, toLowerCase); - if (stopWords != null) stream = new StopFilter(false, stream, stopWords); + if (stopWords != null) stream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), stream, stopWords); } return stream; Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java =================================================================== --- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java (revision 828807) +++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java (working copy) @@ -31,6 +31,7 @@ import org.apache.lucene.analysis.WordlistLoader; import org.apache.lucene.analysis.cn.smart.SentenceTokenizer; import org.apache.lucene.analysis.cn.smart.WordTokenFilter; +import org.apache.lucene.util.Version; /** *

@@ -59,15 +60,25 @@ public class SmartChineseAnalyzer extends Analyzer { private final Set stopWords; + private final Version matchVersion; /** * Create a new SmartChineseAnalyzer, using the default stopword list. + * + * @deprecated Use {@link #SmartChineseAnalyzer(Version)} instead */ public SmartChineseAnalyzer() { - this(true); + this(Version.LUCENE_24, true); } /** + * Create a new SmartChineseAnalyzer, using the default stopword list. + */ + public SmartChineseAnalyzer(Version matchVersion) { + this(matchVersion, true); + } + + /** *

* Create a new SmartChineseAnalyzer, optionally using the default stopword list. *

@@ -77,8 +88,26 @@ *

* * @param useDefaultStopWords true to use the default stopword list. + * + * @deprecated Use {@link #SmartChineseAnalyzer(Version, boolean)} instead */ public SmartChineseAnalyzer(boolean useDefaultStopWords) { + this(Version.LUCENE_24, useDefaultStopWords); + } + + /** + *

+ * Create a new SmartChineseAnalyzer, optionally using the default stopword list. + *

+ *

+ * The included default stopword list is simply a list of punctuation. + * If you do not use this list, punctuation will not be removed from the text! + *

+ * + * @param useDefaultStopWords true to use the default stopword list. + */ + public SmartChineseAnalyzer(Version matchVersion, boolean useDefaultStopWords) { + this.matchVersion = matchVersion; if (useDefaultStopWords) { try { InputStream stream = this.getClass().getResourceAsStream("stopwords.txt"); @@ -101,9 +130,25 @@ * Note: the set should include punctuation, unless you want to index punctuation! *

* @param stopWords {@link Set} of stopwords to use. + * + * @deprecated Use {@link #SmartChineseAnalyzer(Version, Set)} instead */ public SmartChineseAnalyzer(Set stopWords) { + this(Version.LUCENE_24, stopWords); + } + + /** + *

+ * Create a new SmartChineseAnalyzer, using the provided {@link Set} of stopwords. + *

+ *

+ * Note: the set should include punctuation, unless you want to index punctuation! + *

+ * @param stopWords {@link Set} of stopwords to use. + */ + public SmartChineseAnalyzer(Version matchVersion, Set stopWords) { this.stopWords = stopWords; + this.matchVersion = matchVersion; } public TokenStream tokenStream(String fieldName, Reader reader) { @@ -114,7 +159,8 @@ // The porter stemming is too strict, this is not a bug, this is a feature:) result = new PorterStemFilter(result); if (stopWords != null) { - result = new StopFilter(result, stopWords, false); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stopWords, false); } return result; } @@ -134,7 +180,8 @@ streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream); streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream); if (stopWords != null) { - streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopWords, false); + streams.filteredTokenStream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.filteredTokenStream, stopWords, false); } } else { streams.tokenStream.reset(reader); Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java =================================================================== --- contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.Version; /** * Test case for FrenchAnalyzer. 
@@ -32,7 +33,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { public void testAnalyzer() throws Exception { - FrenchAnalyzer fa = new FrenchAnalyzer(); + FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT); // test null reader boolean iaeFlag = false; Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java (working copy) @@ -34,6 +34,7 @@ import org.apache.lucene.analysis.WordlistLoader; import org.apache.lucene.analysis.ar.ArabicLetterTokenizer; import org.apache.lucene.analysis.ar.ArabicNormalizationFilter; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Persian. @@ -66,11 +67,24 @@ */ public static final String STOPWORDS_COMMENT = "#"; + private final Version matchVersion; + /** * Builds an analyzer with the default stop words: * {@link #DEFAULT_STOPWORD_FILE}. + * + * @deprecated Use {@link #PersianAnalyzer(Version)} instead */ public PersianAnalyzer() { + this(Version.LUCENE_24); + } + + /** + * Builds an analyzer with the default stop words: + * {@link #DEFAULT_STOPWORD_FILE}. + */ + public PersianAnalyzer(Version matchVersion) { + this.matchVersion = matchVersion; try { InputStream stream = PersianAnalyzer.class .getResourceAsStream(DEFAULT_STOPWORD_FILE); @@ -86,24 +100,55 @@ /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #PersianAnalyzer(Version, String[])} instead */ public PersianAnalyzer(String[] stopwords) { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. 
+ */ + public PersianAnalyzer(Version matchVersion, String[] stopwords) { stoptable = StopFilter.makeStopSet(stopwords); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #PersianAnalyzer(Version, Hashtable)} instead */ public PersianAnalyzer(Hashtable stopwords) { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public PersianAnalyzer(Version matchVersion, Hashtable stopwords) { stoptable = new HashSet(stopwords.keySet()); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. Lines can be commented out * using {@link #STOPWORDS_COMMENT} + * + * @deprecated Use {@link #PersianAnalyzer(Version, File)} instead */ public PersianAnalyzer(File stopwords) throws IOException { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. Lines can be commented out + * using {@link #STOPWORDS_COMMENT} + */ + public PersianAnalyzer(Version matchVersion, File stopwords) throws IOException { stoptable = WordlistLoader.getWordSet(stopwords, STOPWORDS_COMMENT); + this.matchVersion = matchVersion; } /** @@ -125,7 +170,8 @@ * the order here is important: the stopword list is normalized with the * above! */ - result = new StopFilter(result, stoptable); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stoptable); return result; } @@ -158,7 +204,8 @@ * the order here is important: the stopword list is normalized with the * above! 
*/ - streams.result = new StopFilter(streams.result, stoptable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stoptable); setPreviousTokenStream(streams); } else { streams.source.reset(reader); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Russian language. @@ -193,41 +194,60 @@ * @deprecated Support for non-Unicode encodings will be removed in Lucene 3.0 */ private char[] charset; + private final Version matchVersion; + /** @deprecated Use {@link #RussianAnalyzer(Version)} instead */ + public RussianAnalyzer() { + this(Version.LUCENE_24); + } - public RussianAnalyzer() { + public RussianAnalyzer(Version matchVersion) { charset = RussianCharsets.UnicodeRussian; stopSet = StopFilter.makeStopSet( makeStopWords(RussianCharsets.UnicodeRussian)); + this.matchVersion = matchVersion; } /** * Builds an analyzer. - * @deprecated Use {@link #RussianAnalyzer()} instead. + * @deprecated Use {@link #RussianAnalyzer(Version)} instead. */ public RussianAnalyzer(char[] charset) { this.charset = charset; stopSet = StopFilter.makeStopSet(makeStopWords(charset)); + matchVersion = Version.LUCENE_24; } /** * Builds an analyzer with the given stop words. - * @deprecated Use {@link #RussianAnalyzer(String[])} instead. + * @deprecated Use {@link #RussianAnalyzer(Version,String[])} instead. 
*/ public RussianAnalyzer(char[] charset, String[] stopwords) { this.charset = charset; stopSet = StopFilter.makeStopSet(stopwords); + matchVersion = Version.LUCENE_24; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #RussianAnalyzer(Version,String[])} instead. */ public RussianAnalyzer(String[] stopwords) { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public RussianAnalyzer(Version matchVersion, String[] stopwords) + { this.charset = RussianCharsets.UnicodeRussian; stopSet = StopFilter.makeStopSet(stopwords); + this.matchVersion = matchVersion; } /** Takes russian stop words and translates them to a String array, using @@ -254,22 +274,36 @@ /** * Builds an analyzer with the given stop words. * TODO: create a Set version of this ctor - * @deprecated Use {@link #RussianAnalyzer(Map)} instead. + * + * @deprecated Use {@link #RussianAnalyzer(Version, Map)} instead. */ public RussianAnalyzer(char[] charset, Map stopwords) { this.charset = charset; stopSet = new HashSet(stopwords.keySet()); + matchVersion = Version.LUCENE_24; } /** * Builds an analyzer with the given stop words. * TODO: create a Set version of this ctor + * + * @deprecated Use {@link #RussianAnalyzer(Version, Map)} instead. */ public RussianAnalyzer(Map stopwords) { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. 
+ * TODO: create a Set version of this ctor + */ + public RussianAnalyzer(Version matchVersion, Map stopwords) + { charset = RussianCharsets.UnicodeRussian; stopSet = new HashSet(stopwords.keySet()); + this.matchVersion = matchVersion; } /** @@ -285,7 +319,8 @@ { TokenStream result = new RussianLetterTokenizer(reader, charset); result = new RussianLowerCaseFilter(result, charset); - result = new StopFilter(result, stopSet); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stopSet); result = new RussianStemFilter(result, charset); return result; } @@ -311,7 +346,8 @@ streams = new SavedStreams(); streams.source = new RussianLetterTokenizer(reader, charset); streams.result = new RussianLowerCaseFilter(streams.source, charset); - streams.result = new StopFilter(streams.result, stopSet); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stopSet); streams.result = new RussianStemFilter(streams.result, charset); setPreviousTokenStream(streams); } else { Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (working copy) @@ -33,6 +33,7 @@ import org.apache.lucene.analysis.WordlistLoader; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for German language. @@ -44,6 +45,9 @@ * exclusion list is empty by default. *

* + *

NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.

+ * * @version $Id$ */ public class GermanAnalyzer extends Analyzer { @@ -76,37 +80,80 @@ */ private Set exclusionSet = new HashSet(); + private final Version matchVersion; + /** * Builds an analyzer with the default stop words: * {@link #GERMAN_STOP_WORDS}. + * + * @deprecated Use {@link #GermanAnalyzer(Version)} instead */ public GermanAnalyzer() { + this(Version.LUCENE_23); + } + + /** + * Builds an analyzer with the default stop words: + * {@link #GERMAN_STOP_WORDS}. + */ + public GermanAnalyzer(Version matchVersion) { stopSet = StopFilter.makeStopSet(GERMAN_STOP_WORDS); setOverridesTokenStreamMethod(GermanAnalyzer.class); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #GermanAnalyzer(Version, String[])} instead */ public GermanAnalyzer(String[] stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public GermanAnalyzer(Version matchVersion, String[] stopwords) { stopSet = StopFilter.makeStopSet(stopwords); setOverridesTokenStreamMethod(GermanAnalyzer.class); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #GermanAnalyzer(Version, Map)} instead */ public GermanAnalyzer(Map stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public GermanAnalyzer(Version matchVersion, Map stopwords) { stopSet = new HashSet(stopwords.keySet()); setOverridesTokenStreamMethod(GermanAnalyzer.class); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #GermanAnalyzer(Version, File)} instead */ public GermanAnalyzer(File stopwords) throws IOException { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. 
+ */ + public GermanAnalyzer(Version matchVersion, File stopwords) throws IOException { stopSet = WordlistLoader.getWordSet(stopwords); setOverridesTokenStreamMethod(GermanAnalyzer.class); + this.matchVersion = matchVersion; } /** @@ -141,10 +188,11 @@ * {@link GermanStemFilter} */ public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(reader); + TokenStream result = new StandardTokenizer(matchVersion, reader); result = new StandardFilter(result); result = new LowerCaseFilter(result); - result = new StopFilter(result, stopSet); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stopSet); result = new GermanStemFilter(result, exclusionSet); return result; } @@ -173,10 +221,11 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new StandardFilter(streams.source); streams.result = new LowerCaseFilter(streams.result); - streams.result = new StopFilter(streams.result, stopSet); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stopSet); streams.result = new GermanStemFilter(streams.result, exclusionSet); setPreviousTokenStream(streams); } else { Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardTokenizer; +import 
org.apache.lucene.util.Version; import java.io.IOException; import java.io.Reader; @@ -36,6 +37,9 @@ * that will not be indexed at all). * A default set of stopwords is used unless an alternative list is specified. *

+ * + *

NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.

*/ public final class GreekAnalyzer extends Analyzer { @@ -159,40 +163,62 @@ */ private char[] charset; + private final Version matchVersion; + + /** @deprecated Use {@link #GreekAnalyzer(Version)} instead */ public GreekAnalyzer() { + this(Version.LUCENE_23); + } + + public GreekAnalyzer(Version matchVersion) { charset = GreekCharsets.UnicodeGreek; stopSet = StopFilter.makeStopSet( makeStopWords(GreekCharsets.UnicodeGreek)); + this.matchVersion = matchVersion; } /** * Builds an analyzer. - * @deprecated Use {@link #GreekAnalyzer()} instead. + * @deprecated Use {@link #GreekAnalyzer(Version)} instead. */ public GreekAnalyzer(char[] charset) { this.charset = charset; stopSet = StopFilter.makeStopSet(makeStopWords(charset)); + matchVersion = Version.LUCENE_23; } /** * Builds an analyzer with the given stop words. * @param stopwords Array of stopwords to use. + * + * @deprecated Use {@link #GreekAnalyzer(Version, String[])} instead */ public GreekAnalyzer(String [] stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + * @param stopwords Array of stopwords to use. + */ + public GreekAnalyzer(Version matchVersion, String [] stopwords) + { charset = GreekCharsets.UnicodeGreek; stopSet = StopFilter.makeStopSet(stopwords); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. - * @deprecated Use {@link #GreekAnalyzer(String[])} instead. + * @deprecated Use {@link #GreekAnalyzer(Version, String[])} instead. */ public GreekAnalyzer(char[] charset, String[] stopwords) { this.charset = charset; stopSet = StopFilter.makeStopSet(stopwords); + matchVersion = Version.LUCENE_23; } /** @@ -219,21 +245,33 @@ /** * Builds an analyzer with the given stop words. - * @deprecated Use {@link #GreekAnalyzer(Map)} instead. + * @deprecated Use {@link #GreekAnalyzer(Version, Map)} instead. 
*/ public GreekAnalyzer(char[] charset, Map stopwords) { this.charset = charset; stopSet = new HashSet(stopwords.keySet()); + matchVersion = Version.LUCENE_23; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #GreekAnalyzer(Version,Map)} instead */ public GreekAnalyzer(Map stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public GreekAnalyzer(Version matchVersion, Map stopwords) + { charset = GreekCharsets.UnicodeGreek; stopSet = new HashSet(stopwords.keySet()); + this.matchVersion = matchVersion; } /** @@ -244,9 +282,10 @@ */ public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(reader); + TokenStream result = new StandardTokenizer(matchVersion, reader); result = new GreekLowerCaseFilter(result, charset); - result = new StopFilter(result, stopSet); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stopSet); return result; } @@ -267,9 +306,10 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new GreekLowerCaseFilter(streams.source, charset); - streams.result = new StopFilter(streams.result, stopSet); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stopSet); setPreviousTokenStream(streams); } else { streams.source.reset(reader); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (working 
copy) @@ -32,6 +32,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.WordlistLoader; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Arabic. @@ -69,10 +70,22 @@ */ public static final String STOPWORDS_COMMENT = "#"; + private final Version matchVersion; + /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. + * + * @deprecated Use {@link #ArabicAnalyzer(Version)} instead */ public ArabicAnalyzer() { + this(Version.LUCENE_24); + } + + /** + * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. + */ + public ArabicAnalyzer(Version matchVersion) { + this.matchVersion = matchVersion; try { InputStream stream = ArabicAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE); InputStreamReader reader = new InputStreamReader(stream, "UTF-8"); @@ -87,23 +100,53 @@ /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #ArabicAnalyzer(Version, String[])} instead */ public ArabicAnalyzer( String[] stopwords ) { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public ArabicAnalyzer( Version matchVersion, String[] stopwords ) { stoptable = StopFilter.makeStopSet( stopwords ); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #ArabicAnalyzer(Version, Hashtable)} instead */ public ArabicAnalyzer( Hashtable stopwords ) { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public ArabicAnalyzer( Version matchVersion, Hashtable stopwords ) { stoptable = new HashSet(stopwords.keySet()); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. 
Lines can be commented out using {@link #STOPWORDS_COMMENT} + * + * @deprecated Use {@link #ArabicAnalyzer(Version, File)} instead */ public ArabicAnalyzer( File stopwords ) throws IOException { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. Lines can be commented out using {@link #STOPWORDS_COMMENT} + */ + public ArabicAnalyzer( Version matchVersion, File stopwords ) throws IOException { stoptable = WordlistLoader.getWordSet( stopwords, STOPWORDS_COMMENT); + this.matchVersion = matchVersion; } @@ -117,7 +160,8 @@ public final TokenStream tokenStream(String fieldName, Reader reader) { TokenStream result = new ArabicLetterTokenizer( reader ); result = new LowerCaseFilter(result); - result = new StopFilter( result, stoptable ); + result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stoptable ); result = new ArabicNormalizationFilter( result ); result = new ArabicStemFilter( result ); @@ -144,7 +188,8 @@ streams = new SavedStreams(); streams.source = new ArabicLetterTokenizer(reader); streams.result = new LowerCaseFilter(streams.source); - streams.result = new StopFilter(streams.result, stoptable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stoptable); streams.result = new ArabicNormalizationFilter(streams.result); streams.result = new ArabicStemFilter(streams.result); setPreviousTokenStream(streams); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (working copy) @@ -32,6 +32,7 @@ import org.apache.lucene.analysis.WordlistLoader; import 
org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Brazilian Portuguese language. @@ -40,6 +41,9 @@ * will not be indexed at all) and an external list of exclusions (words that will * not be stemmed, but indexed). *

+ * + *

NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.

*/ public final class BrazilianAnalyzer extends Analyzer { @@ -77,36 +81,76 @@ * Contains words that should be indexed but not stemmed. */ private Set excltable = new HashSet(); + private final Version matchVersion; /** * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}). + * + * @deprecated Use {@link #BrazilianAnalyzer(Version)} instead */ public BrazilianAnalyzer() { - stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS ); + this(Version.LUCENE_23); } /** + * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}). + */ + public BrazilianAnalyzer(Version matchVersion) { + stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS ); + this.matchVersion = matchVersion; + } + + /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #BrazilianAnalyzer(Version, String[])} instead */ public BrazilianAnalyzer( String[] stopwords ) { - stoptable = StopFilter.makeStopSet( stopwords ); + this(Version.LUCENE_23, stopwords); } /** * Builds an analyzer with the given stop words. */ + public BrazilianAnalyzer( Version matchVersion, String[] stopwords ) { + stoptable = StopFilter.makeStopSet( stopwords ); + this.matchVersion = matchVersion; + } + + /** + * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #BrazilianAnalyzer(Version, Map)} instead + */ public BrazilianAnalyzer( Map stopwords ) { - stoptable = new HashSet(stopwords.keySet()); + this(Version.LUCENE_23, stopwords); } /** * Builds an analyzer with the given stop words. */ + public BrazilianAnalyzer( Version matchVersion, Map stopwords ) { + stoptable = new HashSet(stopwords.keySet()); + this.matchVersion = matchVersion; + } + + /** + * Builds an analyzer with the given stop words. 
+ * @deprecated Use {@link #BrazilianAnalyzer(Version, File)} instead + */ public BrazilianAnalyzer( File stopwords ) throws IOException { - stoptable = WordlistLoader.getWordSet( stopwords ); + this(Version.LUCENE_23, stopwords); } /** + * Builds an analyzer with the given stop words. + */ + public BrazilianAnalyzer( Version matchVersion, File stopwords ) throws IOException { + stoptable = WordlistLoader.getWordSet( stopwords ); + this.matchVersion = matchVersion; + } + + /** * Builds an exclusionlist from an array of Strings. */ public void setStemExclusionTable( String[] exclusionlist ) { @@ -136,10 +180,11 @@ * {@link BrazilianStemFilter}. */ public final TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer( reader ); + TokenStream result = new StandardTokenizer( matchVersion, reader ); result = new LowerCaseFilter( result ); result = new StandardFilter( result ); - result = new StopFilter( result, stoptable ); + result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stoptable ); result = new BrazilianStemFilter( result, excltable ); return result; } @@ -162,10 +207,11 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new LowerCaseFilter(streams.source); streams.result = new StandardFilter(streams.result); - streams.result = new StopFilter(streams.result, stoptable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stoptable); streams.result = new BrazilianStemFilter(streams.result, excltable); setPreviousTokenStream(streams); } else { Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java 
=================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.Version; import java.io.IOException; import java.io.Reader; @@ -48,15 +49,27 @@ //The default maximum percentage (40%) of index documents which //can contain a term, after which the term is considered to be a stop word. public static final float defaultMaxDocFreqPercent = 0.4f; + private final Version matchVersion; /** * Initializes this analyzer with the Analyzer object that actually produces the tokens * * @param delegate The choice of {@link Analyzer} that is used to produce the token stream which needs filtering + * @deprecated Use {@link #QueryAutoStopWordAnalyzer(Version, Analyzer)} instead */ public QueryAutoStopWordAnalyzer(Analyzer delegate) { + this(Version.LUCENE_24, delegate); + } + + /** + * Initializes this analyzer with the Analyzer object that actually produces the tokens + * + * @param delegate The choice of {@link Analyzer} that is used to produce the token stream which needs filtering + */ + public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer delegate) { this.delegate = delegate; setOverridesTokenStreamMethod(QueryAutoStopWordAnalyzer.class); + this.matchVersion = matchVersion; } /** @@ -175,7 +188,8 @@ } HashSet stopWords = (HashSet) stopWordsPerField.get(fieldName); if (stopWords != null) { - result = new StopFilter(result, stopWords); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stopWords); } return result; } @@ -217,7 +231,8 @@ /* if there are any stopwords for the field, save the stopfilter 
*/ HashSet stopWords = (HashSet) stopWordsPerField.get(fieldName); if (stopWords != null) - streams.withStopFilter = new StopFilter(streams.wrapped, stopWords); + streams.withStopFilter = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.wrapped, stopWords); else streams.withStopFilter = streams.wrapped; @@ -238,7 +253,8 @@ streams.wrapped = result; HashSet stopWords = (HashSet) stopWordsPerField.get(fieldName); if (stopWords != null) - streams.withStopFilter = new StopFilter(streams.wrapped, stopWords); + streams.withStopFilter = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.wrapped, stopWords); else streams.withStopFilter = streams.wrapped; } Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.util.Version; import java.io.IOException; import java.io.Reader; @@ -56,23 +57,45 @@ * stop word list */ private Set stopTable; + private final Version matchVersion; //~ Constructors ----------------------------------------------------------- /** * Builds an analyzer which removes words in {@link #STOP_WORDS}. + * + * @deprecated Use {@link #CJKAnalyzer(Version)} instead */ public CJKAnalyzer() { + this(Version.LUCENE_24); + } + + /** + * Builds an analyzer which removes words in {@link #STOP_WORDS}. + */ + public CJKAnalyzer(Version matchVersion) { stopTable = StopFilter.makeStopSet(STOP_WORDS); + this.matchVersion = matchVersion; } /** * Builds an analyzer which removes words in the provided array. 
* * @param stopWords stop word array + * @deprecated Use {@link #CJKAnalyzer(Version, String[])} instead */ public CJKAnalyzer(String[] stopWords) { + this(Version.LUCENE_24, stopWords); + } + + /** + * Builds an analyzer which removes words in the provided array. + * + * @param stopWords stop word array + */ + public CJKAnalyzer(Version matchVersion, String[] stopWords) { stopTable = StopFilter.makeStopSet(stopWords); + this.matchVersion = matchVersion; } //~ Methods ---------------------------------------------------------------- @@ -86,7 +109,8 @@ * {@link StopFilter} */ public final TokenStream tokenStream(String fieldName, Reader reader) { - return new StopFilter(new CJKTokenizer(reader), stopTable); + return new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + new CJKTokenizer(reader), stopTable); } private class SavedStreams { @@ -109,7 +133,8 @@ if (streams == null) { streams = new SavedStreams(); streams.source = new CJKTokenizer(reader); - streams.result = new StopFilter(streams.source, stopTable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.source, stopTable); setPreviousTokenStream(streams); } else { streams.source.reset(reader); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (working copy) @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.WordlistLoader; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; import java.io.File; import java.io.IOException; @@ -43,6 +44,17 @@ * exclusion list is empty by default. *

* + * + *

You must specify the required {@link Version} + * compatibility when creating FrenchAnalyzer: + *

    + *
  • As of 2.9, StopFilter preserves position + * increments + *
+ * + *

NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.

+ * * @version $Id$ */ public final class FrenchAnalyzer extends Analyzer { @@ -84,26 +96,60 @@ */ private Set excltable = new HashSet(); + private final Version matchVersion; + /** * Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}). + * + * @deprecated Use {@link #FrenchAnalyzer(Version)} instead. */ public FrenchAnalyzer() { + this(Version.LUCENE_23); + } + + /** + * Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}). + */ + public FrenchAnalyzer(Version matchVersion) { stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #FrenchAnalyzer(Version, + * String[])} instead. */ public FrenchAnalyzer(String[] stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public FrenchAnalyzer(Version matchVersion, String[] stopwords) { stoptable = StopFilter.makeStopSet(stopwords); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. * @throws IOException + * + * @deprecated Use {@link #FrenchAnalyzer(Version, File)} instead */ public FrenchAnalyzer(File stopwords) throws IOException { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. 
+ * @throws IOException + */ + public FrenchAnalyzer(Version matchVersion, File stopwords) throws IOException { stoptable = new HashSet(WordlistLoader.getWordSet(stopwords)); + this.matchVersion = matchVersion; } /** @@ -144,9 +190,10 @@ if (fieldName == null) throw new IllegalArgumentException("fieldName must not be null"); if (reader == null) throw new IllegalArgumentException("reader must not be null"); - TokenStream result = new StandardTokenizer(reader); + TokenStream result = new StandardTokenizer(matchVersion, reader); result = new StandardFilter(result); - result = new StopFilter(result, stoptable); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stoptable); result = new FrenchStemFilter(result, excltable); // Convert to lowercase after stemming! result = new LowerCaseFilter(result); @@ -171,9 +218,10 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new StandardFilter(streams.source); - streams.result = new StopFilter(streams.result, stoptable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stoptable); streams.result = new FrenchStemFilter(streams.result, excltable); // Convert to lowercase after stemming! 
streams.result = new LowerCaseFilter(streams.result); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; import java.io.File; import java.io.IOException; @@ -42,6 +43,9 @@ * A default set of stopwords is used unless an alternative list is specified, but the * exclusion list is empty by default. *

+ * + *

NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.

*/ public class DutchAnalyzer extends Analyzer { /** @@ -73,48 +77,92 @@ private Set excltable = new HashSet(); private Map stemdict = new HashMap(); + private final Version matchVersion; + /** + * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS}) + * and a few default entries for the stem exclusion table. + * + * @deprecated Use {@link #DutchAnalyzer(Version)} instead + */ + public DutchAnalyzer() { + this(Version.LUCENE_23); + } /** * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS}) * and a few default entries for the stem exclusion table. * */ - public DutchAnalyzer() { + public DutchAnalyzer(Version matchVersion) { setOverridesTokenStreamMethod(DutchAnalyzer.class); stoptable = StopFilter.makeStopSet(DUTCH_STOP_WORDS); stemdict.put("fiets", "fiets"); //otherwise fiet stemdict.put("bromfiets", "bromfiets"); //otherwise bromfiet stemdict.put("ei", "eier"); stemdict.put("kind", "kinder"); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. * * @param stopwords + * @deprecated Use {@link #DutchAnalyzer(Version, String[])} instead */ public DutchAnalyzer(String[] stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + * + * @param matchVersion + * @param stopwords + */ + public DutchAnalyzer(Version matchVersion, String[] stopwords) { setOverridesTokenStreamMethod(DutchAnalyzer.class); stoptable = StopFilter.makeStopSet(stopwords); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. * * @param stopwords + * @deprecated Use {@link #DutchAnalyzer(Version, HashSet)} instead */ public DutchAnalyzer(HashSet stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. 
+ * + * @param stopwords + */ + public DutchAnalyzer(Version matchVersion, HashSet stopwords) { setOverridesTokenStreamMethod(DutchAnalyzer.class); stoptable = stopwords; + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. * * @param stopwords + * @deprecated Use {@link #DutchAnalyzer(Version, File)} instead */ public DutchAnalyzer(File stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + * + * @param stopwords + */ + public DutchAnalyzer(Version matchVersion, File stopwords) { setOverridesTokenStreamMethod(DutchAnalyzer.class); try { stoptable = org.apache.lucene.analysis.WordlistLoader.getWordSet(stopwords); @@ -122,6 +170,7 @@ // TODO: throw IOException throw new RuntimeException(e); } + this.matchVersion = matchVersion; } /** @@ -179,9 +228,10 @@ * and {@link DutchStemFilter} */ public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(reader); + TokenStream result = new StandardTokenizer(matchVersion, reader); result = new StandardFilter(result); - result = new StopFilter(result, stoptable); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stoptable); result = new DutchStemFilter(result, excltable, stemdict); return result; } @@ -211,9 +261,10 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new StandardFilter(streams.source); - streams.result = new StopFilter(streams.result, stoptable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stoptable); streams.result = new DutchStemFilter(streams.result, excltable, stemdict); setPreviousTokenStream(streams); } else { Index: 
contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java (working copy) @@ -25,22 +25,34 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words. * @version 0.2 + * + *

NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.

*/ public class ThaiAnalyzer extends Analyzer { + private final Version matchVersion; + + /** @deprecated Use {@link #ThaiAnalyzer(Version)} instead */ + public ThaiAnalyzer() { + this(Version.LUCENE_23); + } - public ThaiAnalyzer() { + public ThaiAnalyzer(Version matchVersion) { setOverridesTokenStreamMethod(ThaiAnalyzer.class); + this.matchVersion = matchVersion; } public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream ts = new StandardTokenizer(reader); + TokenStream ts = new StandardTokenizer(matchVersion, reader); ts = new StandardFilter(ts); ts = new ThaiWordFilter(ts); - ts = new StopFilter(ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET); + ts = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET); return ts; } @@ -60,10 +72,11 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new StandardFilter(streams.source); streams.result = new ThaiWordFilter(streams.result); - streams.result = new StopFilter(streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET); setPreviousTokenStream(streams); } else { streams.source.reset(reader); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (working copy) @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.WordlistLoader; import org.apache.lucene.analysis.standard.StandardFilter; import 
org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; import java.io.*; import java.util.HashSet; @@ -37,6 +38,9 @@ * will not be indexed at all). * A default set of stopwords is used unless an alternative list is specified. *

+ * + *

NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.

*/ public final class CzechAnalyzer extends Analyzer { @@ -68,32 +72,70 @@ * Contains the stopwords used with the {@link StopFilter}. */ private Set stoptable; + private final Version matchVersion; /** * Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}). + * + * @deprecated Use {@link #CzechAnalyzer(Version)} instead */ public CzechAnalyzer() { - stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS ); + this(Version.LUCENE_23); } + /** + * Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}). + */ + public CzechAnalyzer(Version matchVersion) { + stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS ); + this.matchVersion = matchVersion; + } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #CzechAnalyzer(Version, String[])} instead */ public CzechAnalyzer( String[] stopwords ) { - stoptable = StopFilter.makeStopSet( stopwords ); + this(Version.LUCENE_23, stopwords); } + /** + * Builds an analyzer with the given stop words. + */ + public CzechAnalyzer(Version matchVersion, String[] stopwords) { + stoptable = StopFilter.makeStopSet( stopwords ); + this.matchVersion = matchVersion; + } + + /** + * @deprecated Use {@link #CzechAnalyzer(Version, HashSet)} instead + */ public CzechAnalyzer( HashSet stopwords ) { - stoptable = stopwords; + this(Version.LUCENE_23, stopwords); } + public CzechAnalyzer(Version matchVersion, HashSet stopwords) { + stoptable = stopwords; + this.matchVersion = matchVersion; + } + /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #CzechAnalyzer(Version, File)} instead */ public CzechAnalyzer( File stopwords ) throws IOException { - stoptable = WordlistLoader.getWordSet( stopwords ); + this(Version.LUCENE_23, stopwords); } + /** + * Builds an analyzer with the given stop words. 
+ */ + public CzechAnalyzer(Version matchVersion, File stopwords ) throws IOException { + stoptable = WordlistLoader.getWordSet( stopwords ); + this.matchVersion = matchVersion; + } + /** * Loads stopwords hash from resource stream (file, database...). * @param wordfile File containing the wordlist @@ -135,10 +177,11 @@ * {@link StandardFilter}, {@link LowerCaseFilter}, and {@link StopFilter} */ public final TokenStream tokenStream( String fieldName, Reader reader ) { - TokenStream result = new StandardTokenizer( reader ); + TokenStream result = new StandardTokenizer( matchVersion, reader ); result = new StandardFilter( result ); result = new LowerCaseFilter( result ); - result = new StopFilter( result, stoptable ); + result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stoptable ); return result; } @@ -159,10 +202,11 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new StandardFilter(streams.source); streams.result = new LowerCaseFilter(streams.result); - streams.result = new StopFilter(streams.result, stoptable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stoptable); setPreviousTokenStream(streams); } else { streams.source.reset(reader);