Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 828807)
+++ CHANGES.txt (working copy)
@@ -42,6 +42,11 @@
settings in StandardAnalyzer to be compatible with the coming
Lucene 3.0. (Uwe Schindler)
+ * LUCENE-2002: Add required Version matchVersion argument when
+ constructing QueryParser or MultiFieldQueryParser and, default (as
+ of 2.9) enablePositionIncrements to true to match
+ StandardAnalyzer's 2.9 default (Uwe Schindler, Mike McCandless)
+
Documentation
* LUCENE-1955: Fix Hits deprecation notice to point users in right
Index: src/test/org/apache/lucene/queryParser/TestQueryParser.java
===================================================================
--- src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 828807)
+++ src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy)
@@ -47,6 +47,7 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.FuzzyQuery;
@@ -60,7 +61,10 @@
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.util.LocalizedTestCase;
+import org.apache.lucene.util.Version;
/**
* Tests QueryParser.
@@ -1014,4 +1018,46 @@
BooleanQuery.setMaxClauseCount(originalMaxClauses);
}
+ // LUCENE-2002: make sure defaults for StandardAnalyzer's
+ // enableStopPositionIncr & QueryParser's enablePosIncr
+ // "match"
+ public void testPositionIncrements() throws Exception {
+ Directory dir = new MockRAMDirectory();
+ Analyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT);
+ IndexWriter w = new IndexWriter(dir, a, IndexWriter.MaxFieldLength.UNLIMITED);
+ Document doc = new Document();
+ doc.add(new Field("f", "the wizard of ozzy", Field.Store.NO, Field.Index.ANALYZED));
+ w.addDocument(doc);
+ IndexReader r = w.getReader();
+ w.close();
+ IndexSearcher s = new IndexSearcher(r);
+ QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "f", a);
+ Query q = qp.parse("\"wizard of ozzy\"");
+ assertEquals(1, s.search(q, 1).totalHits);
+ r.close();
+ dir.close();
+ }
+
+ // LUCENE-2002: unfortunately, whenever you regen
+ // QueryParser (with javacc), you have to manually edit
+ // these constructors to make them protected not public:
+ //
+ // protected QueryParser(CharStream stream)
+ //
+ // protected QueryParser(QueryParserTokenManager tm)
+ public void testProtectedCtors() throws Exception {
+ try {
+ QueryParser.class.getConstructor(new Class[] {CharStream.class});
+ fail("please switch public QueryParser(CharStream) to be protected");
+ } catch (NoSuchMethodException nsme) {
+ // expected
+ }
+ try {
+ QueryParser.class.getConstructor(new Class[] {QueryParserTokenManager.class});
+ fail("please switch public QueryParser(QueryParserTokenManager) to be protected");
+ } catch (NoSuchMethodException nsme) {
+ // expected
+ }
+ }
+
}
Index: src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java (revision 828807)
+++ src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java (working copy)
@@ -5,6 +5,7 @@
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.Version;
import java.io.StringReader;
@@ -108,15 +109,24 @@
}
public void testDomainNames() throws Exception {
- // Don't reuse a because we alter its state (setReplaceInvalidAcronym)
- StandardAnalyzer a2 = new StandardAnalyzer();
+ // Don't reuse a because we alter its state
+ // (setReplaceInvalidAcronym)
+
+ // Current lucene should not show the bug
+ StandardAnalyzer a2 = new StandardAnalyzer(Version.LUCENE_CURRENT);
// domain names
assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"});
//Notice the trailing . See https://issues.apache.org/jira/browse/LUCENE-1068.
// the following should be recognized as HOST:
assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { " In other words, all the query's terms must appear, but it doesn't matter in
* what fields they appear. It will, when parse(String query)
+ * is called, construct a query like this (assuming the query consists of
+ * two terms and you specify the two fields When setDefaultOperator(AND_OPERATOR) is set, the result will be: When you pass a boost (title=>5 body=>10) you can get In other words, all the query's terms must appear, but it doesn't matter in
+ * what fields they appear. It will, when parse(String query)
@@ -89,9 +124,35 @@
*
* In other words, all the query's terms must appear, but it doesn't matter in
* what fields they appear. It will, when parse(String query)
+ * is called, construct a query like this (assuming the query consists of
+ * two terms and you specify the two fields When setDefaultOperator(AND_OPERATOR) is set, the result will be: In other words, all the query's terms must appear, but it doesn't matter in
+ * what fields they appear.
+ * If x fields are specified, this effectively constructs:
+ *
+ * The code above would construct a query:
+ *
+ * The code above would construct a query:
+ * NOTE: there is a new QueryParser in contrib, which matches
* the same syntax as this class, but is more modular,
* enabling substantial customization to how a query is created.
+ *
+ *
+ * NOTE: You must specify the required {@link Version}
+ * compatibility when creating QueryParser:
+ * NOTE: there is a new QueryParser in contrib, which matches
* the same syntax as this class, but is more modular,
* enabling substantial customization to how a query is created.
+ *
+ *
+ * NOTE: You must specify the required {@link Version}
+ * compatibility when creating QueryParser:
+ * Many applications have specific tokenizer needs. If this tokenizer does
* not suit your application, please consider copying this source code
* directory to your project and maintaining your own grammar-based tokenizer.
+ *
+ *
+ * You must specify the required {@link Version}
+ * compatibility when creating StandardAnalyzer:
+ *
* Note: behavior of a single StopFilter instance can be modified
Index: build.xml
===================================================================
--- build.xml (revision 828807)
+++ build.xml (working copy)
@@ -587,9 +587,21 @@
NOTE: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.
@@ -59,15 +60,25 @@
public class SmartChineseAnalyzer extends Analyzer {
private final Set stopWords;
+ private final Version matchVersion;
/**
* Create a new SmartChineseAnalyzer, using the default stopword list.
+ *
+ * @deprecated Use {@link #SmartChineseAnalyzer(Version)} instead
*/
public SmartChineseAnalyzer() {
- this(true);
+ this(Version.LUCENE_24, true);
}
/**
+ * Create a new SmartChineseAnalyzer, using the default stopword list.
+ */
+ public SmartChineseAnalyzer(Version matchVersion) {
+ this(matchVersion, true);
+ }
+
+ /**
*
* Create a new SmartChineseAnalyzer, optionally using the default stopword list.
*
+ * Create a new SmartChineseAnalyzer, optionally using the default stopword list.
+ *
+ * The included default stopword list is simply a list of punctuation.
+ * If you do not use this list, punctuation will not be removed from the text!
+ * title and body):
+ * (title:term1 body:term1) (title:term2 body:term2)
+ *
+ *
+ *
+ * +(title:term1 body:term1) +(title:term2 body:term2)
+ *
+ *
+ *
+ * +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
+ *
+ *
+ * title and body):
+ * (title:term1 body:term1) (title:term2 body:term2)
+ *
+ *
+ *
+ * +(title:term1 body:term1) +(title:term2 body:term2)
+ *
+ *
+ *
+ *
+ * @param matchVersion Lucene version to match; this is passed through to QueryParser.
+ * @param queries Queries strings to parse
+ * @param fields Fields to search on
+ * @param analyzer Analyzer to use
+ * @throws ParseException if query parsing fails
+ * @throws IllegalArgumentException if the length of the queries array differs
+ * from the length of the fields array
+ */
+ public static Query parse(Version matchVersion, String[] queries, String[] fields,
+ Analyzer analyzer) throws ParseException
+ {
if (queries.length != fields.length)
throw new IllegalArgumentException("queries.length != fields.length");
BooleanQuery bQuery = new BooleanQuery();
for (int i = 0; i < fields.length; i++)
{
- QueryParser qp = new QueryParser(fields[i], analyzer);
+ QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
Query q = qp.parse(queries[i]);
if (q!=null && // q never null, just being defensive
(!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) {
@@ -250,14 +335,51 @@
* @throws ParseException if query parsing fails
* @throws IllegalArgumentException if the length of the fields array differs
* from the length of the flags array
+ * @deprecated Use {@link #parse(Version, String, String[], BooleanClause.Occur[], Analyzer)} instead
*/
public static Query parse(String query, String[] fields,
BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException {
+ return parse(Version.LUCENE_24, query, fields, flags, analyzer);
+ }
+
+ /**
+ * Parses a query, searching on the fields specified.
+ * Use this if you need to specify certain fields as required,
+ * and others as prohibited.
+ *
+ * (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
+ *
+ *
+ * Usage:
+ *
+ *
+ * String[] fields = {"filename", "contents", "description"};
+ * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ * BooleanClause.Occur.MUST,
+ * BooleanClause.Occur.MUST_NOT};
+ * MultiFieldQueryParser.parse("query", fields, flags, analyzer);
+ *
+ *
+ *
+ *
+ * @param matchVersion Lucene version to match; this is passed through to QueryParser.
+ * @param query Query string to parse
+ * @param fields Fields to search on
+ * @param flags Flags describing the fields
+ * @param analyzer Analyzer to use
+ * @throws ParseException if query parsing fails
+ * @throws IllegalArgumentException if the length of the fields array differs
+ * from the length of the flags array
+ */
+ public static Query parse(Version matchVersion, String query, String[] fields,
+ BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException {
if (fields.length != flags.length)
throw new IllegalArgumentException("fields.length != flags.length");
BooleanQuery bQuery = new BooleanQuery();
for (int i = 0; i < fields.length; i++) {
- QueryParser qp = new QueryParser(fields[i], analyzer);
+ QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
Query q = qp.parse(query);
if (q!=null && // q never null, just being defensive
(!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) {
@@ -297,16 +419,55 @@
* @throws ParseException if query parsing fails
* @throws IllegalArgumentException if the length of the queries, fields,
* and flags array differ
+ * @deprecated Use {@link #parse(Version, String[], String[], BooleanClause.Occur[], Analyzer)} instead
*/
public static Query parse(String[] queries, String[] fields, BooleanClause.Occur[] flags,
Analyzer analyzer) throws ParseException
{
+ return parse(Version.LUCENE_24, queries, fields, flags, analyzer);
+ }
+
+ /**
+ * Parses a query, searching on the fields specified.
+ * Use this if you need to specify certain fields as required,
+ * and others as prohibited.
+ *
+ * (filename:query) +(contents:query) -(description:query)
+ *
+ *
+ * Usage:
+ *
+ *
+ * String[] query = {"query1", "query2", "query3"};
+ * String[] fields = {"filename", "contents", "description"};
+ * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ * BooleanClause.Occur.MUST,
+ * BooleanClause.Occur.MUST_NOT};
+ * MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+ *
+ *
+ *
+ *
+ * @param matchVersion Lucene version to match; this is passed through to QueryParser.
+ * @param queries Queries string to parse
+ * @param fields Fields to search on
+ * @param flags Flags describing the fields
+ * @param analyzer Analyzer to use
+ * @throws ParseException if query parsing fails
+ * @throws IllegalArgumentException if the length of the queries, fields,
+ * and flags array differ
+ */
+ public static Query parse(Version matchVersion, String[] queries, String[] fields, BooleanClause.Occur[] flags,
+ Analyzer analyzer) throws ParseException
+ {
if (!(queries.length == fields.length && queries.length == flags.length))
throw new IllegalArgumentException("queries, fields, and flags array have have different length");
BooleanQuery bQuery = new BooleanQuery();
for (int i = 0; i < fields.length; i++)
{
- QueryParser qp = new QueryParser(fields[i], analyzer);
+ QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
Query q = qp.parse(queries[i]);
if (q!=null && // q never null, just being defensive
(!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) {
Index: src/java/org/apache/lucene/queryParser/QueryParser.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.java (revision 828807)
+++ src/java/org/apache/lucene/queryParser/QueryParser.java (working copy)
@@ -35,6 +35,7 @@
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Parameter;
+import org.apache.lucene.util.Version;
/**
* This class is generated by JavaCC. The most important method is
@@ -101,6 +102,14 @@
*
+ * (filename:query1) +(contents:query2) -(description:query3)
+ *
+ *
+ *
*/
public class QueryParser implements QueryParserConstants {
@@ -125,7 +134,7 @@
boolean lowercaseExpandedTerms = true;
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
boolean allowLeadingWildcard = false;
- boolean enablePositionIncrements = false;
+ boolean enablePositionIncrements = true;
Analyzer analyzer;
String field;
@@ -158,11 +167,27 @@
/** Constructs a query parser.
* @param f the default field for query terms.
* @param a used to find terms in the query text.
+ * @deprecated Use {@link #QueryParser(Version, String,
+ * Analyzer)} instead.
*/
public QueryParser(String f, Analyzer a) {
+ this(Version.LUCENE_24, f, a);
+ }
+
+ /** Constructs a query parser.
+ * @param matchVersion Lucene version to match. See <a href="#version">above</a>.
+ * @param f the default field for query terms.
+ * @param a used to find terms in the query text.
+ */
+ public QueryParser(Version matchVersion, String f, Analyzer a) {
this(new FastCharStream(new StringReader("")));
analyzer = a;
field = f;
+ if (matchVersion.onOrAfter(Version.LUCENE_29)) {
+ enablePositionIncrements = true;
+ } else {
+ enablePositionIncrements = false;
+ }
}
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
@@ -759,7 +784,7 @@
DateTools.Resolution resolution = getDateResolution(field);
if (resolution == null) {
// no default or field specific date resolution has been set,
- // use deprecated DateField to maintain compatibilty with
+ // use deprecated DateField to maintain compatibility with
// pre-1.9 Lucene versions.
part1 = DateField.dateToString(d1);
part2 = DateField.dateToString(d2);
@@ -1155,7 +1180,7 @@
System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
System.exit(0);
}
- QueryParser qp = new QueryParser("field",
+ QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
System.out.println(q.toString("field"));
@@ -1591,6 +1616,12 @@
finally { jj_save(0, xla); }
}
+ private boolean jj_3R_2() {
+ if (jj_scan_token(TERM)) return true;
+ if (jj_scan_token(COLON)) return true;
+ return false;
+ }
+
private boolean jj_3_1() {
Token xsp;
xsp = jj_scanpos;
@@ -1607,12 +1638,6 @@
return false;
}
- private boolean jj_3R_2() {
- if (jj_scan_token(TERM)) return true;
- if (jj_scan_token(COLON)) return true;
- return false;
- }
-
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
/** Current token. */
@@ -1641,7 +1666,7 @@
private int jj_gc = 0;
/** Constructor with user supplied CharStream. */
- public QueryParser(CharStream stream) {
+ protected QueryParser(CharStream stream) {
token_source = new QueryParserTokenManager(stream);
token = new Token();
jj_ntk = -1;
@@ -1661,7 +1686,7 @@
}
/** Constructor with generated Token Manager. */
- public QueryParser(QueryParserTokenManager tm) {
+ protected QueryParser(QueryParserTokenManager tm) {
token_source = tm;
token = new Token();
jj_ntk = -1;
Index: src/java/org/apache/lucene/queryParser/QueryParser.jj
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 828807)
+++ src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy)
@@ -59,6 +59,7 @@
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Parameter;
+import org.apache.lucene.util.Version;
/**
* This class is generated by JavaCC. The most important method is
@@ -125,6 +126,14 @@
*
+ *
*/
public class QueryParser {
@@ -149,7 +158,7 @@
boolean lowercaseExpandedTerms = true;
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
boolean allowLeadingWildcard = false;
- boolean enablePositionIncrements = false;
+ boolean enablePositionIncrements = true;
Analyzer analyzer;
String field;
@@ -182,11 +191,27 @@
/** Constructs a query parser.
* @param f the default field for query terms.
* @param a used to find terms in the query text.
+ * @deprecated Use {@link #QueryParser(Version, String,
+ * Analyzer)} instead.
*/
public QueryParser(String f, Analyzer a) {
+ this(Version.LUCENE_24, f, a);
+ }
+
+ /** Constructs a query parser.
+ * @param matchVersion Lucene version to match. See <a href="#version">above</a>.
+ * @param f the default field for query terms.
+ * @param a used to find terms in the query text.
+ */
+ public QueryParser(Version matchVersion, String f, Analyzer a) {
this(new FastCharStream(new StringReader("")));
analyzer = a;
field = f;
+ if (matchVersion.onOrAfter(Version.LUCENE_29)) {
+ enablePositionIncrements = true;
+ } else {
+ enablePositionIncrements = false;
+ }
}
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
@@ -1179,7 +1204,7 @@
System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
System.exit(0);
}
- QueryParser qp = new QueryParser("field",
+ QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
System.out.println(q.toString("field"));
Index: src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 828807)
+++ src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy)
@@ -33,6 +33,7 @@
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Parameter;
+import org.apache.lucene.util.Version;
/** Token Manager. */
public class QueryParserTokenManager implements QueryParserConstants
Index: src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (revision 828807)
+++ src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (working copy)
@@ -35,7 +35,7 @@
* compatibility when creating StandardAnalyzer:
*
*
Index: src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
===================================================================
--- src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (revision 828807)
+++ src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (working copy)
@@ -27,6 +27,7 @@
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Version;
/** A grammar-based tokenizer constructed with JFlex
*
@@ -43,6 +44,14 @@
*
+ *
*/
public class StandardTokenizer extends Tokenizer {
@@ -107,9 +116,12 @@
/**
* Creates a new instance of the {@link StandardTokenizer}. Attaches the
* input to a newly created JFlex scanner.
+ *
+ * @deprecated Use {@link #StandardTokenizer(Version,
+ * Reader)} instead
*/
public StandardTokenizer(Reader input) {
- this(input, false);
+ this(Version.LUCENE_24, input);
}
/**
@@ -120,6 +132,8 @@
* @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms with HOST.
*
* See http://issues.apache.org/jira/browse/LUCENE-1068
+ *
+ * @deprecated Use {@link #StandardTokenizer(Version, Reader)} instead
*/
public StandardTokenizer(Reader input, boolean replaceInvalidAcronym) {
super();
@@ -128,7 +142,23 @@
}
/**
+ * Creates a new instance of the {@link org.apache.lucene.analysis.standard.StandardTokenizer}. Attaches
+ * the input to the newly created JFlex scanner.
+ *
+ * @param input The input reader
+ *
+ * See http://issues.apache.org/jira/browse/LUCENE-1068
+ */
+ public StandardTokenizer(Version matchVersion, Reader input) {
+ super();
+ this.scanner = new StandardTokenizerImpl(input);
+ init(input, matchVersion);
+ }
+
+ /**
* Creates a new StandardTokenizer with a given {@link AttributeSource}.
+ *
+ * @deprecated Use {@link #StandardTokenizer(Version, AttributeSource, Reader)} instead
*/
public StandardTokenizer(AttributeSource source, Reader input, boolean replaceInvalidAcronym) {
super(source);
@@ -137,7 +167,18 @@
}
/**
+ * Creates a new StandardTokenizer with a given {@link AttributeSource}.
+ */
+ public StandardTokenizer(Version matchVersion, AttributeSource source, Reader input) {
+ super(source);
+ this.scanner = new StandardTokenizerImpl(input);
+ init(input, matchVersion);
+ }
+
+ /**
* Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
+ *
+ * @deprecated Use {@link #StandardTokenizer(Version, AttributeFactory, Reader)} instead
*/
public StandardTokenizer(AttributeFactory factory, Reader input, boolean replaceInvalidAcronym) {
super(factory);
@@ -145,6 +186,15 @@
init(input, replaceInvalidAcronym);
}
+ /**
+ * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
+ */
+ public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
+ super(factory);
+ this.scanner = new StandardTokenizerImpl(input);
+ init(input, matchVersion);
+ }
+
private void init(Reader input, boolean replaceInvalidAcronym) {
this.replaceInvalidAcronym = replaceInvalidAcronym;
this.input = input;
@@ -153,6 +203,14 @@
posIncrAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
}
+
+ private void init(Reader input, Version matchVersion) {
+ if (matchVersion.onOrAfter(Version.LUCENE_24)) {
+ init(input, true);
+ } else {
+ init(input, false);
+ }
+ }
// this tokenizer generates three attributes:
// offset, positionIncrement and type
Index: src/java/org/apache/lucene/analysis/StopFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/StopFilter.java (revision 828807)
+++ src/java/org/apache/lucene/analysis/StopFilter.java (working copy)
@@ -25,6 +25,7 @@
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.queryParser.QueryParser; // for javadoc
+import org.apache.lucene.util.Version;
/**
* Removes stop words from a token stream.
@@ -243,6 +244,21 @@
}
/**
+ * Returns version-dependent default for
+ * enablePositionIncrements. Analyzers that embed
+ * StopFilter use this method when creating the
+ * StopFilter. Prior to 2.9, this returns false. On 2.9
+ * or later, it returns true.
+ */
+ public static boolean getEnablePositionIncrementsVersionDefault(Version matchVersion) {
+ if (matchVersion.onOrAfter(Version.LUCENE_29)) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ /**
* Set the default position increments behavior of every StopFilter created from now on.
* WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")
* or other stop words
* lists .
+ *
+ * @deprecated Use {@link #PatternAnalyzer(Version, Pattern, boolean, Set)} instead
*/
public PatternAnalyzer(Pattern pattern, boolean toLowerCase, Set stopWords) {
+ this(Version.LUCENE_24, pattern, toLowerCase, stopWords);
+ }
+
+ /**
+ * Constructs a new instance with the given parameters.
+ *
+ * @param matchVersion If >= {@link Version#LUCENE_29}, StopFilter.enablePositionIncrement is set to true
+ * @param pattern
+ * a regular expression delimiting tokens
+ * @param toLowerCase
+ * if true returns tokens after applying
+ * String.toLowerCase()
+ * @param stopWords
+ * if non-null, ignores all tokens that are contained in the
+ * given stop set (after previously having applied toLowerCase()
+ * if applicable). For example, created via
+ * {@link StopFilter#makeStopSet(String[])} and/or
+ * {@link org.apache.lucene.analysis.WordlistLoader} as in
+ * WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")
+ * or other stop words
+ * lists .
+ */
+ public PatternAnalyzer(Version matchVersion, Pattern pattern, boolean toLowerCase, Set stopWords) {
if (pattern == null)
throw new IllegalArgumentException("pattern must not be null");
@@ -170,6 +198,7 @@
this.pattern = pattern;
this.toLowerCase = toLowerCase;
this.stopWords = stopWords;
+ this.matchVersion = matchVersion;
}
/**
@@ -197,7 +226,7 @@
}
else {
stream = new PatternTokenizer(text, pattern, toLowerCase);
- if (stopWords != null) stream = new StopFilter(false, stream, stopWords);
+ if (stopWords != null) stream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), stream, stopWords);
}
return stream;
Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
===================================================================
--- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java (revision 828807)
+++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java (working copy)
@@ -31,6 +31,7 @@
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
+import org.apache.lucene.util.Version;
/**
*
+ * Create a new SmartChineseAnalyzer, using the provided {@link Set} of stopwords. + *
+ *+ * Note: the set should include punctuation, unless you want to index punctuation! + *
+ * @param stopWords {@link Set} of stopwords to use. + */ + public SmartChineseAnalyzer(Version matchVersion, Set stopWords) { this.stopWords = stopWords; + this.matchVersion = matchVersion; } public TokenStream tokenStream(String fieldName, Reader reader) { @@ -114,7 +159,8 @@ // The porter stemming is too strict, this is not a bug, this is a feature:) result = new PorterStemFilter(result); if (stopWords != null) { - result = new StopFilter(result, stopWords, false); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stopWords, false); } return result; } @@ -134,7 +180,8 @@ streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream); streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream); if (stopWords != null) { - streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopWords, false); + streams.filteredTokenStream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.filteredTokenStream, stopWords, false); } } else { streams.tokenStream.reset(reader); Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java =================================================================== --- contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.Version; /** * Test case for FrenchAnalyzer. 
@@ -32,7 +33,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase { public void testAnalyzer() throws Exception { - FrenchAnalyzer fa = new FrenchAnalyzer(); + FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT); // test null reader boolean iaeFlag = false; Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java (working copy) @@ -34,6 +34,7 @@ import org.apache.lucene.analysis.WordlistLoader; import org.apache.lucene.analysis.ar.ArabicLetterTokenizer; import org.apache.lucene.analysis.ar.ArabicNormalizationFilter; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Persian. @@ -66,11 +67,24 @@ */ public static final String STOPWORDS_COMMENT = "#"; + private final Version matchVersion; + /** * Builds an analyzer with the default stop words: * {@link #DEFAULT_STOPWORD_FILE}. + * + * @deprecated Use {@link #PersianAnalyzer(Version)} instead */ public PersianAnalyzer() { + this(Version.LUCENE_24); + } + + /** + * Builds an analyzer with the default stop words: + * {@link #DEFAULT_STOPWORD_FILE}. + */ + public PersianAnalyzer(Version matchVersion) { + this.matchVersion = matchVersion; try { InputStream stream = PersianAnalyzer.class .getResourceAsStream(DEFAULT_STOPWORD_FILE); @@ -86,24 +100,55 @@ /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #PersianAnalyzer(Version, String[])} instead */ public PersianAnalyzer(String[] stopwords) { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. 
+ */ + public PersianAnalyzer(Version matchVersion, String[] stopwords) { stoptable = StopFilter.makeStopSet(stopwords); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #PersianAnalyzer(Version, Hashtable)} instead */ public PersianAnalyzer(Hashtable stopwords) { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public PersianAnalyzer(Version matchVersion, Hashtable stopwords) { stoptable = new HashSet(stopwords.keySet()); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. Lines can be commented out * using {@link #STOPWORDS_COMMENT} + * + * @deprecated Use {@link #PersianAnalyzer(Version, File)} instead */ public PersianAnalyzer(File stopwords) throws IOException { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. Lines can be commented out + * using {@link #STOPWORDS_COMMENT} + */ + public PersianAnalyzer(Version matchVersion, File stopwords) throws IOException { stoptable = WordlistLoader.getWordSet(stopwords, STOPWORDS_COMMENT); + this.matchVersion = matchVersion; } /** @@ -125,7 +170,8 @@ * the order here is important: the stopword list is normalized with the * above! */ - result = new StopFilter(result, stoptable); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stoptable); return result; } @@ -158,7 +204,8 @@ * the order here is important: the stopword list is normalized with the * above! 
*/ - streams.result = new StopFilter(streams.result, stoptable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stoptable); setPreviousTokenStream(streams); } else { streams.source.reset(reader); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Russian language. @@ -193,41 +194,60 @@ * @deprecated Support for non-Unicode encodings will be removed in Lucene 3.0 */ private char[] charset; + private final Version matchVersion; + /** @deprecated Use {@link #RussianAnalyzer(Version)} instead */ + public RussianAnalyzer() { + this(Version.LUCENE_24); + } - public RussianAnalyzer() { + public RussianAnalyzer(Version matchVersion) { charset = RussianCharsets.UnicodeRussian; stopSet = StopFilter.makeStopSet( makeStopWords(RussianCharsets.UnicodeRussian)); + this.matchVersion = matchVersion; } /** * Builds an analyzer. - * @deprecated Use {@link #RussianAnalyzer()} instead. + * @deprecated Use {@link #RussianAnalyzer(Version)} instead. */ public RussianAnalyzer(char[] charset) { this.charset = charset; stopSet = StopFilter.makeStopSet(makeStopWords(charset)); + matchVersion = Version.LUCENE_24; } /** * Builds an analyzer with the given stop words. - * @deprecated Use {@link #RussianAnalyzer(String[])} instead. + * @deprecated Use {@link #RussianAnalyzer(Version,String[])} instead. 
*/ public RussianAnalyzer(char[] charset, String[] stopwords) { this.charset = charset; stopSet = StopFilter.makeStopSet(stopwords); + matchVersion = Version.LUCENE_24; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #RussianAnalyzer(Version,String[])} instead. */ public RussianAnalyzer(String[] stopwords) { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public RussianAnalyzer(Version matchVersion, String[] stopwords) + { this.charset = RussianCharsets.UnicodeRussian; stopSet = StopFilter.makeStopSet(stopwords); + this.matchVersion = matchVersion; } /** Takes russian stop words and translates them to a String array, using @@ -254,22 +274,36 @@ /** * Builds an analyzer with the given stop words. * TODO: create a Set version of this ctor - * @deprecated Use {@link #RussianAnalyzer(Map)} instead. + * + * @deprecated Use {@link #RussianAnalyzer(Version, Map)} instead. */ public RussianAnalyzer(char[] charset, Map stopwords) { this.charset = charset; stopSet = new HashSet(stopwords.keySet()); + matchVersion = Version.LUCENE_24; } /** * Builds an analyzer with the given stop words. * TODO: create a Set version of this ctor + * + * @deprecated Use {@link #RussianAnalyzer(Version, Map)} instead. */ public RussianAnalyzer(Map stopwords) { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. 
+ * TODO: create a Set version of this ctor + */ + public RussianAnalyzer(Version matchVersion, Map stopwords) + { charset = RussianCharsets.UnicodeRussian; stopSet = new HashSet(stopwords.keySet()); + this.matchVersion = matchVersion; } /** @@ -285,7 +319,8 @@ { TokenStream result = new RussianLetterTokenizer(reader, charset); result = new RussianLowerCaseFilter(result, charset); - result = new StopFilter(result, stopSet); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stopSet); result = new RussianStemFilter(result, charset); return result; } @@ -311,7 +346,8 @@ streams = new SavedStreams(); streams.source = new RussianLetterTokenizer(reader, charset); streams.result = new RussianLowerCaseFilter(streams.source, charset); - streams.result = new StopFilter(streams.result, stopSet); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stopSet); streams.result = new RussianStemFilter(streams.result, charset); setPreviousTokenStream(streams); } else { Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (working copy) @@ -33,6 +33,7 @@ import org.apache.lucene.analysis.WordlistLoader; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for German language. @@ -44,6 +45,9 @@ * exclusion list is empty by default. * * + *NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.
+ * * @version $Id$ */ public class GermanAnalyzer extends Analyzer { @@ -76,37 +80,80 @@ */ private Set exclusionSet = new HashSet(); + private final Version matchVersion; + /** * Builds an analyzer with the default stop words: * {@link #GERMAN_STOP_WORDS}. + * + * @deprecated Use {@link #GermanAnalyzer(Version)} instead */ public GermanAnalyzer() { + this(Version.LUCENE_23); + } + + /** + * Builds an analyzer with the default stop words: + * {@link #GERMAN_STOP_WORDS}. + */ + public GermanAnalyzer(Version matchVersion) { stopSet = StopFilter.makeStopSet(GERMAN_STOP_WORDS); setOverridesTokenStreamMethod(GermanAnalyzer.class); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #GermanAnalyzer(Version, String[])} instead */ public GermanAnalyzer(String[] stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public GermanAnalyzer(Version matchVersion, String[] stopwords) { stopSet = StopFilter.makeStopSet(stopwords); setOverridesTokenStreamMethod(GermanAnalyzer.class); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #GermanAnalyzer(Version, Map)} instead */ public GermanAnalyzer(Map stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public GermanAnalyzer(Version matchVersion, Map stopwords) { stopSet = new HashSet(stopwords.keySet()); setOverridesTokenStreamMethod(GermanAnalyzer.class); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #GermanAnalyzer(Version, File)} instead */ public GermanAnalyzer(File stopwords) throws IOException { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. 
+ */ + public GermanAnalyzer(Version matchVersion, File stopwords) throws IOException { stopSet = WordlistLoader.getWordSet(stopwords); setOverridesTokenStreamMethod(GermanAnalyzer.class); + this.matchVersion = matchVersion; } /** @@ -141,10 +188,11 @@ * {@link GermanStemFilter} */ public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(reader); + TokenStream result = new StandardTokenizer(matchVersion, reader); result = new StandardFilter(result); result = new LowerCaseFilter(result); - result = new StopFilter(result, stopSet); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stopSet); result = new GermanStemFilter(result, exclusionSet); return result; } @@ -173,10 +221,11 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new StandardFilter(streams.source); streams.result = new LowerCaseFilter(streams.result); - streams.result = new StopFilter(streams.result, stopSet); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stopSet); streams.result = new GermanStemFilter(streams.result, exclusionSet); setPreviousTokenStream(streams); } else { Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardTokenizer; +import 
org.apache.lucene.util.Version; import java.io.IOException; import java.io.Reader; @@ -36,6 +37,9 @@ * that will not be indexed at all). * A default set of stopwords is used unless an alternative list is specified. * + * + *NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.
*/ public final class GreekAnalyzer extends Analyzer { @@ -159,40 +163,62 @@ */ private char[] charset; + private final Version matchVersion; + + /** @deprecated Use {@link #GreekAnalyzer(Version)} instead */ public GreekAnalyzer() { + this(Version.LUCENE_23); + } + + public GreekAnalyzer(Version matchVersion) { charset = GreekCharsets.UnicodeGreek; stopSet = StopFilter.makeStopSet( makeStopWords(GreekCharsets.UnicodeGreek)); + this.matchVersion = matchVersion; } /** * Builds an analyzer. - * @deprecated Use {@link #GreekAnalyzer()} instead. + * @deprecated Use {@link #GreekAnalyzer(Version)} instead. */ public GreekAnalyzer(char[] charset) { this.charset = charset; stopSet = StopFilter.makeStopSet(makeStopWords(charset)); + matchVersion = Version.LUCENE_23; } /** * Builds an analyzer with the given stop words. * @param stopwords Array of stopwords to use. + * + * @deprecated Use {@link #GreekAnalyzer(Version, String[])} instead */ public GreekAnalyzer(String [] stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + * @param stopwords Array of stopwords to use. + */ + public GreekAnalyzer(Version matchVersion, String [] stopwords) + { charset = GreekCharsets.UnicodeGreek; stopSet = StopFilter.makeStopSet(stopwords); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. - * @deprecated Use {@link #GreekAnalyzer(String[])} instead. + * @deprecated Use {@link #GreekAnalyzer(Version, String[])} instead. */ public GreekAnalyzer(char[] charset, String[] stopwords) { this.charset = charset; stopSet = StopFilter.makeStopSet(stopwords); + matchVersion = Version.LUCENE_23; } /** @@ -219,21 +245,33 @@ /** * Builds an analyzer with the given stop words. - * @deprecated Use {@link #GreekAnalyzer(Map)} instead. + * @deprecated Use {@link #GreekAnalyzer(Version, Map)} instead. 
*/ public GreekAnalyzer(char[] charset, Map stopwords) { this.charset = charset; stopSet = new HashSet(stopwords.keySet()); + matchVersion = Version.LUCENE_23; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #GreekAnalyzer(Version,Map)} instead */ public GreekAnalyzer(Map stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public GreekAnalyzer(Version matchVersion, Map stopwords) + { charset = GreekCharsets.UnicodeGreek; stopSet = new HashSet(stopwords.keySet()); + this.matchVersion = matchVersion; } /** @@ -244,9 +282,10 @@ */ public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(reader); + TokenStream result = new StandardTokenizer(matchVersion, reader); result = new GreekLowerCaseFilter(result, charset); - result = new StopFilter(result, stopSet); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stopSet); return result; } @@ -267,9 +306,10 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new GreekLowerCaseFilter(streams.source, charset); - streams.result = new StopFilter(streams.result, stopSet); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stopSet); setPreviousTokenStream(streams); } else { streams.source.reset(reader); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (working 
copy) @@ -32,6 +32,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.WordlistLoader; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Arabic. @@ -69,10 +70,22 @@ */ public static final String STOPWORDS_COMMENT = "#"; + private final Version matchVersion; + /** * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. + * + * @deprecated Use {@link #ArabicAnalyzer(Version)} instead */ public ArabicAnalyzer() { + this(Version.LUCENE_24); + } + + /** + * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. + */ + public ArabicAnalyzer(Version matchVersion) { + this.matchVersion = matchVersion; try { InputStream stream = ArabicAnalyzer.class.getResourceAsStream(DEFAULT_STOPWORD_FILE); InputStreamReader reader = new InputStreamReader(stream, "UTF-8"); @@ -87,23 +100,53 @@ /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #ArabicAnalyzer(Version, String[])} instead */ public ArabicAnalyzer( String[] stopwords ) { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public ArabicAnalyzer( Version matchVersion, String[] stopwords ) { stoptable = StopFilter.makeStopSet( stopwords ); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #ArabicAnalyzer(Version, Hashtable)} instead */ public ArabicAnalyzer( Hashtable stopwords ) { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public ArabicAnalyzer( Version matchVersion, Hashtable stopwords ) { stoptable = new HashSet(stopwords.keySet()); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. 
Lines can be commented out using {@link #STOPWORDS_COMMENT} + * + * @deprecated Use {@link #ArabicAnalyzer(Version, File)} instead */ public ArabicAnalyzer( File stopwords ) throws IOException { + this(Version.LUCENE_24, stopwords); + } + + /** + * Builds an analyzer with the given stop words. Lines can be commented out using {@link #STOPWORDS_COMMENT} + */ + public ArabicAnalyzer( Version matchVersion, File stopwords ) throws IOException { stoptable = WordlistLoader.getWordSet( stopwords, STOPWORDS_COMMENT); + this.matchVersion = matchVersion; } @@ -117,7 +160,8 @@ public final TokenStream tokenStream(String fieldName, Reader reader) { TokenStream result = new ArabicLetterTokenizer( reader ); result = new LowerCaseFilter(result); - result = new StopFilter( result, stoptable ); + result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stoptable ); result = new ArabicNormalizationFilter( result ); result = new ArabicStemFilter( result ); @@ -144,7 +188,8 @@ streams = new SavedStreams(); streams.source = new ArabicLetterTokenizer(reader); streams.result = new LowerCaseFilter(streams.source); - streams.result = new StopFilter(streams.result, stoptable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stoptable); streams.result = new ArabicNormalizationFilter(streams.result); streams.result = new ArabicStemFilter(streams.result); setPreviousTokenStream(streams); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (working copy) @@ -32,6 +32,7 @@ import org.apache.lucene.analysis.WordlistLoader; import 
org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Brazilian Portuguese language. @@ -40,6 +41,9 @@ * will not be indexed at all) and an external list of exclusions (words that will * not be stemmed, but indexed). * + * + *NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.
*/ public final class BrazilianAnalyzer extends Analyzer { @@ -77,36 +81,76 @@ * Contains words that should be indexed but not stemmed. */ private Set excltable = new HashSet(); + private final Version matchVersion; /** * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}). + * + * @deprecated Use {@link #BrazilianAnalyzer(Version)} instead */ public BrazilianAnalyzer() { - stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS ); + this(Version.LUCENE_23); } /** + * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}). + */ + public BrazilianAnalyzer(Version matchVersion) { + stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS ); + this.matchVersion = matchVersion; + } + + /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #BrazilianAnalyzer(Version, String[])} instead */ public BrazilianAnalyzer( String[] stopwords ) { - stoptable = StopFilter.makeStopSet( stopwords ); + this(Version.LUCENE_23, stopwords); } /** * Builds an analyzer with the given stop words. */ + public BrazilianAnalyzer( Version matchVersion, String[] stopwords ) { + stoptable = StopFilter.makeStopSet( stopwords ); + this.matchVersion = matchVersion; + } + + /** + * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #BrazilianAnalyzer(Version, Map)} instead + */ public BrazilianAnalyzer( Map stopwords ) { - stoptable = new HashSet(stopwords.keySet()); + this(Version.LUCENE_23, stopwords); } /** * Builds an analyzer with the given stop words. */ + public BrazilianAnalyzer( Version matchVersion, Map stopwords ) { + stoptable = new HashSet(stopwords.keySet()); + this.matchVersion = matchVersion; + } + + /** + * Builds an analyzer with the given stop words. 
+ * @deprecated Use {@link #BrazilianAnalyzer(Version, File)} instead + */ public BrazilianAnalyzer( File stopwords ) throws IOException { - stoptable = WordlistLoader.getWordSet( stopwords ); + this(Version.LUCENE_23, stopwords); } /** + * Builds an analyzer with the given stop words. + */ + public BrazilianAnalyzer( Version matchVersion, File stopwords ) throws IOException { + stoptable = WordlistLoader.getWordSet( stopwords ); + this.matchVersion = matchVersion; + } + + /** * Builds an exclusionlist from an array of Strings. */ public void setStemExclusionTable( String[] exclusionlist ) { @@ -136,10 +180,11 @@ * {@link BrazilianStemFilter}. */ public final TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer( reader ); + TokenStream result = new StandardTokenizer( matchVersion, reader ); result = new LowerCaseFilter( result ); result = new StandardFilter( result ); - result = new StopFilter( result, stoptable ); + result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stoptable ); result = new BrazilianStemFilter( result, excltable ); return result; } @@ -162,10 +207,11 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new LowerCaseFilter(streams.source); streams.result = new StandardFilter(streams.result); - streams.result = new StopFilter(streams.result, stoptable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stoptable); streams.result = new BrazilianStemFilter(streams.result, excltable); setPreviousTokenStream(streams); } else { Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java 
=================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.Version; import java.io.IOException; import java.io.Reader; @@ -48,15 +49,27 @@ //The default maximum percentage (40%) of index documents which //can contain a term, after which the term is considered to be a stop word. public static final float defaultMaxDocFreqPercent = 0.4f; + private final Version matchVersion; /** * Initializes this analyzer with the Analyzer object that actually produces the tokens * * @param delegate The choice of {@link Analyzer} that is used to produce the token stream which needs filtering + * @deprecated Use {@link #QueryAutoStopWordAnalyzer(Version, Analyzer)} instead */ public QueryAutoStopWordAnalyzer(Analyzer delegate) { + this(Version.LUCENE_24, delegate); + } + + /** + * Initializes this analyzer with the Analyzer object that actually produces the tokens + * + * @param delegate The choice of {@link Analyzer} that is used to produce the token stream which needs filtering + */ + public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer delegate) { this.delegate = delegate; setOverridesTokenStreamMethod(QueryAutoStopWordAnalyzer.class); + this.matchVersion = matchVersion; } /** @@ -175,7 +188,8 @@ } HashSet stopWords = (HashSet) stopWordsPerField.get(fieldName); if (stopWords != null) { - result = new StopFilter(result, stopWords); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stopWords); } return result; } @@ -217,7 +231,8 @@ /* if there are any stopwords for the field, save the stopfilter 
*/ HashSet stopWords = (HashSet) stopWordsPerField.get(fieldName); if (stopWords != null) - streams.withStopFilter = new StopFilter(streams.wrapped, stopWords); + streams.withStopFilter = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.wrapped, stopWords); else streams.withStopFilter = streams.wrapped; @@ -238,7 +253,8 @@ streams.wrapped = result; HashSet stopWords = (HashSet) stopWordsPerField.get(fieldName); if (stopWords != null) - streams.withStopFilter = new StopFilter(streams.wrapped, stopWords); + streams.withStopFilter = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.wrapped, stopWords); else streams.withStopFilter = streams.wrapped; } Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.util.Version; import java.io.IOException; import java.io.Reader; @@ -56,23 +57,45 @@ * stop word list */ private Set stopTable; + private final Version matchVersion; //~ Constructors ----------------------------------------------------------- /** * Builds an analyzer which removes words in {@link #STOP_WORDS}. + * + * @deprecated Use {@link #CJKAnalyzer(Version)} instead */ public CJKAnalyzer() { + this(Version.LUCENE_24); + } + + /** + * Builds an analyzer which removes words in {@link #STOP_WORDS}. + */ + public CJKAnalyzer(Version matchVersion) { stopTable = StopFilter.makeStopSet(STOP_WORDS); + this.matchVersion = matchVersion; } /** * Builds an analyzer which removes words in the provided array. 
* * @param stopWords stop word array + * @deprecated Use {@link #CJKAnalyzer(Version, String[])} instead */ public CJKAnalyzer(String[] stopWords) { + this(Version.LUCENE_24, stopWords); + } + + /** + * Builds an analyzer which removes words in the provided array. + * + * @param stopWords stop word array + */ + public CJKAnalyzer(Version matchVersion, String[] stopWords) { stopTable = StopFilter.makeStopSet(stopWords); + this.matchVersion = matchVersion; } //~ Methods ---------------------------------------------------------------- @@ -86,7 +109,8 @@ * {@link StopFilter} */ public final TokenStream tokenStream(String fieldName, Reader reader) { - return new StopFilter(new CJKTokenizer(reader), stopTable); + return new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + new CJKTokenizer(reader), stopTable); } private class SavedStreams { @@ -109,7 +133,8 @@ if (streams == null) { streams = new SavedStreams(); streams.source = new CJKTokenizer(reader); - streams.result = new StopFilter(streams.source, stopTable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.source, stopTable); setPreviousTokenStream(streams); } else { streams.source.reset(reader); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (working copy) @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.WordlistLoader; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; import java.io.File; import java.io.IOException; @@ -43,6 +44,17 @@ * exclusion list is empty by default. 
* * + * + *You must specify the required {@link Version} + * compatibility when creating FrenchAnalyzer: + *
NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.
+ * * @version $Id$ */ public final class FrenchAnalyzer extends Analyzer { @@ -84,26 +96,60 @@ */ private Set excltable = new HashSet(); + private final Version matchVersion; + /** * Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}). + * + * @deprecated Use {@link #FrenchAnalyzer(Version)} instead. */ public FrenchAnalyzer() { + this(Version.LUCENE_23); + } + + /** + * Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}). + */ + public FrenchAnalyzer(Version matchVersion) { stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #FrenchAnalyzer(Version, + * String[])} instead. */ public FrenchAnalyzer(String[] stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + */ + public FrenchAnalyzer(Version matchVersion, String[] stopwords) { stoptable = StopFilter.makeStopSet(stopwords); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. * @throws IOException + * + * @deprecated Use {@link #FrenchAnalyzer(Version, File)} instead */ public FrenchAnalyzer(File stopwords) throws IOException { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. 
+ * @throws IOException + */ + public FrenchAnalyzer(Version matchVersion, File stopwords) throws IOException { stoptable = new HashSet(WordlistLoader.getWordSet(stopwords)); + this.matchVersion = matchVersion; } /** @@ -144,9 +190,10 @@ if (fieldName == null) throw new IllegalArgumentException("fieldName must not be null"); if (reader == null) throw new IllegalArgumentException("reader must not be null"); - TokenStream result = new StandardTokenizer(reader); + TokenStream result = new StandardTokenizer(matchVersion, reader); result = new StandardFilter(result); - result = new StopFilter(result, stoptable); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stoptable); result = new FrenchStemFilter(result, excltable); // Convert to lowercase after stemming! result = new LowerCaseFilter(result); @@ -171,9 +218,10 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new StandardFilter(streams.source); - streams.result = new StopFilter(streams.result, stoptable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stoptable); streams.result = new FrenchStemFilter(streams.result, excltable); // Convert to lowercase after stemming! 
streams.result = new LowerCaseFilter(streams.result); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; import java.io.File; import java.io.IOException; @@ -42,6 +43,9 @@ * A default set of stopwords is used unless an alternative list is specified, but the * exclusion list is empty by default. * + * + *NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.
*/ public class DutchAnalyzer extends Analyzer { /** @@ -73,48 +77,92 @@ private Set excltable = new HashSet(); private Map stemdict = new HashMap(); + private final Version matchVersion; + /** + * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS}) + * and a few default entries for the stem exclusion table. + * + * @deprecated Use {@link #DutchAnalyzer(Version)} instead + */ + public DutchAnalyzer() { + this(Version.LUCENE_23); + } /** * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS}) * and a few default entries for the stem exclusion table. * */ - public DutchAnalyzer() { + public DutchAnalyzer(Version matchVersion) { setOverridesTokenStreamMethod(DutchAnalyzer.class); stoptable = StopFilter.makeStopSet(DUTCH_STOP_WORDS); stemdict.put("fiets", "fiets"); //otherwise fiet stemdict.put("bromfiets", "bromfiets"); //otherwise bromfiet stemdict.put("ei", "eier"); stemdict.put("kind", "kinder"); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. * * @param stopwords + * @deprecated Use {@link #DutchAnalyzer(Version, String[])} instead */ public DutchAnalyzer(String[] stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + * + * @param matchVersion + * @param stopwords + */ + public DutchAnalyzer(Version matchVersion, String[] stopwords) { setOverridesTokenStreamMethod(DutchAnalyzer.class); stoptable = StopFilter.makeStopSet(stopwords); + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. * * @param stopwords + * @deprecated Use {@link #DutchAnalyzer(Version, HashSet)} instead */ public DutchAnalyzer(HashSet stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. 
+ * + * @param stopwords + */ + public DutchAnalyzer(Version matchVersion, HashSet stopwords) { setOverridesTokenStreamMethod(DutchAnalyzer.class); stoptable = stopwords; + this.matchVersion = matchVersion; } /** * Builds an analyzer with the given stop words. * * @param stopwords + * @deprecated Use {@link #DutchAnalyzer(Version, File)} instead */ public DutchAnalyzer(File stopwords) { + this(Version.LUCENE_23, stopwords); + } + + /** + * Builds an analyzer with the given stop words. + * + * @param stopwords + */ + public DutchAnalyzer(Version matchVersion, File stopwords) { setOverridesTokenStreamMethod(DutchAnalyzer.class); try { stoptable = org.apache.lucene.analysis.WordlistLoader.getWordSet(stopwords); @@ -122,6 +170,7 @@ // TODO: throw IOException throw new RuntimeException(e); } + this.matchVersion = matchVersion; } /** @@ -179,9 +228,10 @@ * and {@link DutchStemFilter} */ public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(reader); + TokenStream result = new StandardTokenizer(matchVersion, reader); result = new StandardFilter(result); - result = new StopFilter(result, stoptable); + result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stoptable); result = new DutchStemFilter(result, excltable, stemdict); return result; } @@ -211,9 +261,10 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new StandardFilter(streams.source); - streams.result = new StopFilter(streams.result, stoptable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stoptable); streams.result = new DutchStemFilter(streams.result, excltable, stemdict); setPreviousTokenStream(streams); } else { Index: 
contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java (working copy) @@ -25,22 +25,34 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; /** * {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words. * @version 0.2 + * + *NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.
*/ public class ThaiAnalyzer extends Analyzer { + private final Version matchVersion; + + /** @deprecated Use {@link #ThaiAnalyzer(Version)} instead */ + public ThaiAnalyzer() { + this(Version.LUCENE_23); + } - public ThaiAnalyzer() { + public ThaiAnalyzer(Version matchVersion) { setOverridesTokenStreamMethod(ThaiAnalyzer.class); + this.matchVersion = matchVersion; } public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream ts = new StandardTokenizer(reader); + TokenStream ts = new StandardTokenizer(matchVersion, reader); ts = new StandardFilter(ts); ts = new ThaiWordFilter(ts); - ts = new StopFilter(ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET); + ts = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET); return ts; } @@ -60,10 +72,11 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new StandardFilter(streams.source); streams.result = new ThaiWordFilter(streams.result); - streams.result = new StopFilter(streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET); setPreviousTokenStream(streams); } else { streams.source.reset(reader); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (revision 828807) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (working copy) @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.WordlistLoader; import org.apache.lucene.analysis.standard.StandardFilter; import 
org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.util.Version; import java.io.*; import java.util.HashSet; @@ -37,6 +38,9 @@ * will not be indexed at all). * A default set of stopwords is used unless an alternative list is specified. * + * + *NOTE: This class uses the same {@link Version} + * dependent settings as {@link StandardAnalyzer}.
*/ public final class CzechAnalyzer extends Analyzer { @@ -68,32 +72,70 @@ * Contains the stopwords used with the {@link StopFilter}. */ private Set stoptable; + private final Version matchVersion; /** * Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}). + * + * @deprecated Use {@link #CzechAnalyzer(Version)} instead */ public CzechAnalyzer() { - stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS ); + this(Version.LUCENE_23); } + /** + * Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}). + */ + public CzechAnalyzer(Version matchVersion) { + stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS ); + this.matchVersion = matchVersion; + } /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #CzechAnalyzer(Version, String[])} instead */ public CzechAnalyzer( String[] stopwords ) { - stoptable = StopFilter.makeStopSet( stopwords ); + this(Version.LUCENE_23, stopwords); } + /** + * Builds an analyzer with the given stop words. + */ + public CzechAnalyzer(Version matchVersion, String[] stopwords) { + stoptable = StopFilter.makeStopSet( stopwords ); + this.matchVersion = matchVersion; + } + + /** + * @deprecated Use {@link #CzechAnalyzer(Version, HashSet)} instead + */ public CzechAnalyzer( HashSet stopwords ) { - stoptable = stopwords; + this(Version.LUCENE_23, stopwords); } + public CzechAnalyzer(Version matchVersion, HashSet stopwords) { + stoptable = stopwords; + this.matchVersion = matchVersion; + } + /** * Builds an analyzer with the given stop words. + * + * @deprecated Use {@link #CzechAnalyzer(Version, File)} instead */ public CzechAnalyzer( File stopwords ) throws IOException { - stoptable = WordlistLoader.getWordSet( stopwords ); + this(Version.LUCENE_23, stopwords); } + /** + * Builds an analyzer with the given stop words. 
+ */ + public CzechAnalyzer(Version matchVersion, File stopwords ) throws IOException { + stoptable = WordlistLoader.getWordSet( stopwords ); + this.matchVersion = matchVersion; + } + /** * Loads stopwords hash from resource stream (file, database...). * @param wordfile File containing the wordlist @@ -135,10 +177,11 @@ * {@link StandardFilter}, {@link LowerCaseFilter}, and {@link StopFilter} */ public final TokenStream tokenStream( String fieldName, Reader reader ) { - TokenStream result = new StandardTokenizer( reader ); + TokenStream result = new StandardTokenizer( matchVersion, reader ); result = new StandardFilter( result ); result = new LowerCaseFilter( result ); - result = new StopFilter( result, stoptable ); + result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + result, stoptable ); return result; } @@ -159,10 +202,11 @@ SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); - streams.source = new StandardTokenizer(reader); + streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new StandardFilter(streams.source); streams.result = new LowerCaseFilter(streams.result); - streams.result = new StopFilter(streams.result, stoptable); + streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), + streams.result, stoptable); setPreviousTokenStream(streams); } else { streams.source.reset(reader);