Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 615191) +++ CHANGES.txt (working copy) @@ -12,6 +12,10 @@ pre-existing constructors; these will be removed in release 3.0. (Steven Rowe via Mike McCandless) + 2. LUCENE-1150: Re-expose StandardTokenizer's constants publicly; + this was accidentally lost with LUCENE-966. (Nicolas Lalevée via + Mike McCandless) + Bug fixes New features Index: src/test/org/apache/lucene/analysis/TestAnalyzers.java =================================================================== --- src/test/org/apache/lucene/analysis/TestAnalyzers.java (revision 615191) +++ src/test/org/apache/lucene/analysis/TestAnalyzers.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.index.Payload; +import org.apache.lucene.analysis.standard.StandardTokenizer; public class TestAnalyzers extends LuceneTestCase { @@ -118,6 +119,18 @@ verifyPayload(ts); } + // Just a compile time test, to ensure the + // StandardTokenizer constants remain publicly accessible + public void _testStandardConstants() { + int x = StandardTokenizer.ALPHANUM; + x = StandardTokenizer.APOSTROPHE; + x = StandardTokenizer.ACRONYM; + x = StandardTokenizer.COMPANY; + x = StandardTokenizer.EMAIL; + x = StandardTokenizer.HOST; + x = StandardTokenizer.NUM; + x = StandardTokenizer.CJ; + } } class BuffTokenFilter extends TokenFilter { Index: src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex =================================================================== --- src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (revision 615191) +++ src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (working copy) @@ -30,20 +30,20 @@ %{ -public static final int ALPHANUM = 0; -public static final int APOSTROPHE = 1; -public static final int ACRONYM = 2; -public static final int COMPANY = 3; -public static final int EMAIL = 4; 
-public static final int HOST = 5; -public static final int NUM = 6; -public static final int CJ = 7; +public static final int ALPHANUM = StandardTokenizer.ALPHANUM; +public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE; +public static final int ACRONYM = StandardTokenizer.ACRONYM; +public static final int COMPANY = StandardTokenizer.COMPANY; +public static final int EMAIL = StandardTokenizer.EMAIL; +public static final int HOST = StandardTokenizer.HOST; +public static final int NUM = StandardTokenizer.NUM; +public static final int CJ = StandardTokenizer.CJ; /** * @deprecated this solves a bug where HOSTs that end with '.' are identified * as ACRONYMs. It is deprecated and will be removed in the next * release. */ -public static final int ACRONYM_DEP = 8; +public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP; public static final String [] TOKEN_TYPES = new String [] { "", Index: src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (revision 615191) +++ src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (working copy) @@ -41,8 +41,24 @@ */ public class StandardTokenizer extends Tokenizer { - /** A private instance of the JFlex-constructed scanner */ - private final StandardTokenizerImpl scanner; + /** A private instance of the JFlex-constructed scanner */ + private final StandardTokenizerImpl scanner; + + public static final int ALPHANUM = 0; + public static final int APOSTROPHE = 1; + public static final int ACRONYM = 2; + public static final int COMPANY = 3; + public static final int EMAIL = 4; + public static final int HOST = 5; + public static final int NUM = 6; + public static final int CJ = 7; + + /** + * @deprecated this solves a bug where HOSTs that end with '.' are identified + * as ACRONYMs. It is deprecated and will be removed in the next + * release. 
+ */ + public static final int ACRONYM_DEP = 8; /** * Specifies whether deprecated acronyms should be replaced with HOST type. Index: src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java =================================================================== --- src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (revision 615191) +++ src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (working copy) @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.4.1 on 12/18/07 9:22 PM */ +/* The following code was generated by JFlex 1.4.1 on 1/25/08 6:54 PM */ package org.apache.lucene.analysis.standard; @@ -25,8 +25,8 @@ /** * This class is a scanner generated by * JFlex 1.4.1 - * on 12/18/07 9:22 PM from the specification file - * /Volumes/User/grantingersoll/projects/lucene/java/lucene-clean/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex + * on 1/25/08 6:54 PM from the specification file + * /tango/mike/src/lucene.clean/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex */ class StandardTokenizerImpl { @@ -283,20 +283,20 @@ /* user code: */ -public static final int ALPHANUM = 0; -public static final int APOSTROPHE = 1; -public static final int ACRONYM = 2; -public static final int COMPANY = 3; -public static final int EMAIL = 4; -public static final int HOST = 5; -public static final int NUM = 6; -public static final int CJ = 7; +public static final int ALPHANUM = StandardTokenizer.ALPHANUM; +public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE; +public static final int ACRONYM = StandardTokenizer.ACRONYM; +public static final int COMPANY = StandardTokenizer.COMPANY; +public static final int EMAIL = StandardTokenizer.EMAIL; +public static final int HOST = StandardTokenizer.HOST; +public static final int NUM = StandardTokenizer.NUM; +public static final int CJ = StandardTokenizer.CJ; /** * @deprecated this solves a bug where HOSTs that end with 
'.' are identified * as ACRONYMs. It is deprecated and will be removed in the next * release. */ -public static final int ACRONYM_DEP = 8; +public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP; public static final String [] TOKEN_TYPES = new String [] { "", Index: contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java =================================================================== --- contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java (revision 615191) +++ contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java (working copy) @@ -46,6 +46,25 @@ public static final String HEADING = "h"; public static final String SUB_HEADING = "sh"; + public static final int ALPHANUM_ID = 0; + public static final int APOSTROPHE_ID = 1; + public static final int ACRONYM_ID = 2; + public static final int COMPANY_ID = 3; + public static final int EMAIL_ID = 4; + public static final int HOST_ID = 5; + public static final int NUM_ID = 6; + public static final int CJ_ID = 7; + public static final int INTERNAL_LINK_ID = 8; + public static final int EXTERNAL_LINK_ID = 9; + public static final int CITATION_ID = 10; + public static final int CATEGORY_ID = 11; + public static final int BOLD_ID = 12; + public static final int ITALICS_ID = 13; + public static final int BOLD_ITALICS_ID = 14; + public static final int HEADING_ID = 15; + public static final int SUB_HEADING_ID = 16; + public static final int EXTERNAL_LINK_URL_ID = 17; + public static final int TOKENS_ONLY = 0; public static final int UNTOKENIZED_ONLY = 1; public static final int BOTH = 2; Index: contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.java =================================================================== --- contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.java (revision 615191) +++ 
contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.java (working copy) @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.4.1 on 1/16/08 10:31 AM */ +/* The following code was generated by JFlex 1.4.1 on 1/25/08 6:54 PM */ package org.apache.lucene.wikipedia.analysis; @@ -25,8 +25,8 @@ /** * This class is a scanner generated by * JFlex 1.4.1 - * on 1/16/08 10:31 AM from the specification file - * /Volumes/User/grantingersoll/projects/lucene/Lucene-Trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex + * on 1/25/08 6:54 PM from the specification file + * /tango/mike/src/lucene.clean/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex */ class WikipediaTokenizerImpl { @@ -425,24 +425,24 @@ /* user code: */ -public static final int ALPHANUM = 0; -public static final int APOSTROPHE = 1; -public static final int ACRONYM = 2; -public static final int COMPANY = 3; -public static final int EMAIL = 4; -public static final int HOST = 5; -public static final int NUM = 6; -public static final int CJ = 7; -public static final int INTERNAL_LINK = 8; -public static final int EXTERNAL_LINK = 9; -public static final int CITATION = 10; -public static final int CATEGORY = 11; -public static final int BOLD = 12; -public static final int ITALICS = 13; -public static final int BOLD_ITALICS = 14; -public static final int HEADING = 15; -public static final int SUB_HEADING = 16; -public static final int EXTERNAL_LINK_URL = 17; +public static final int ALPHANUM = WikipediaTokenizer.ALPHANUM_ID; +public static final int APOSTROPHE = WikipediaTokenizer.APOSTROPHE_ID; +public static final int ACRONYM = WikipediaTokenizer.ACRONYM_ID; +public static final int COMPANY = WikipediaTokenizer.COMPANY_ID; +public static final int EMAIL = WikipediaTokenizer.EMAIL_ID; +public static final int HOST = WikipediaTokenizer.HOST_ID; +public static final int NUM = 
WikipediaTokenizer.NUM_ID; +public static final int CJ = WikipediaTokenizer.CJ_ID; +public static final int INTERNAL_LINK = WikipediaTokenizer.INTERNAL_LINK_ID; +public static final int EXTERNAL_LINK = WikipediaTokenizer.EXTERNAL_LINK_ID; +public static final int CITATION = WikipediaTokenizer.CITATION_ID; +public static final int CATEGORY = WikipediaTokenizer.CATEGORY_ID; +public static final int BOLD = WikipediaTokenizer.BOLD_ID; +public static final int ITALICS = WikipediaTokenizer.ITALICS_ID; +public static final int BOLD_ITALICS = WikipediaTokenizer.BOLD_ITALICS_ID; +public static final int HEADING = WikipediaTokenizer.HEADING_ID; +public static final int SUB_HEADING = WikipediaTokenizer.SUB_HEADING_ID; +public static final int EXTERNAL_LINK_URL = WikipediaTokenizer.EXTERNAL_LINK_URL_ID; private int currentTokType; Index: contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex =================================================================== --- contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex (revision 615191) +++ contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex (working copy) @@ -30,24 +30,24 @@ %{ -public static final int ALPHANUM = 0; -public static final int APOSTROPHE = 1; -public static final int ACRONYM = 2; -public static final int COMPANY = 3; -public static final int EMAIL = 4; -public static final int HOST = 5; -public static final int NUM = 6; -public static final int CJ = 7; -public static final int INTERNAL_LINK = 8; -public static final int EXTERNAL_LINK = 9; -public static final int CITATION = 10; -public static final int CATEGORY = 11; -public static final int BOLD = 12; -public static final int ITALICS = 13; -public static final int BOLD_ITALICS = 14; -public static final int HEADING = 15; -public static final int SUB_HEADING = 16; -public static final int EXTERNAL_LINK_URL = 17; +public static final int ALPHANUM = 
WikipediaTokenizer.ALPHANUM_ID; +public static final int APOSTROPHE = WikipediaTokenizer.APOSTROPHE_ID; +public static final int ACRONYM = WikipediaTokenizer.ACRONYM_ID; +public static final int COMPANY = WikipediaTokenizer.COMPANY_ID; +public static final int EMAIL = WikipediaTokenizer.EMAIL_ID; +public static final int HOST = WikipediaTokenizer.HOST_ID; +public static final int NUM = WikipediaTokenizer.NUM_ID; +public static final int CJ = WikipediaTokenizer.CJ_ID; +public static final int INTERNAL_LINK = WikipediaTokenizer.INTERNAL_LINK_ID; +public static final int EXTERNAL_LINK = WikipediaTokenizer.EXTERNAL_LINK_ID; +public static final int CITATION = WikipediaTokenizer.CITATION_ID; +public static final int CATEGORY = WikipediaTokenizer.CATEGORY_ID; +public static final int BOLD = WikipediaTokenizer.BOLD_ID; +public static final int ITALICS = WikipediaTokenizer.ITALICS_ID; +public static final int BOLD_ITALICS = WikipediaTokenizer.BOLD_ITALICS_ID; +public static final int HEADING = WikipediaTokenizer.HEADING_ID; +public static final int SUB_HEADING = WikipediaTokenizer.SUB_HEADING_ID; +public static final int EXTERNAL_LINK_URL = WikipediaTokenizer.EXTERNAL_LINK_URL_ID; private int currentTokType;