Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java (working copy) @@ -34,11 +34,6 @@ */ public class KeywordTokenizerFactory extends TokenizerFactory { @Override - public KeywordTokenizer create(Reader input) { - return new KeywordTokenizer(input); - } - - @Override public KeywordTokenizer create(AttributeFactory factory, Reader input) { return new KeywordTokenizer(factory, input, KeywordTokenizer.DEFAULT_BUFFER_SIZE); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java (working copy) @@ -42,11 +42,6 @@ } @Override - public LetterTokenizer create(Reader input) { - return new LetterTokenizer(luceneMatchVersion, input); - } - - @Override public LetterTokenizer create(AttributeFactory factory, Reader input) { return new LetterTokenizer(luceneMatchVersion, factory, input); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java (working copy) @@ -43,11 +43,6 @@ } @Override - public LowerCaseTokenizer create(Reader input) { - return new LowerCaseTokenizer(luceneMatchVersion,input); - } - - 
@Override public LowerCaseTokenizer create(AttributeFactory factory, Reader input) { return new LowerCaseTokenizer(luceneMatchVersion, factory, input); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java (working copy) @@ -41,11 +41,6 @@ } @Override - public WhitespaceTokenizer create(Reader input) { - return new WhitespaceTokenizer(luceneMatchVersion,input); - } - - @Override public WhitespaceTokenizer create(AttributeFactory factory, Reader input) { return new WhitespaceTokenizer(luceneMatchVersion, factory, input); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizerFactory.java (working copy) @@ -56,11 +56,6 @@ } @Override - public EdgeNGramTokenizer create(Reader input) { - return new EdgeNGramTokenizer(input, side, minGramSize, maxGramSize); - } - - @Override public EdgeNGramTokenizer create(AttributeFactory factory, Reader input) { return new EdgeNGramTokenizer(factory, input, side, minGramSize, maxGramSize); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizerFactory.java (working copy) @@ 
-49,13 +49,8 @@ minGramSize = (minArg != null ? Integer.parseInt(minArg) : NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE); } - /** Creates the {@link TokenStream} of n-grams from the given {@link Reader}. */ + /** Creates the {@link TokenStream} of n-grams from the given {@link Reader} and {@link AttributeFactory}. */ @Override - public NGramTokenizer create(Reader input) { - return new NGramTokenizer(input, minGramSize, maxGramSize); - } - - @Override public NGramTokenizer create(AttributeFactory factory, Reader input) { return new NGramTokenizer(factory, input, minGramSize, maxGramSize); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java (working copy) @@ -63,8 +63,17 @@ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip); } + public PathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) { + this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip); + } + public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) { - super(input); + this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip); + } + + public PathHierarchyTokenizer + (AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) { + super(factory, input); if (bufferSize < 0) { throw new IllegalArgumentException("bufferSize cannot be negative"); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java =================================================================== --- 
lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizerFactory.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.util.TokenizerFactory; +import org.apache.lucene.util.AttributeSource.AttributeFactory; /** * Factory for {@link PathHierarchyTokenizer}. @@ -119,11 +120,11 @@ } @Override - public Tokenizer create(Reader input) { + public Tokenizer create(AttributeFactory factory, Reader input) { if( reverse ) { - return new ReversePathHierarchyTokenizer(input, delimiter, replacement, skip); + return new ReversePathHierarchyTokenizer(factory, input, delimiter, replacement, skip); } - return new PathHierarchyTokenizer(input, delimiter, replacement, skip); + return new PathHierarchyTokenizer(factory, input, delimiter, replacement, skip); } } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java (working copy) @@ -75,8 +75,17 @@ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip); } + public ReversePathHierarchyTokenizer + (AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) { + this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip); + } + public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) { - super(input); + this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip); + } + public ReversePathHierarchyTokenizer + (AttributeFactory factory, Reader input, int 
bufferSize, char delimiter, char replacement, int skip) { + super(factory, input); if (bufferSize < 0) { throw new IllegalArgumentException("bufferSize cannot be negative"); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java (working copy) @@ -66,7 +66,12 @@ /** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */ public PatternTokenizer(Reader input, Pattern pattern, int group) throws IOException { - super(input); + this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, pattern, group); + } + + /** creates a new PatternTokenizer returning tokens from group (-1 for split functionality) */ + public PatternTokenizer(AttributeFactory factory, Reader input, Pattern pattern, int group) throws IOException { + super(factory, input); this.pattern = pattern; this.group = group; Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizerFactory.java (working copy) @@ -22,9 +22,8 @@ import java.util.Map; import java.util.regex.Pattern; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.pattern.PatternTokenizer; import org.apache.lucene.analysis.util.TokenizerFactory; +import org.apache.lucene.util.AttributeSource.AttributeFactory; /** * Factory for {@link PatternTokenizer}. 
@@ -91,9 +90,9 @@ * Split the input using configured pattern */ @Override - public PatternTokenizer create(final Reader in) { + public PatternTokenizer create(final AttributeFactory factory, final Reader in) { try { - return new PatternTokenizer(in, pattern, group); + return new PatternTokenizer(factory, in, pattern, group); } catch( IOException ex ) { throw new RuntimeException("IOException thrown creating PatternTokenizer instance", ex); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerFactory.java (working copy) @@ -48,13 +48,6 @@ } @Override - public ClassicTokenizer create(Reader input) { - ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, input); - tokenizer.setMaxTokenLength(maxTokenLength); - return tokenizer; - } - - @Override public ClassicTokenizer create(AttributeFactory factory, Reader input) { ClassicTokenizer tokenizer = new ClassicTokenizer(luceneMatchVersion, factory, input); tokenizer.setMaxTokenLength(maxTokenLength); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerFactory.java (working copy) @@ -42,22 +42,12 @@ public void init(Map args) { super.init(args); assureMatchVersion(); - maxTokenLength = getInt("maxTokenLength", - StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); + maxTokenLength = getInt("maxTokenLength", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); } @Override - 
public StandardTokenizer create(Reader input) { - StandardTokenizer tokenizer - = new StandardTokenizer(luceneMatchVersion, input); - tokenizer.setMaxTokenLength(maxTokenLength); - return tokenizer; - } - - @Override public StandardTokenizer create(AttributeFactory factory, Reader input) { - StandardTokenizer tokenizer - = new StandardTokenizer(luceneMatchVersion, factory, input); + StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, factory, input); tokenizer.setMaxTokenLength(maxTokenLength); return tokenizer; } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java (working copy) @@ -48,13 +48,6 @@ } @Override - public UAX29URLEmailTokenizer create(Reader input) { - UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, input); - tokenizer.setMaxTokenLength(maxTokenLength); - return tokenizer; - } - - @Override public UAX29URLEmailTokenizer create(AttributeFactory factory, Reader input) { UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, factory, input); tokenizer.setMaxTokenLength(maxTokenLength); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java (working copy) @@ -62,11 +62,11 @@ loader.reload(classloader); } - /** Creates a TokenStream of the specified input */ - public abstract Tokenizer create(Reader input); + /** Creates a 
TokenStream of the specified input using the default attribute factory. */ + public final Tokenizer create(Reader input) { + return create(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input); + } /** Creates a TokenStream of the specified input using the given AttributeFactory */ - public Tokenizer create(AttributeFactory factory, Reader input) { - throw new UnsupportedOperationException(); - } + public abstract Tokenizer create(AttributeFactory factory, Reader input); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java (revision 1456718) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java (working copy) @@ -36,11 +36,6 @@ public class WikipediaTokenizerFactory extends TokenizerFactory { // TODO: add support for WikipediaTokenizer's advanced options. 
@Override - public WikipediaTokenizer create(Reader input) { - return new WikipediaTokenizer(input); - } - - @Override public WikipediaTokenizer create(AttributeFactory factory, Reader input) { return new WikipediaTokenizer(factory, input, WikipediaTokenizer.TOKENS_ONLY, Collections.emptySet()); Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java (revision 1456718) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java (working copy) @@ -33,6 +33,7 @@ import org.apache.lucene.analysis.util.StringMockResourceLoader; import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenizerFactory; +import org.apache.lucene.util.AttributeSource.AttributeFactory; /** * Sanity check some things about all factories, @@ -146,8 +147,8 @@ // some silly classes just so we can use checkRandomData private TokenizerFactory assertingTokenizer = new TokenizerFactory() { @Override - public MockTokenizer create(Reader input) { - return new MockTokenizer(input); + public MockTokenizer create(AttributeFactory factory, Reader input) { + return new MockTokenizer(factory, input); } }; Index: lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java =================================================================== --- lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (revision 1456718) +++ lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (working copy) @@ -62,6 +62,8 @@ * Reader. *

* The default script-specific handling is used. + *

+ * The default attribute factory is used. * * @param input Reader containing text to tokenize. * @see DefaultICUTokenizerConfig @@ -73,12 +75,26 @@ /** * Construct a new ICUTokenizer that breaks text into words from the given * Reader, using a tailored BreakIterator configuration. + *

+ * The default attribute factory is used. * * @param input Reader containing text to tokenize. * @param config Tailored BreakIterator configuration */ public ICUTokenizer(Reader input, ICUTokenizerConfig config) { - super(input); + this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, config); + } + + /** + * Construct a new ICUTokenizer that breaks text into words from the given + * Reader, using a tailored BreakIterator configuration. + * + * @param factory AttributeFactory to use + * @param input Reader containing text to tokenize. + * @param config Tailored BreakIterator configuration + */ + public ICUTokenizer(AttributeFactory factory, Reader input, ICUTokenizerConfig config) { + super(factory, input); this.config = config; breaker = new CompositeBreakIterator(config); } Index: lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java =================================================================== --- lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java (revision 1456718) +++ lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java (working copy) @@ -25,11 +25,11 @@ import java.util.List; import java.util.Map; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.lucene.analysis.util.ResourceLoaderAware; import org.apache.lucene.analysis.util.TokenizerFactory; +import org.apache.lucene.util.AttributeSource.AttributeFactory; import org.apache.lucene.util.IOUtils; import com.ibm.icu.lang.UCharacter; @@ -144,8 +144,8 @@ } @Override - public ICUTokenizer create(Reader input) { + public ICUTokenizer create(AttributeFactory factory, Reader input) { assert config != null : "inform must be called first!"; - return new ICUTokenizer(input, config); + return new ICUTokenizer(factory, input, config); } 
} Index: lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java =================================================================== --- lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (revision 1456718) +++ lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (working copy) @@ -187,6 +187,8 @@ /** * Create a new JapaneseTokenizer. + *

+ * Uses the default AttributeFactory. * * @param input Reader containing text * @param userDictionary Optional: if non-null, user dictionary. @@ -194,7 +196,21 @@ * @param mode tokenization mode. */ public JapaneseTokenizer(Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) { - super(input); + this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, userDictionary, discardPunctuation, mode); + } + + /** + * Create a new JapaneseTokenizer. + * + * @param factory the AttributeFactory to use + * @param input Reader containing text + * @param userDictionary Optional: if non-null, user dictionary. + * @param discardPunctuation true if punctuation tokens should be dropped from the output. + * @param mode tokenization mode. + */ + public JapaneseTokenizer + (AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) { + super(factory, input); dictionary = TokenInfoDictionary.getInstance(); fst = dictionary.getFST(); unkDictionary = UnknownDictionary.getInstance(); Index: lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java =================================================================== --- lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java (revision 1456718) +++ lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizerFactory.java (working copy) @@ -27,11 +27,10 @@ import java.util.Locale; import java.util.Map; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.ja.JapaneseTokenizer; import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; import org.apache.lucene.analysis.ja.dict.UserDictionary; import org.apache.lucene.analysis.util.TokenizerFactory; +import org.apache.lucene.util.AttributeSource.AttributeFactory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.analysis.util.ResourceLoader; import 
org.apache.lucene.analysis.util.ResourceLoaderAware; @@ -89,8 +88,8 @@ } @Override - public JapaneseTokenizer create(Reader input) { - return new JapaneseTokenizer(input, userDictionary, discardPunctuation, mode); + public JapaneseTokenizer create(AttributeFactory factory, Reader input) { + return new JapaneseTokenizer(factory, input, userDictionary, discardPunctuation, mode); } private Mode getMode(Map args) { Index: lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseSentenceTokenizerFactory.java =================================================================== --- lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseSentenceTokenizerFactory.java (revision 1456718) +++ lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseSentenceTokenizerFactory.java (working copy) @@ -28,11 +28,6 @@ */ public class SmartChineseSentenceTokenizerFactory extends TokenizerFactory { @Override - public SentenceTokenizer create(Reader input) { - return new SentenceTokenizer(input); - } - - @Override public SentenceTokenizer create(AttributeFactory factory, Reader input) { return new SentenceTokenizer(factory, input); } Index: lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java =================================================================== --- lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (revision 1456718) +++ lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (working copy) @@ -44,8 +44,9 @@ protected AnalysisEngine ae; protected CAS cas; - protected BaseUIMATokenizer(Reader reader, String descriptorPath, Map configurationParameters) { - super(reader); + protected BaseUIMATokenizer + (AttributeFactory factory, Reader reader, String descriptorPath, Map configurationParameters) { + super(factory, reader); this.descriptorPath = descriptorPath; this.configurationParameters = 
configurationParameters; } Index: lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java =================================================================== --- lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (revision 1456718) +++ lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (working copy) @@ -43,7 +43,12 @@ private int finalOffset = 0; public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map configurationParameters, Reader input) { - super(input, descriptorPath, configurationParameters); + this(descriptorPath, tokenType, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input); + } + + public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map configurationParameters, + AttributeFactory factory, Reader input) { + super(factory, input, descriptorPath, configurationParameters); this.tokenTypeString = tokenType; this.termAttr = addAttribute(CharTermAttribute.class); this.offsetAttr = addAttribute(OffsetAttribute.class); Index: lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java =================================================================== --- lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java (revision 1456718) +++ lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java (working copy) @@ -18,6 +18,7 @@ */ import org.apache.lucene.analysis.util.TokenizerFactory; +import org.apache.lucene.util.AttributeSource.AttributeFactory; import java.io.Reader; import java.util.HashMap; @@ -52,7 +53,7 @@ } @Override - public UIMAAnnotationsTokenizer create(Reader input) { - return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, input); + public UIMAAnnotationsTokenizer create(AttributeFactory factory, Reader input) { + 
return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, factory, input); } } Index: lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java =================================================================== --- lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (revision 1456718) +++ lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (working copy) @@ -53,7 +53,12 @@ private int finalOffset = 0; public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map configurationParameters, Reader input) { - super(input, descriptorPath, configurationParameters); + this(descriptorPath, tokenType, typeAttributeFeaturePath, configurationParameters, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input); + } + + public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, + Map configurationParameters, AttributeFactory factory, Reader input) { + super(factory, input, descriptorPath, configurationParameters); this.tokenTypeString = tokenType; this.termAttr = addAttribute(CharTermAttribute.class); this.typeAttr = addAttribute(TypeAttribute.class); Index: lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java =================================================================== --- lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java (revision 1456718) +++ lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java (working copy) @@ -18,6 +18,7 @@ */ import org.apache.lucene.analysis.util.TokenizerFactory; +import org.apache.lucene.util.AttributeSource.AttributeFactory; import java.io.Reader; import java.util.HashMap; @@ -54,7 +55,8 @@ } @Override - 
public UIMATypeAwareAnnotationsTokenizer create(Reader input) { - return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, input); + public UIMATypeAwareAnnotationsTokenizer create(AttributeFactory factory, Reader input) { + return new UIMATypeAwareAnnotationsTokenizer + (descriptorPath, tokenType, featurePath, configurationParameters, factory, input); } } Index: lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java (revision 1456718) +++ lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java (working copy) @@ -100,12 +100,21 @@ public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) { this(input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH); } - /** Calls {@link #MockTokenizer(Reader, CharacterRunAutomaton, boolean) MockTokenizer(Reader, WHITESPACE, true)} */ public MockTokenizer(Reader input) { this(input, WHITESPACE, true); } - + + public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) { + this(factory, input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH); + } + + /** Calls {@link #MockTokenizer(org.apache.lucene.util.AttributeSource.AttributeFactory,Reader,CharacterRunAutomaton,boolean) + * MockTokenizer(AttributeFactory, Reader, WHITESPACE, true)} */ + public MockTokenizer(AttributeFactory factory, Reader input) { + this(factory, input, WHITESPACE, true); + } + @Override public final boolean incrementToken() throws IOException { assert !enableChecks || (streamState == State.RESET || streamState == State.INCREMENT) Index: solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java =================================================================== --- 
solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java (revision 1456718) +++ solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeSource.AttributeFactory; import org.apache.solr.common.SolrException; import org.apache.solr.schema.DateField; import static org.apache.solr.schema.TrieField.TrieTypes; @@ -54,8 +55,8 @@ } @Override - public TrieTokenizer create(Reader input) { - return new TrieTokenizer(input, type, TrieTokenizer.getNumericTokenStream(precisionStep)); + public TrieTokenizer create(AttributeFactory factory, Reader input) { + return new TrieTokenizer(factory, input, type, TrieTokenizer.getNumericTokenStream(precisionStep)); } } @@ -75,7 +76,12 @@ } public TrieTokenizer(Reader input, TrieTypes type, final NumericTokenStream ts) { - // Häckidy-Hick-Hack: must share the attributes with the NumericTokenStream we delegate to, so we create a fake factory: + this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, type, ts); + } + + public TrieTokenizer(AttributeFactory factory, Reader input, TrieTypes type, final NumericTokenStream ts) { + // Hack #0: factory param is ignored + // Häckidy-Hick-Hack #1: must share the attributes with the NumericTokenStream we delegate to, so we create a fake factory: super(new AttributeFactory() { @Override public AttributeImpl createAttributeInstance(Class attClass) { Index: solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java =================================================================== --- solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java (revision 1456718) +++ solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java (working copy) @@ -21,8 +21,8 @@ import java.util.Map; import 
org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.util.TokenizerFactory; +import org.apache.lucene.util.AttributeSource.AttributeFactory; import org.apache.lucene.util.automaton.CharacterRunAutomaton; /** @@ -53,10 +53,10 @@ enableChecks = getBoolean("enableChecks", true); } @Override - public MockTokenizer create(Reader input) { - MockTokenizer t = new MockTokenizer(input, pattern, false); + public MockTokenizer create(AttributeFactory factory, Reader input) { + MockTokenizer t = new MockTokenizer(factory, input, pattern, false); t.setEnableChecks(enableChecks); return t; }