Index: solr/webapp/web/admin/analysis.jsp
===================================================================
--- solr/webapp/web/admin/analysis.jsp	(revision 1144761)
+++ solr/webapp/web/admin/analysis.jsp	(revision )
@@ -204,7 +204,7 @@
       }
     }
-    TokenStream tstream = tfac.create(tchain.charStream(new StringReader(val)));
+    TokenStream tstream = tfac.create(tchain.initReader(new StringReader(val)));
     List<AttributeSource> tokens = getTokens(tstream);
     if (verbose) {
       writeHeader(out, tfac.getClass(), tfac.getArgs());
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java	(revision 1170424)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java	(revision )
@@ -16,20 +16,20 @@
  * limitations under the License.
  */

+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util.BytesRef;

 import java.io.IOException;
-import java.io.Reader;
 import java.util.*;

 /**
@@ -42,9 +42,9 @@
  * this term to take 2 seconds.
  * </p>
  */
-public final class QueryAutoStopWordAnalyzer extends Analyzer {
+public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {

-  private final Analyzer delegate;
+  private final ReusableAnalyzerBase delegate;
   private final Map<String, Set<String>> stopWordsPerField = new HashMap<String, Set<String>>();
   //The default maximum percentage (40%) of index documents which
   //can contain a term, after which the term is considered to be a stop word.
@@ -63,7 +63,7 @@
    */
   public QueryAutoStopWordAnalyzer(
       Version matchVersion,
-      Analyzer delegate,
+      ReusableAnalyzerBase delegate,
       IndexReader indexReader) throws IOException {
     this(matchVersion, delegate, indexReader, defaultMaxDocFreqPercent);
   }
@@ -81,7 +81,7 @@
    */
   public QueryAutoStopWordAnalyzer(
       Version matchVersion,
-      Analyzer delegate,
+      ReusableAnalyzerBase delegate,
       IndexReader indexReader,
       int maxDocFreq) throws IOException {
     this(matchVersion, delegate, indexReader, indexReader.getFieldNames(IndexReader.FieldOption.INDEXED), maxDocFreq);
   }
@@ -101,7 +101,7 @@
    */
   public QueryAutoStopWordAnalyzer(
       Version matchVersion,
-      Analyzer delegate,
+      ReusableAnalyzerBase delegate,
       IndexReader indexReader,
       float maxPercentDocs) throws IOException {
     this(matchVersion, delegate, indexReader, indexReader.getFieldNames(IndexReader.FieldOption.INDEXED), maxPercentDocs);
@@ -122,7 +122,7 @@
    */
   public QueryAutoStopWordAnalyzer(
       Version matchVersion,
-      Analyzer delegate,
+      ReusableAnalyzerBase delegate,
       IndexReader indexReader,
       Collection<String> fields,
       float maxPercentDocs) throws IOException {
@@ -143,7 +143,7 @@
    */
   public QueryAutoStopWordAnalyzer(
       Version matchVersion,
-      Analyzer delegate,
+      ReusableAnalyzerBase delegate,
       IndexReader indexReader,
       Collection<String> fields,
       int maxDocFreq) throws IOException {
@@ -168,81 +168,20 @@
   }

   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result;
-    try {
-      result = delegate.reusableTokenStream(fieldName, reader);
-    } catch (IOException e) {
-      result = delegate.tokenStream(fieldName, reader);
+  protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
+    return delegate;
-    }
+  }
-    Set<String> stopWords = stopWordsPerField.get(fieldName);
-    if (stopWords != null) {
-      result = new StopFilter(matchVersion, result, stopWords);
-    }
-    return result;
-  }
+
-  private class SavedStreams {
-    /* the underlying stream */
-    TokenStream wrapped;
-
-    /*
-     * when there are no stopwords for the field, refers to wrapped.
-     * if there stopwords, it is a StopFilter around wrapped.
-     */
-    TokenStream withStopFilter;
-  }
-
-  @SuppressWarnings("unchecked")
   @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    /* map of SavedStreams for each field */
-    Map<String, SavedStreams> streamMap = (Map<String, SavedStreams>) getPreviousTokenStream();
-    if (streamMap == null) {
-      streamMap = new HashMap<String, SavedStreams>();
-      setPreviousTokenStream(streamMap);
-    }
-
-    SavedStreams streams = streamMap.get(fieldName);
-    if (streams == null) {
-      /* an entry for this field does not exist, create one */
-      streams = new SavedStreams();
-      streamMap.put(fieldName, streams);
-      streams.wrapped = delegate.reusableTokenStream(fieldName, reader);
-
-      /* if there are any stopwords for the field, save the stopfilter */
+  protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
-      Set<String> stopWords = stopWordsPerField.get(fieldName);
+    Set<String> stopWords = stopWordsPerField.get(fieldName);
-      if (stopWords != null) {
-        streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
-      } else {
-        streams.withStopFilter = streams.wrapped;
+    if (stopWords == null) {
+      return components;
-      }
+    }
-    } else {
-      /*
-       * an entry for this field exists, verify the wrapped stream has not
-       * changed. if it has not, reuse it, otherwise wrap the new stream.
-       */
-      TokenStream result = delegate.reusableTokenStream(fieldName, reader);
-      if (result == streams.wrapped) {
-        /* the wrapped analyzer reused the stream */
-      } else {
-        /*
-         * the wrapped analyzer did not. if there are any stopwords for the
-         * field, create a new StopFilter around the new stream
-         */
-        streams.wrapped = result;
-        Set<String> stopWords = stopWordsPerField.get(fieldName);
-        if (stopWords != null) {
-          streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
-        } else {
-          streams.withStopFilter = streams.wrapped;
+    StopFilter stopFilter = new StopFilter(matchVersion, components.getTokenStream(), stopWords);
+    return new TokenStreamComponents(components.getTokenizer(), stopFilter);
-        }
+  }
-      }
-    }
-    return streams.withStopFilter;
-  }
-
   /**
    * Provides information on which stop words have been identified for a field
    *
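For context, the refactoring leaves the public usage of QueryAutoStopWordAnalyzer unchanged; only the internals move to the wrapComponents contract. A minimal sketch of typical construction (the delegate analyzer, the 40% threshold, and the Version constant are illustrative, not taken from this patch):

import java.io.IOException;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.query.QueryAutoStopWordAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.Version;

public class StopWordWrapperExample {
  // Behaves like WhitespaceAnalyzer, but drops any term that occurs in
  // more than 40% of the documents visible to the given reader.
  public static QueryAutoStopWordAnalyzer wrap(IndexReader reader) throws IOException {
    return new QueryAutoStopWordAnalyzer(
        Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40), reader, 0.4f);
  }
}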
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java	(revision 1170942)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java	(revision )
@@ -17,20 +17,16 @@
  * limitations under the License.
  */

-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;

-import java.io.Reader;
-import java.io.IOException;
 import java.util.Collections;
 import java.util.Map;
-import java.util.HashMap;

 /**
  * This analyzer is used to facilitate scenarios where different
  * fields require different analysis techniques. Use the Map
- * argument in {@link #PerFieldAnalyzerWrapper(Analyzer, java.util.Map)}
+ * argument in {@link #PerFieldAnalyzerWrapper(ReusableAnalyzerBase, java.util.Map)}
  * to add non-default analyzers for fields.
  *
  * <p>Example usage:
@@ -50,9 +46,9 @@
  * </pre>
  * <p>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
  * and query parsing.
  */
-public final class PerFieldAnalyzerWrapper extends Analyzer {
-  private final Analyzer defaultAnalyzer;
-  private final Map<String, Analyzer> fieldAnalyzers;
+public final class PerFieldAnalyzerWrapper extends AnalyzerWrapper {
+  private final ReusableAnalyzerBase defaultAnalyzer;
+  private final Map<String, ReusableAnalyzerBase> fieldAnalyzers;

   /**
    * Constructs with default analyzer.
    *
    * @param defaultAnalyzer Any fields not specifically
    * defined to use a different analyzer will use the one provided here.
    */
-  public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) {
+  public PerFieldAnalyzerWrapper(ReusableAnalyzerBase defaultAnalyzer) {
     this(defaultAnalyzer, null);
   }

@@ -73,51 +69,24 @@
    * @param fieldAnalyzers a Map (String field name to the Analyzer) to be
    * used for those fields
    */
-  public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer,
-      Map<String, Analyzer> fieldAnalyzers) {
+  public PerFieldAnalyzerWrapper(ReusableAnalyzerBase defaultAnalyzer,
+      Map<String, ReusableAnalyzerBase> fieldAnalyzers) {
     this.defaultAnalyzer = defaultAnalyzer;
-    this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : Collections.<String, Analyzer>emptyMap();
+    this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : Collections.<String, ReusableAnalyzerBase>emptyMap();
   }

   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    Analyzer analyzer = fieldAnalyzers.get(fieldName);
-    if (analyzer == null) {
-      analyzer = defaultAnalyzer;
+  protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
+    ReusableAnalyzerBase analyzer = fieldAnalyzers.get(fieldName);
+    return (analyzer != null) ? analyzer : defaultAnalyzer;
-    }
+  }
-    return analyzer.tokenStream(fieldName, reader);
-  }

   @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    Analyzer analyzer = fieldAnalyzers.get(fieldName);
-    if (analyzer == null)
-      analyzer = defaultAnalyzer;
-
-    return analyzer.reusableTokenStream(fieldName, reader);
+  protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+    return components;
   }

-  /** Return the positionIncrementGap from the analyzer assigned to fieldName */
   @Override
-  public int getPositionIncrementGap(String fieldName) {
-    Analyzer analyzer = fieldAnalyzers.get(fieldName);
-    if (analyzer == null)
-      analyzer = defaultAnalyzer;
-    return analyzer.getPositionIncrementGap(fieldName);
-  }
-
-  /** Return the offsetGap from the analyzer assigned to field */
-  @Override
-  public int getOffsetGap(IndexableField field) {
-    Analyzer analyzer = fieldAnalyzers.get(field.name());
-    if (analyzer == null) {
-      analyzer = defaultAnalyzer;
-    }
-    return analyzer.getOffsetGap(field);
-  }
-
-  @Override
   public String toString() {
     return "PerFieldAnalyzerWrapper(" + fieldAnalyzers + ", default=" + defaultAnalyzer + ")";
   }
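The "Example usage" block elided by the hunk above stays valid under the new signature; a hedged sketch of typical construction under the patched API (the field name and analyzer choices are illustrative):

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.util.Version;

public class PerFieldExample {
  public static PerFieldAnalyzerWrapper build() {
    // Fields with an entry here get their own analyzer; everything else
    // falls back to the default analyzer (first constructor argument).
    Map<String, ReusableAnalyzerBase> analyzerPerField =
        new HashMap<String, ReusableAnalyzerBase>();
    analyzerPerField.put("title", new SimpleAnalyzer(Version.LUCENE_40));
    return new PerFieldAnalyzerWrapper(
        new WhitespaceAnalyzer(Version.LUCENE_40), analyzerPerField);
  }
}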
Index: solr/core/src/java/org/apache/solr/schema/IndexSchema.java
===================================================================
--- solr/core/src/java/org/apache/solr/schema/IndexSchema.java	(revision 1169470)
+++ solr/core/src/java/org/apache/solr/schema/IndexSchema.java	(revision )
@@ -18,7 +18,8 @@
 package org.apache.solr.schema;

 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.search.similarities.Similarity;
@@ -41,8 +42,6 @@
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpressionException;
-import java.io.Reader;
-import java.io.IOException;
 import java.util.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -292,62 +291,49 @@
     queryAnalyzer = new SolrQueryAnalyzer();
   }

-  private class SolrIndexAnalyzer extends Analyzer {
-    protected final HashMap<String, Analyzer> analyzers;
+  private class SolrIndexAnalyzer extends AnalyzerWrapper {
+    protected final HashMap<String, ReusableAnalyzerBase> analyzers;

     SolrIndexAnalyzer() {
       analyzers = analyzerCache();
     }

-    protected HashMap<String, Analyzer> analyzerCache() {
-      HashMap<String, Analyzer> cache = new HashMap<String, Analyzer>();
+    protected HashMap<String, ReusableAnalyzerBase> analyzerCache() {
+      HashMap<String, ReusableAnalyzerBase> cache = new HashMap<String, ReusableAnalyzerBase>();
-      for (SchemaField f : getFields().values()) {
+      for (SchemaField f : getFields().values()) {
-        Analyzer analyzer = f.getType().getAnalyzer();
+        ReusableAnalyzerBase analyzer = (ReusableAnalyzerBase) f.getType().getAnalyzer();
         cache.put(f.getName(), analyzer);
       }
       return cache;
     }

-    protected Analyzer getAnalyzer(String fieldName)
-    {
-      Analyzer analyzer = analyzers.get(fieldName);
-      return analyzer!=null ? analyzer : getDynamicFieldType(fieldName).getAnalyzer();
-    }
-
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader)
-    {
-      return getAnalyzer(fieldName).tokenStream(fieldName,reader);
+    protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
+      ReusableAnalyzerBase analyzer = analyzers.get(fieldName);
+      return analyzer != null ? analyzer : (ReusableAnalyzerBase) getDynamicFieldType(fieldName).getAnalyzer();
     }

     @Override
-    public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-      return getAnalyzer(fieldName).reusableTokenStream(fieldName,reader);
+    protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+      return components;
     }
-
-    @Override
-    public int getPositionIncrementGap(String fieldName) {
-      return getAnalyzer(fieldName).getPositionIncrementGap(fieldName);
-    }
+  }
-  }
-
   private class SolrQueryAnalyzer extends SolrIndexAnalyzer {
     @Override
-    protected HashMap<String, Analyzer> analyzerCache() {
-      HashMap<String, Analyzer> cache = new HashMap<String, Analyzer>();
+    protected HashMap<String, ReusableAnalyzerBase> analyzerCache() {
+      HashMap<String, ReusableAnalyzerBase> cache = new HashMap<String, ReusableAnalyzerBase>();
       for (SchemaField f : getFields().values()) {
-        Analyzer analyzer = f.getType().getQueryAnalyzer();
+        ReusableAnalyzerBase analyzer = (ReusableAnalyzerBase) f.getType().getQueryAnalyzer();
         cache.put(f.getName(), analyzer);
       }
       return cache;
     }

     @Override
-    protected Analyzer getAnalyzer(String fieldName)
-    {
-      Analyzer analyzer = analyzers.get(fieldName);
-      return analyzer!=null ? analyzer : getDynamicFieldType(fieldName).getQueryAnalyzer();
+    protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
+      ReusableAnalyzerBase analyzer = analyzers.get(fieldName);
+      return analyzer != null ? analyzer : (ReusableAnalyzerBase) getDynamicFieldType(fieldName).getQueryAnalyzer();
     }
   }
Index: solr/core/src/test/org/apache/solr/schema/IndexSchemaTest.java
===================================================================
--- solr/core/src/test/org/apache/solr/schema/IndexSchemaTest.java	(revision 1169470)
+++ solr/core/src/test/org/apache/solr/schema/IndexSchemaTest.java	(revision )
@@ -17,26 +17,25 @@
 package org.apache.solr.schema;

-import java.util.HashMap;
-import java.util.Map;
-
+import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.MapSolrParams;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequest;
-import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.junit.BeforeClass;
 import org.junit.Test;

+import java.util.HashMap;
+import java.util.Map;
+
 public class IndexSchemaTest extends SolrTestCaseJ4 {
   @BeforeClass
   public static void beforeClass() throws Exception {
     initCore("solrconfig.xml","schema.xml");
-  }
+  }

   /**
    * This test assumes the schema includes:
    *
    *
    */
   @Test
-  public void testDynamicCopy()
+  public void testDynamicCopy() {
     SolrCore core = h.getCore();
     assertU(adoc("id", "10", "title", "test", "aaa_dynamic", "aaa"));
     assertU(commit());
-    
+
     Map<String,String> args = new HashMap<String,String>();
     args.put( CommonParams.Q, "title:test" );
     args.put( "indent", "true" );
     SolrQueryRequest req = new LocalSolrQueryRequest( core, new MapSolrParams( args) );
-    
+
     assertQ("Make sure they got in", req
             ,"//*[@numFound='1']"
             ,"//result/doc[1]/int[@name='id'][.='10']"
             );
-    
+
     args = new HashMap<String,String>();
     args.put( CommonParams.Q, "aaa_dynamic:aaa" );
     args.put( "indent", "true" );
@@ -87,47 +86,8 @@
     assertTrue("wrong class", similarityProvider instanceof MockConfigurableSimilarityProvider);
     assertEquals("is there an echo?", ((MockConfigurableSimilarityProvider)similarityProvider).getPassthrough());
   }
-  
+
   @Test
-  public void testRuntimeFieldCreation()
-  {
-    // any field manipulation needs to happen when you know the core will not
-    // be accepting any requests. Typically this is done within the inform()
-    // method.  Since this is a single threaded test, we can change the fields
-    // willi-nilly
-
-    SolrCore core = h.getCore();
-    IndexSchema schema = core.getSchema();
-    final String fieldName = "runtimefield";
-    SchemaField sf = new SchemaField( fieldName, schema.getFieldTypes().get( "string" ) );
-    schema.getFields().put( fieldName, sf );
-
-    // also register a new copy field (from our new field)
-    schema.registerCopyField( fieldName, "dynamic_runtime" );
-    schema.refreshAnalyzers();
-
-    assertU(adoc("id", "10", "title", "test", fieldName, "aaa"));
-    assertU(commit());
-
-    SolrQuery query = new SolrQuery( fieldName+":aaa" );
-    query.set( "indent", "true" );
-    SolrQueryRequest req = new LocalSolrQueryRequest( core, query );
-
-    assertQ("Make sure they got in", req
-            ,"//*[@numFound='1']"
-            ,"//result/doc[1]/int[@name='id'][.='10']"
-            );
-
-    // Check to see if our copy field made it out safely
-    query.setQuery( "dynamic_runtime:aaa" );
-    assertQ("Make sure they got in", req
-            ,"//*[@numFound='1']"
-            ,"//result/doc[1]/int[@name='id'][.='10']"
-            );
-    clearIndex();
-  }
-
-  @Test
+  @Test
   public void testIsDynamicField() throws Exception {
     SolrCore core = h.getCore();
     IndexSchema schema = core.getSchema();
@@ -141,6 +101,5 @@
     SolrCore core = h.getCore();
     IndexSchema schema = core.getSchema();
     assertFalse(schema.getField("id").multiValued());
-    
   }
 }
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java	(revision 1162347)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java	(revision )
@@ -17,54 +17,37 @@
  * limitations under the License.
  */

-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;

-import java.io.Reader;
-import java.io.IOException;
-
 /**
  * This Analyzer limits the number of tokens while indexing. It is
  * a replacement for the maximum field length setting inside {@link org.apache.lucene.index.IndexWriter}.
 */
-public final class LimitTokenCountAnalyzer extends Analyzer {
-  private final Analyzer delegate;
+public final class LimitTokenCountAnalyzer extends AnalyzerWrapper {
+  private final ReusableAnalyzerBase delegate;
   private final int maxTokenCount;

   /**
    * Build an analyzer that limits the maximum number of tokens per field.
   */
-  public LimitTokenCountAnalyzer(Analyzer delegate, int maxTokenCount) {
+  public LimitTokenCountAnalyzer(ReusableAnalyzerBase delegate, int maxTokenCount) {
     this.delegate = delegate;
     this.maxTokenCount = maxTokenCount;
   }
-  
+
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    return new LimitTokenCountFilter(
-      delegate.tokenStream(fieldName, reader), maxTokenCount
-    );
+  protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
+    return delegate;
   }
-  
+
   @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    return new LimitTokenCountFilter(
-      delegate.reusableTokenStream(fieldName, reader), maxTokenCount
-    );
+  protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+    return new TokenStreamComponents(components.getTokenizer(),
+        new LimitTokenCountFilter(components.getTokenStream(), maxTokenCount));
   }

   @Override
-  public int getPositionIncrementGap(String fieldName) {
-    return delegate.getPositionIncrementGap(fieldName);
-  }
-
-  @Override
-  public int getOffsetGap(IndexableField field) {
-    return delegate.getOffsetGap(field);
-  }
-
-  @Override
   public String toString() {
     return "LimitTokenCountAnalyzer(" + delegate.toString() + ", maxTokenCount=" + maxTokenCount + ")";
   }
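Typical usage is unchanged by the rewrite; a small sketch (the delegate analyzer, the limit of 10,000, and the Version constant are illustrative):

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
import org.apache.lucene.util.Version;

public class LimitExample {
  // Indexing with this analyzer keeps at most the first 10,000 tokens of
  // each field, mirroring IndexWriter's old maxFieldLength setting.
  public static LimitTokenCountAnalyzer build() {
    return new LimitTokenCountAnalyzer(
        new WhitespaceAnalyzer(Version.LUCENE_40), 10000);
  }
}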
Index: solr/core/src/test/org/apache/solr/schema/IndexSchemaRuntimeFieldTest.java
===================================================================
--- solr/core/src/test/org/apache/solr/schema/IndexSchemaRuntimeFieldTest.java	(revision )
+++ solr/core/src/test/org/apache/solr/schema/IndexSchemaRuntimeFieldTest.java	(revision )
@@ -0,0 +1,72 @@
+package org.apache.solr.schema;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class IndexSchemaRuntimeFieldTest extends SolrTestCaseJ4 {
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig.xml","schema.xml");
+  }
+
+  @Test
+  public void testRuntimeFieldCreation() {
+    // any field manipulation needs to happen when you know the core will not
+    // be accepting any requests. Typically this is done within the inform()
+    // method.  Since this is a single threaded test, we can change the fields
+    // willi-nilly
+
+    SolrCore core = h.getCore();
+    IndexSchema schema = core.getSchema();
+    final String fieldName = "runtimefield";
+    SchemaField sf = new SchemaField( fieldName, schema.getFieldTypes().get( "string" ) );
+    schema.getFields().put( fieldName, sf );
+
+    // also register a new copy field (from our new field)
+    schema.registerCopyField( fieldName, "dynamic_runtime" );
+    schema.refreshAnalyzers();
+
+    assertU(adoc("id", "10", "title", "test", fieldName, "aaa"));
+    assertU(commit());
+
+    SolrQuery query = new SolrQuery( fieldName+":aaa" );
+    query.set( "indent", "true" );
+    SolrQueryRequest req = new LocalSolrQueryRequest( core, query );
+
+    assertQ("Make sure they got in", req
+            ,"//*[@numFound='1']"
+            ,"//result/doc[1]/int[@name='id'][.='10']"
+            );
+
+    // Check to see if our copy field made it out safely
+    query.setQuery( "dynamic_runtime:aaa" );
+    assertQ("Make sure they got in", req
+            ,"//*[@numFound='1']"
+            ,"//result/doc[1]/int[@name='id'][.='10']"
+            );
+    clearIndex();
+  }
+}
Index: solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
===================================================================
--- solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java	(revision 1144761)
+++ solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java	(revision )
@@ -113,7 +113,7 @@
       }
     }
-    TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
+    TokenStream tokenStream = tfac.create(tokenizerChain.initReader(new StringReader(value)));
     List<AttributeSource> tokens = analyzeTokenStream(tokenStream);

     namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
Index: solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
===================================================================
--- solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java	(revision 1144761)
+++ solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java	(revision )
@@ -17,10 +17,7 @@

 package org.apache.solr.analysis;

-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.CharStream;
-import org.apache.lucene.analysis.CharReader;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.*;

 import java.io.Reader;

@@ -52,11 +49,11 @@
   public TokenFilterFactory[] getTokenFilterFactories() { return filters; }

   @Override
-  public Reader charStream(Reader reader){
+  public Reader initReader(Reader reader) {
-    if( charFilters != null && charFilters.length > 0 ){
+    if (charFilters != null && charFilters.length > 0) {
       CharStream cs = CharReader.get( reader );
-      for (int i=0; i<charFilters.length; i++){
+      for (int i = 0; i < charFilters.length; i++) {
         cs = charFilters[i].create(cs);
       }
       reader = cs;
     }
     return reader;
   }
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java	(revision )
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java	(revision )
-    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
+    Map<String, ReusableAnalyzerBase> analyzerPerField = new HashMap<String, ReusableAnalyzerBase>();
     analyzerPerField.put("special", new SimpleAnalyzer(TEST_VERSION_CURRENT));

     PerFieldAnalyzerWrapper analyzer =
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java	(revision 1170942)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java	(revision )
@@ -17,11 +17,9 @@
  * limitations under the License.
  */

-import java.io.IOException;
-import java.io.Reader;
-
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.util.Version;

@@ -31,24 +29,24 @@
 * A shingle is another name for a token based n-gram.
 * </p>
 */
-public final class ShingleAnalyzerWrapper extends Analyzer {
+public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {

-  private final Analyzer defaultAnalyzer;
+  private final ReusableAnalyzerBase defaultAnalyzer;
   private final int maxShingleSize;
   private final int minShingleSize;
   private final String tokenSeparator;
   private final boolean outputUnigrams;
   private final boolean outputUnigramsIfNoShingles;

-  public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer) {
+  public ShingleAnalyzerWrapper(ReusableAnalyzerBase defaultAnalyzer) {
     this(defaultAnalyzer, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
   }

-  public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int maxShingleSize) {
+  public ShingleAnalyzerWrapper(ReusableAnalyzerBase defaultAnalyzer, int maxShingleSize) {
     this(defaultAnalyzer, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, maxShingleSize);
   }

-  public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int minShingleSize, int maxShingleSize) {
+  public ShingleAnalyzerWrapper(ReusableAnalyzerBase defaultAnalyzer, int minShingleSize, int maxShingleSize) {
     this(defaultAnalyzer, minShingleSize, maxShingleSize, ShingleFilter.TOKEN_SEPARATOR, true, false);
   }

@@ -68,7 +66,7 @@
    * regardless of whether any shingles are available.
    */
   public ShingleAnalyzerWrapper(
-      Analyzer defaultAnalyzer,
+      ReusableAnalyzerBase defaultAnalyzer,
       int minShingleSize,
       int maxShingleSize,
       String tokenSeparator,
@@ -140,48 +138,18 @@
   }

   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream wrapped;
-    try {
-      wrapped = defaultAnalyzer.reusableTokenStream(fieldName, reader);
-    } catch (IOException e) {
-      wrapped = defaultAnalyzer.tokenStream(fieldName, reader);
+  protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
+    return defaultAnalyzer;
-    }
+  }
-    ShingleFilter filter = new ShingleFilter(wrapped, minShingleSize, maxShingleSize);
+
+  @Override
+  protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+    ShingleFilter filter = new ShingleFilter(components.getTokenStream(), minShingleSize, maxShingleSize);
     filter.setMinShingleSize(minShingleSize);
     filter.setMaxShingleSize(maxShingleSize);
     filter.setTokenSeparator(tokenSeparator);
     filter.setOutputUnigrams(outputUnigrams);
     filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
-    return filter;
+    return new TokenStreamComponents(components.getTokenizer(), filter);
   }
-
-  private class SavedStreams {
-    TokenStream wrapped;
-    ShingleFilter shingle;
-  }
+}
-
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.wrapped = defaultAnalyzer.reusableTokenStream(fieldName, reader);
-      streams.shingle = new ShingleFilter(streams.wrapped);
-      setPreviousTokenStream(streams);
-    } else {
-      TokenStream result = defaultAnalyzer.reusableTokenStream(fieldName, reader);
-      if (result != streams.wrapped) {
-        /* the wrapped analyzer did not, create a new shingle around the new one */
-        streams.wrapped = result;
-        streams.shingle = new ShingleFilter(streams.wrapped);
-      }
-    }
-    streams.shingle.setMaxShingleSize(maxShingleSize);
-    streams.shingle.setMinShingleSize(minShingleSize);
-    streams.shingle.setTokenSeparator(tokenSeparator);
-    streams.shingle.setOutputUnigrams(outputUnigrams);
-    streams.shingle.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
-    return streams.shingle;
-  }
-}
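Construction is likewise unchanged by the rewrite; a hedged sketch (the delegate analyzer, shingle sizes, and Version constant are illustrative):

import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

public class ShingleExample {
  // Emits unigrams plus 2- and 3-word shingles ("please divide",
  // "please divide this", ...) on top of StandardAnalyzer's tokens.
  public static ShingleAnalyzerWrapper build() {
    return new ShingleAnalyzerWrapper(
        new StandardAnalyzer(Version.LUCENE_40), 2, 3);
  }
}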
Index: lucene/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java	(revision )
+++ lucene/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java	(revision )
@@ -0,0 +1,89 @@
+package org.apache.lucene.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexableField;
+
+import java.io.Reader;
+
+/**
+ * Extension to {@link ReusableAnalyzerBase} suitable for Analyzers which wrap
+ * other Analyzers.
+ * <p/>
+ * {@link #getWrappedAnalyzer(String)} allows the Analyzer
+ * to wrap multiple Analyzers which are selected on a per field basis.
+ * <p/>
+ * {@link #wrapComponents(String, TokenStreamComponents)} allows the
+ * TokenStreamComponents of the wrapped Analyzer to then be wrapped
+ * (such as adding a new {@link TokenFilter} to form new TokenStreamComponents).
+ */
+public abstract class AnalyzerWrapper extends ReusableAnalyzerBase {
+
+  /**
+   * Creates a new AnalyzerWrapper.  Since the {@link ReuseStrategy} of
+   * the wrapped Analyzers are unknown, {@link PerFieldReuseStrategy} is assumed
+   */
+  protected AnalyzerWrapper() {
+    super(new PerFieldReuseStrategy());
+  }
+
+  /**
+   * Retrieves the wrapped Analyzer appropriate for analyzing the field with
+   * the given name
+   *
+   * @param fieldName Name of the field which is to be analyzed
+   * @return Analyzer for the field with the given name.  Assumed to be non-null
+   */
+  protected abstract ReusableAnalyzerBase getWrappedAnalyzer(String fieldName);
+
+  /**
+   * Wraps / alters the given TokenStreamComponents, taken from the wrapped
+   * Analyzer, to form new components.  It is through this method that new
+   * TokenFilters can be added by AnalyzerWrappers.
+   *
+   * @param fieldName Name of the field which is to be analyzed
+   * @param components TokenStreamComponents taken from the wrapped Analyzer
+   * @return Wrapped / altered TokenStreamComponents.
+   */
+  protected abstract TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components);
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  protected final TokenStreamComponents createComponents(String fieldName, Reader aReader) {
+    return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName, aReader));
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public final int getPositionIncrementGap(String fieldName) {
+    return getWrappedAnalyzer(fieldName).getPositionIncrementGap(fieldName);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public final int getOffsetGap(IndexableField field) {
+    return getWrappedAnalyzer(field.name()).getOffsetGap(field);
+  }
+}
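To illustrate the new contract, here is a minimal hypothetical AnalyzerWrapper subclass written against the API added above (the delegate, filter choice, and class name are illustrative, not part of this patch):

import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.util.Version;

// Delegates every field to one analyzer and appends a LowerCaseFilter
// to whatever chain the delegate produced.
public final class LowercasingWrapper extends AnalyzerWrapper {
  private final ReusableAnalyzerBase delegate =
      new WhitespaceAnalyzer(Version.LUCENE_40);

  @Override
  protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
    return delegate;
  }

  @Override
  protected TokenStreamComponents wrapComponents(String fieldName,
                                                 TokenStreamComponents components) {
    return new TokenStreamComponents(components.getTokenizer(),
        new LowerCaseFilter(Version.LUCENE_40, components.getTokenStream()));
  }
}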
Index: solr/core/src/java/org/apache/solr/schema/BoolField.java
===================================================================
--- solr/core/src/java/org/apache/solr/schema/BoolField.java	(revision 1162347)
+++ solr/core/src/java/org/apache/solr/schema/BoolField.java	(revision )
@@ -30,7 +30,6 @@
 import org.apache.lucene.util.mutable.MutableValue;
 import org.apache.lucene.util.mutable.MutableValueBool;
 import org.apache.solr.search.QParser;
-import org.apache.solr.search.function.*;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -70,7 +69,7 @@

   protected final static Analyzer boolAnalyzer = new SolrAnalyzer() {
     @Override
-    public TokenStreamInfo getStream(String fieldName, Reader reader) {
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       Tokenizer tokenizer = new Tokenizer(reader) {
         final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
         boolean done = false;
@@ -95,7 +94,7 @@
         }
       };

-      return new TokenStreamInfo(tokenizer, tokenizer);
+      return new TokenStreamComponents(tokenizer);
     }
   };
Index: solr/core/src/java/org/apache/solr/analysis/SolrAnalyzer.java
===================================================================
--- solr/core/src/java/org/apache/solr/analysis/SolrAnalyzer.java	(revision 1144761)
+++ solr/core/src/java/org/apache/solr/analysis/SolrAnalyzer.java	(revision )
@@ -25,11 +25,11 @@
 /**
  *
  */
-public abstract class SolrAnalyzer extends Analyzer {
+public abstract class SolrAnalyzer extends ReusableAnalyzerBase {
-  int posIncGap=0;
+  int posIncGap = 0;
-  
+
   public void setPositionIncrementGap(int gap) {
-    posIncGap=gap;
+    posIncGap = gap;
   }

   @Override
@@ -38,43 +38,13 @@
   }

   /** wrap the reader in a CharStream, if appropriate */
+  @Deprecated
-  public Reader charStream(Reader reader){
+  public Reader charStream(Reader reader) {
     return reader;
   }

   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    return getStream(fieldName, reader).getTokenStream();
+  protected Reader initReader(Reader reader) {
+    return charStream(reader);
   }
-
-  public static class TokenStreamInfo {
-    private final Tokenizer tokenizer;
-    private final TokenStream tokenStream;
-    public TokenStreamInfo(Tokenizer tokenizer, TokenStream tokenStream) {
-      this.tokenizer = tokenizer;
-      this.tokenStream = tokenStream;
-    }
+}
-    public Tokenizer getTokenizer() { return tokenizer; }
-    public TokenStream getTokenStream() { return tokenStream; }
-  }
-
-  public abstract TokenStreamInfo getStream(String fieldName, Reader reader);
-
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    // if (true) return tokenStream(fieldName, reader);
-    TokenStreamInfo tsi = (TokenStreamInfo)getPreviousTokenStream();
-    if (tsi != null) {
-      tsi.getTokenizer().reset(charStream(reader));
-      // the consumer will currently call reset() on the TokenStream to hit all the filters.
-      // this isn't necessarily guaranteed by the APIs... but is currently done
-      // by lucene indexing in DocInverterPerField, and in the QueryParser
-      return tsi.getTokenStream();
-    } else {
-      tsi = getStream(fieldName, reader);
-      setPreviousTokenStream(tsi);
-      return tsi.getTokenStream();
-    }
-  }
-}
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java	(revision 1170424)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java	(revision )
@@ -33,7 +33,7 @@
   String variedFieldValues[] = {"the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "boring", "dog"};
   String repetitiveFieldValues[] = {"boring", "boring", "vaguelyboring"};
   RAMDirectory dir;
-  Analyzer appAnalyzer;
+  ReusableAnalyzerBase appAnalyzer;
   IndexReader reader;
   QueryAutoStopWordAnalyzer protectedAnalyzer;
Index: solr/core/src/java/org/apache/solr/schema/FieldType.java
===================================================================
--- solr/core/src/java/org/apache/solr/schema/FieldType.java	(revision 1169470)
+++ solr/core/src/java/org/apache/solr/schema/FieldType.java	(revision )
@@ -389,7 +389,7 @@
     }

     @Override
-    public TokenStreamInfo getStream(String fieldName, Reader reader) {
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       Tokenizer ts = new Tokenizer(reader) {
         final char[] cbuf = new char[maxChars];
         final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@@ -406,7 +406,7 @@
         }
       };

-      return new TokenStreamInfo(ts, ts);
+      return new TokenStreamComponents(ts);
     }
   }
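Finally, the BoolField and FieldType changes above follow the same pattern: build the Tokenizer once in createComponents() and let ReusableAnalyzerBase handle caching and reuse. A minimal hypothetical ReusableAnalyzerBase subclass showing it (the tokenizer choice, class name, and Version constant are illustrative):

import java.io.Reader;

import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

public final class SingleTokenizerAnalyzer extends ReusableAnalyzerBase {
  // Called once per field/thread; the returned components are cached and
  // reset with a new Reader on subsequent calls instead of being rebuilt.
  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    return new TokenStreamComponents(
        new WhitespaceTokenizer(Version.LUCENE_40, reader));
  }
}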