stopWords = stopWordsPerField.get(fieldName);
- if (stopWords != null) {
- streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
- } else {
- streams.withStopFilter = streams.wrapped;
- }
- }
- }
- return streams.withStopFilter;
- }
-
+ StopFilter stopFilter = new StopFilter(matchVersion, components.getTokenStream(), stopWords);
+ return new TokenStreamComponents(components.getTokenizer(), stopFilter);
+ }
/**
* Provides information on which stop words have been identified for a field
*
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java (revision 1170942)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java (revision )
@@ -17,20 +17,16 @@
* limitations under the License.
*/
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
-import java.io.Reader;
-import java.io.IOException;
import java.util.Collections;
import java.util.Map;
-import java.util.HashMap;
/**
* This analyzer is used to facilitate scenarios where different
* fields require different analysis techniques. Use the Map
- * argument in {@link #PerFieldAnalyzerWrapper(Analyzer, java.util.Map)}
+ * argument in {@link #PerFieldAnalyzerWrapper(ReusableAnalyzerBase, java.util.Map)}
* to add non-default analyzers for fields.
*
* Example usage:
@@ -50,9 +46,9 @@
*
* A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
* and query parsing.
*/
-public final class PerFieldAnalyzerWrapper extends Analyzer {
- private final Analyzer defaultAnalyzer;
- private final Map<String, Analyzer> fieldAnalyzers;
+public final class PerFieldAnalyzerWrapper extends AnalyzerWrapper {
+ private final ReusableAnalyzerBase defaultAnalyzer;
+ private final Map<String, ReusableAnalyzerBase> fieldAnalyzers;
/**
* Constructs with default analyzer.
@@ -60,7 +56,7 @@
* @param defaultAnalyzer Any fields not specifically
* defined to use a different analyzer will use the one provided here.
*/
- public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) {
+ public PerFieldAnalyzerWrapper(ReusableAnalyzerBase defaultAnalyzer) {
this(defaultAnalyzer, null);
}
@@ -73,51 +69,24 @@
* @param fieldAnalyzers a Map (String field name to the Analyzer) to be
* used for those fields
*/
- public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer,
- Map<String, Analyzer> fieldAnalyzers) {
+ public PerFieldAnalyzerWrapper(ReusableAnalyzerBase defaultAnalyzer,
+ Map<String, ReusableAnalyzerBase> fieldAnalyzers) {
this.defaultAnalyzer = defaultAnalyzer;
- this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : Collections.<String, Analyzer>emptyMap();
+ this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : Collections.<String, ReusableAnalyzerBase>emptyMap();
}
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- Analyzer analyzer = fieldAnalyzers.get(fieldName);
- if (analyzer == null) {
- analyzer = defaultAnalyzer;
+ protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
+ ReusableAnalyzerBase analyzer = fieldAnalyzers.get(fieldName);
+ return (analyzer != null) ? analyzer : defaultAnalyzer;
- }
+ }
- return analyzer.tokenStream(fieldName, reader);
- }
-
@Override
- public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
- Analyzer analyzer = fieldAnalyzers.get(fieldName);
- if (analyzer == null)
- analyzer = defaultAnalyzer;
-
- return analyzer.reusableTokenStream(fieldName, reader);
+ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+ return components;
}
- /** Return the positionIncrementGap from the analyzer assigned to fieldName */
@Override
- public int getPositionIncrementGap(String fieldName) {
- Analyzer analyzer = fieldAnalyzers.get(fieldName);
- if (analyzer == null)
- analyzer = defaultAnalyzer;
- return analyzer.getPositionIncrementGap(fieldName);
- }
-
- /** Return the offsetGap from the analyzer assigned to field */
- @Override
- public int getOffsetGap(IndexableField field) {
- Analyzer analyzer = fieldAnalyzers.get(field.name());
- if (analyzer == null) {
- analyzer = defaultAnalyzer;
- }
- return analyzer.getOffsetGap(field);
- }
-
- @Override
public String toString() {
return "PerFieldAnalyzerWrapper(" + fieldAnalyzers + ", default=" + defaultAnalyzer + ")";
}
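
The example usage elided by the hunk above still applies; for review, a minimal
sketch of the patched API (the analyzer choices, the field name, and the
Version.LUCENE_40 constant are illustrative assumptions, with core package
locations following the trunk modules layout):

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.analysis.core.SimpleAnalyzer;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
    import org.apache.lucene.util.Version;

    public class PerFieldUsageSketch {
      public static PerFieldAnalyzerWrapper build() {
        // Fields present in the map get their own analyzer; every other
        // field falls back to the default passed as the first argument.
        Map<String, ReusableAnalyzerBase> perField =
            new HashMap<String, ReusableAnalyzerBase>();
        perField.put("firstname", new SimpleAnalyzer(Version.LUCENE_40));
        return new PerFieldAnalyzerWrapper(
            new WhitespaceAnalyzer(Version.LUCENE_40), perField);
      }
    }
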
Index: solr/core/src/java/org/apache/solr/schema/IndexSchema.java
===================================================================
--- solr/core/src/java/org/apache/solr/schema/IndexSchema.java (revision 1169470)
+++ solr/core/src/java/org/apache/solr/schema/IndexSchema.java (revision )
@@ -18,7 +18,8 @@
package org.apache.solr.schema;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
@@ -41,8 +42,6 @@
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
-import java.io.Reader;
-import java.io.IOException;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -292,62 +291,49 @@
queryAnalyzer = new SolrQueryAnalyzer();
}
- private class SolrIndexAnalyzer extends Analyzer {
- protected final HashMap<String, Analyzer> analyzers;
+ private class SolrIndexAnalyzer extends AnalyzerWrapper {
+ protected final HashMap<String, ReusableAnalyzerBase> analyzers;
SolrIndexAnalyzer() {
analyzers = analyzerCache();
}
- protected HashMap<String, Analyzer> analyzerCache() {
- HashMap<String, Analyzer> cache = new HashMap<String, Analyzer>();
+ protected HashMap<String, ReusableAnalyzerBase> analyzerCache() {
+ HashMap<String, ReusableAnalyzerBase> cache = new HashMap<String, ReusableAnalyzerBase>();
- for (SchemaField f : getFields().values()) {
+ for (SchemaField f : getFields().values()) {
- Analyzer analyzer = f.getType().getAnalyzer();
+ ReusableAnalyzerBase analyzer = (ReusableAnalyzerBase) f.getType().getAnalyzer();
cache.put(f.getName(), analyzer);
}
return cache;
}
- protected Analyzer getAnalyzer(String fieldName)
- {
- Analyzer analyzer = analyzers.get(fieldName);
- return analyzer!=null ? analyzer : getDynamicFieldType(fieldName).getAnalyzer();
- }
-
@Override
- public TokenStream tokenStream(String fieldName, Reader reader)
- {
- return getAnalyzer(fieldName).tokenStream(fieldName,reader);
+ protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
+ ReusableAnalyzerBase analyzer = analyzers.get(fieldName);
+ return analyzer != null ? analyzer : (ReusableAnalyzerBase) getDynamicFieldType(fieldName).getAnalyzer();
}
@Override
- public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
- return getAnalyzer(fieldName).reusableTokenStream(fieldName,reader);
+ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+ return components;
}
-
- @Override
- public int getPositionIncrementGap(String fieldName) {
- return getAnalyzer(fieldName).getPositionIncrementGap(fieldName);
- }
+ }
- }
-
private class SolrQueryAnalyzer extends SolrIndexAnalyzer {
@Override
- protected HashMap<String, Analyzer> analyzerCache() {
- HashMap<String, Analyzer> cache = new HashMap<String, Analyzer>();
+ protected HashMap<String, ReusableAnalyzerBase> analyzerCache() {
+ HashMap<String, ReusableAnalyzerBase> cache = new HashMap<String, ReusableAnalyzerBase>();
for (SchemaField f : getFields().values()) {
- Analyzer analyzer = f.getType().getQueryAnalyzer();
+ ReusableAnalyzerBase analyzer = (ReusableAnalyzerBase) f.getType().getQueryAnalyzer();
cache.put(f.getName(), analyzer);
}
return cache;
}
@Override
- protected Analyzer getAnalyzer(String fieldName)
- {
- Analyzer analyzer = analyzers.get(fieldName);
- return analyzer!=null ? analyzer : getDynamicFieldType(fieldName).getQueryAnalyzer();
+ protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
+ ReusableAnalyzerBase analyzer = analyzers.get(fieldName);
+ return analyzer != null ? analyzer : (ReusableAnalyzerBase) getDynamicFieldType(fieldName).getQueryAnalyzer();
}
}
Index: solr/core/src/test/org/apache/solr/schema/IndexSchemaTest.java
===================================================================
--- solr/core/src/test/org/apache/solr/schema/IndexSchemaTest.java (revision 1169470)
+++ solr/core/src/test/org/apache/solr/schema/IndexSchemaTest.java (revision )
@@ -17,26 +17,25 @@
package org.apache.solr.schema;
-import java.util.HashMap;
-import java.util.Map;
-
+import org.apache.lucene.search.similarities.SimilarityProvider;
import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
-import org.apache.lucene.search.similarities.SimilarityProvider;
import org.junit.BeforeClass;
import org.junit.Test;
+import java.util.HashMap;
+import java.util.Map;
+
public class IndexSchemaTest extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig.xml","schema.xml");
- }
+ }
/**
* This test assumes the schema includes:
@@ -44,22 +43,22 @@
*
*/
@Test
- public void testDynamicCopy()
+ public void testDynamicCopy()
{
SolrCore core = h.getCore();
assertU(adoc("id", "10", "title", "test", "aaa_dynamic", "aaa"));
assertU(commit());
-
+
Map<String,String> args = new HashMap<String,String>();
args.put( CommonParams.Q, "title:test" );
args.put( "indent", "true" );
SolrQueryRequest req = new LocalSolrQueryRequest( core, new MapSolrParams( args) );
-
+
assertQ("Make sure they got in", req
,"//*[@numFound='1']"
,"//result/doc[1]/int[@name='id'][.='10']"
);
-
+
args = new HashMap<String,String>();
args.put( CommonParams.Q, "aaa_dynamic:aaa" );
args.put( "indent", "true" );
@@ -87,47 +86,8 @@
assertTrue("wrong class", similarityProvider instanceof MockConfigurableSimilarityProvider);
assertEquals("is there an echo?", ((MockConfigurableSimilarityProvider)similarityProvider).getPassthrough());
}
-
+
@Test
- public void testRuntimeFieldCreation()
- {
- // any field manipulation needs to happen when you know the core will not
- // be accepting any requests. Typically this is done within the inform()
- // method. Since this is a single threaded test, we can change the fields
- // willi-nilly
-
- SolrCore core = h.getCore();
- IndexSchema schema = core.getSchema();
- final String fieldName = "runtimefield";
- SchemaField sf = new SchemaField( fieldName, schema.getFieldTypes().get( "string" ) );
- schema.getFields().put( fieldName, sf );
-
- // also register a new copy field (from our new field)
- schema.registerCopyField( fieldName, "dynamic_runtime" );
- schema.refreshAnalyzers();
-
- assertU(adoc("id", "10", "title", "test", fieldName, "aaa"));
- assertU(commit());
-
- SolrQuery query = new SolrQuery( fieldName+":aaa" );
- query.set( "indent", "true" );
- SolrQueryRequest req = new LocalSolrQueryRequest( core, query );
-
- assertQ("Make sure they got in", req
- ,"//*[@numFound='1']"
- ,"//result/doc[1]/int[@name='id'][.='10']"
- );
-
- // Check to see if our copy field made it out safely
- query.setQuery( "dynamic_runtime:aaa" );
- assertQ("Make sure they got in", req
- ,"//*[@numFound='1']"
- ,"//result/doc[1]/int[@name='id'][.='10']"
- );
- clearIndex();
- }
-
- @Test
public void testIsDynamicField() throws Exception {
SolrCore core = h.getCore();
IndexSchema schema = core.getSchema();
@@ -141,6 +101,5 @@
SolrCore core = h.getCore();
IndexSchema schema = core.getSchema();
assertFalse(schema.getField("id").multiValued());
-
}
}
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java (revision 1162347)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java (revision )
@@ -17,54 +17,37 @@
* limitations under the License.
*/
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
-import java.io.Reader;
-import java.io.IOException;
-
/**
* This Analyzer limits the number of tokens while indexing. It is
* a replacement for the maximum field length setting inside {@link org.apache.lucene.index.IndexWriter}.
*/
-public final class LimitTokenCountAnalyzer extends Analyzer {
- private final Analyzer delegate;
+public final class LimitTokenCountAnalyzer extends AnalyzerWrapper {
+ private final ReusableAnalyzerBase delegate;
private final int maxTokenCount;
/**
* Build an analyzer that limits the maximum number of tokens per field.
*/
- public LimitTokenCountAnalyzer(Analyzer delegate, int maxTokenCount) {
+ public LimitTokenCountAnalyzer(ReusableAnalyzerBase delegate, int maxTokenCount) {
this.delegate = delegate;
this.maxTokenCount = maxTokenCount;
}
-
+
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new LimitTokenCountFilter(
- delegate.tokenStream(fieldName, reader), maxTokenCount
- );
+ protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
+ return delegate;
}
-
+
@Override
- public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
- return new LimitTokenCountFilter(
- delegate.reusableTokenStream(fieldName, reader), maxTokenCount
- );
+ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+ return new TokenStreamComponents(components.getTokenizer(),
+ new LimitTokenCountFilter(components.getTokenStream(), maxTokenCount));
}
@Override
- public int getPositionIncrementGap(String fieldName) {
- return delegate.getPositionIncrementGap(fieldName);
- }
-
- @Override
- public int getOffsetGap(IndexableField field) {
- return delegate.getOffsetGap(field);
- }
-
- @Override
public String toString() {
return "LimitTokenCountAnalyzer(" + delegate.toString() + ", maxTokenCount=" + maxTokenCount + ")";
}
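
Since LimitTokenCountAnalyzer now delegates through wrapComponents, usage is
unchanged; a sketch under assumed names (the 10,000 cap is arbitrary, and
StandardAnalyzer qualifies as the delegate because it extends
ReusableAnalyzerBase via StopwordAnalyzerBase):

    import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.util.Version;

    public class LimitUsageSketch {
      public static IndexWriterConfig config() {
        // Cap every field at 10,000 tokens, replacing IndexWriter's old
        // maximum field length setting with an analyzer-level limit.
        LimitTokenCountAnalyzer limited = new LimitTokenCountAnalyzer(
            new StandardAnalyzer(Version.LUCENE_40), 10000);
        return new IndexWriterConfig(Version.LUCENE_40, limited);
      }
    }
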
Index: solr/core/src/test/org/apache/solr/schema/IndexSchemaRuntimeFieldTest.java
===================================================================
--- solr/core/src/test/org/apache/solr/schema/IndexSchemaRuntimeFieldTest.java (revision )
+++ solr/core/src/test/org/apache/solr/schema/IndexSchemaRuntimeFieldTest.java (revision )
@@ -0,0 +1,72 @@
+package org.apache.solr.schema;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class IndexSchemaRuntimeFieldTest extends SolrTestCaseJ4 {
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ initCore("solrconfig.xml","schema.xml");
+ }
+
+ @Test
+ public void testRuntimeFieldCreation() {
+ // any field manipulation needs to happen when you know the core will not
+ // be accepting any requests. Typically this is done within the inform()
+ // method. Since this is a single threaded test, we can change the fields
+ // willy-nilly
+
+ SolrCore core = h.getCore();
+ IndexSchema schema = core.getSchema();
+ final String fieldName = "runtimefield";
+ SchemaField sf = new SchemaField( fieldName, schema.getFieldTypes().get( "string" ) );
+ schema.getFields().put( fieldName, sf );
+
+ // also register a new copy field (from our new field)
+ schema.registerCopyField( fieldName, "dynamic_runtime" );
+ schema.refreshAnalyzers();
+
+ assertU(adoc("id", "10", "title", "test", fieldName, "aaa"));
+ assertU(commit());
+
+ SolrQuery query = new SolrQuery( fieldName+":aaa" );
+ query.set( "indent", "true" );
+ SolrQueryRequest req = new LocalSolrQueryRequest( core, query );
+
+ assertQ("Make sure they got in", req
+ ,"//*[@numFound='1']"
+ ,"//result/doc[1]/int[@name='id'][.='10']"
+ );
+
+ // Check to see if our copy field made it out safely
+ query.setQuery( "dynamic_runtime:aaa" );
+ assertQ("Make sure they got in", req
+ ,"//*[@numFound='1']"
+ ,"//result/doc[1]/int[@name='id'][.='10']"
+ );
+ clearIndex();
+ }
+}
Index: solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
===================================================================
--- solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (revision 1144761)
+++ solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (revision )
@@ -113,7 +113,7 @@
}
}
- TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
+ TokenStream tokenStream = tfac.create(tokenizerChain.initReader(new StringReader(value)));
List<AttributeSource> tokens = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
Index: solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
===================================================================
--- solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java (revision 1144761)
+++ solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java (revision )
@@ -17,10 +17,7 @@
package org.apache.solr.analysis;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.CharStream;
-import org.apache.lucene.analysis.CharReader;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.*;
import java.io.Reader;
@@ -52,11 +49,11 @@
public TokenFilterFactory[] getTokenFilterFactories() { return filters; }
@Override
- public Reader charStream(Reader reader){
+ public Reader initReader(Reader reader) {
- if( charFilters != null && charFilters.length > 0 ){
+ if (charFilters != null && charFilters.length > 0) {
CharStream cs = CharReader.get( reader );
- for (int i=0; i<charFilters.length; i++) {
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java
===================================================================
- Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
+ Map<String, ReusableAnalyzerBase> analyzerPerField = new HashMap<String, ReusableAnalyzerBase>();
analyzerPerField.put("special", new SimpleAnalyzer(TEST_VERSION_CURRENT));
PerFieldAnalyzerWrapper analyzer =
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java (revision 1170942)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java (revision )
@@ -17,11 +17,9 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.io.Reader;
-
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;
@@ -31,24 +29,24 @@
* A shingle is another name for a token based n-gram.
*
*/
-public final class ShingleAnalyzerWrapper extends Analyzer {
+public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {
- private final Analyzer defaultAnalyzer;
+ private final ReusableAnalyzerBase defaultAnalyzer;
private final int maxShingleSize;
private final int minShingleSize;
private final String tokenSeparator;
private final boolean outputUnigrams;
private final boolean outputUnigramsIfNoShingles;
- public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer) {
+ public ShingleAnalyzerWrapper(ReusableAnalyzerBase defaultAnalyzer) {
this(defaultAnalyzer, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
}
- public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int maxShingleSize) {
+ public ShingleAnalyzerWrapper(ReusableAnalyzerBase defaultAnalyzer, int maxShingleSize) {
this(defaultAnalyzer, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, maxShingleSize);
}
- public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int minShingleSize, int maxShingleSize) {
+ public ShingleAnalyzerWrapper(ReusableAnalyzerBase defaultAnalyzer, int minShingleSize, int maxShingleSize) {
this(defaultAnalyzer, minShingleSize, maxShingleSize, ShingleFilter.TOKEN_SEPARATOR, true, false);
}
@@ -68,7 +66,7 @@
* regardless of whether any shingles are available.
*/
public ShingleAnalyzerWrapper(
- Analyzer defaultAnalyzer,
+ ReusableAnalyzerBase defaultAnalyzer,
int minShingleSize,
int maxShingleSize,
String tokenSeparator,
@@ -140,48 +138,18 @@
}
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream wrapped;
- try {
- wrapped = defaultAnalyzer.reusableTokenStream(fieldName, reader);
- } catch (IOException e) {
- wrapped = defaultAnalyzer.tokenStream(fieldName, reader);
+ protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
+ return defaultAnalyzer;
- }
+ }
- ShingleFilter filter = new ShingleFilter(wrapped, minShingleSize, maxShingleSize);
+
+ @Override
+ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+ ShingleFilter filter = new ShingleFilter(components.getTokenStream(), minShingleSize, maxShingleSize);
filter.setMinShingleSize(minShingleSize);
filter.setMaxShingleSize(maxShingleSize);
filter.setTokenSeparator(tokenSeparator);
filter.setOutputUnigrams(outputUnigrams);
filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
- return filter;
+ return new TokenStreamComponents(components.getTokenizer(), filter);
}
-
- private class SavedStreams {
- TokenStream wrapped;
- ShingleFilter shingle;
- }
+}
-
- @Override
- public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
- SavedStreams streams = (SavedStreams) getPreviousTokenStream();
- if (streams == null) {
- streams = new SavedStreams();
- streams.wrapped = defaultAnalyzer.reusableTokenStream(fieldName, reader);
- streams.shingle = new ShingleFilter(streams.wrapped);
- setPreviousTokenStream(streams);
- } else {
- TokenStream result = defaultAnalyzer.reusableTokenStream(fieldName, reader);
- if (result != streams.wrapped) {
- /* the wrapped analyzer did not, create a new shingle around the new one */
- streams.wrapped = result;
- streams.shingle = new ShingleFilter(streams.wrapped);
- }
- }
- streams.shingle.setMaxShingleSize(maxShingleSize);
- streams.shingle.setMinShingleSize(minShingleSize);
- streams.shingle.setTokenSeparator(tokenSeparator);
- streams.shingle.setOutputUnigrams(outputUnigrams);
- streams.shingle.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
- return streams.shingle;
- }
-}
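
The rewrite moves all ShingleFilter construction into wrapComponents, so one
configured filter chain is built per field and reused; a usage sketch (the
shingle sizes 2 and 3 are illustrative):

    import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.util.Version;

    public class ShingleUsageSketch {
      public static ShingleAnalyzerWrapper build() {
        // Emit 2- and 3-word shingles, plus unigrams (the default), on top
        // of the tokens the wrapped StandardAnalyzer produces.
        return new ShingleAnalyzerWrapper(
            new StandardAnalyzer(Version.LUCENE_40), 2, 3);
      }
    }
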
Index: lucene/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java (revision )
+++ lucene/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java (revision )
@@ -0,0 +1,89 @@
+package org.apache.lucene.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexableField;
+
+import java.io.Reader;
+
+/**
+ * Extension to {@link ReusableAnalyzerBase} suitable for Analyzers which wrap
+ * other Analyzers.
+ *
+ * {@link #getWrappedAnalyzer(String)} allows the Analyzer
+ * to wrap multiple Analyzers which are selected on a per field basis.
+ *
+ * {@link #wrapComponents(String, TokenStreamComponents)} allows the
+ * TokenStreamComponents of the wrapped Analyzer to then be wrapped
+ * (such as adding a new {@link TokenFilter}) to form new TokenStreamComponents.
+ */
+public abstract class AnalyzerWrapper extends ReusableAnalyzerBase {
+
+ /**
+ * Creates a new AnalyzerWrapper. Since the {@link ReuseStrategy} of
+ * the wrapped Analyzers are unknown, {@link PerFieldReuseStrategy} is assumed
+ */
+ protected AnalyzerWrapper() {
+ super(new PerFieldReuseStrategy());
+ }
+
+ /**
+ * Retrieves the wrapped Analyzer appropriate for analyzing the field with
+ * the given name
+ *
+ * @param fieldName Name of the field which is to be analyzed
+ * @return Analyzer for the field with the given name. Assumed to be non-null
+ */
+ protected abstract ReusableAnalyzerBase getWrappedAnalyzer(String fieldName);
+
+ /**
+ * Wraps / alters the given TokenStreamComponents, taken from the wrapped
+ * Analyzer, to form new components. It is through this method that new
+ * TokenFilters can be added by AnalyzerWrappers.
+ *
+ * @param fieldName Name of the field which is to be analyzed
+ * @param components TokenStreamComponents taken from the wrapped Analyzer
+ * @return Wrapped / altered TokenStreamComponents.
+ */
+ protected abstract TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components);
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ protected final TokenStreamComponents createComponents(String fieldName, Reader aReader) {
+ return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName, aReader));
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public final int getPositionIncrementGap(String fieldName) {
+ return getWrappedAnalyzer(fieldName).getPositionIncrementGap(fieldName);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public final int getOffsetGap(IndexableField field) {
+ return getWrappedAnalyzer(field.name()).getOffsetGap(field);
+ }
+}
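
To make the two hooks concrete, a minimal AnalyzerWrapper subclass might look
like the sketch below; the class itself, the lower-casing behaviour, and the
core package location of LowerCaseFilter are assumptions for illustration,
not part of this patch:

    import org.apache.lucene.analysis.AnalyzerWrapper;
    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.util.Version;

    public final class LowerCasingWrapper extends AnalyzerWrapper {
      private final ReusableAnalyzerBase delegate;

      public LowerCasingWrapper(ReusableAnalyzerBase delegate) {
        this.delegate = delegate;
      }

      @Override
      protected ReusableAnalyzerBase getWrappedAnalyzer(String fieldName) {
        return delegate; // same delegate for every field
      }

      @Override
      protected TokenStreamComponents wrapComponents(String fieldName,
          TokenStreamComponents components) {
        // Keep the delegate's Tokenizer, extend its filter chain.
        return new TokenStreamComponents(components.getTokenizer(),
            new LowerCaseFilter(Version.LUCENE_40, components.getTokenStream()));
      }
    }
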
Index: solr/core/src/java/org/apache/solr/schema/BoolField.java
===================================================================
--- solr/core/src/java/org/apache/solr/schema/BoolField.java (revision 1162347)
+++ solr/core/src/java/org/apache/solr/schema/BoolField.java (revision )
@@ -30,7 +30,6 @@
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueBool;
import org.apache.solr.search.QParser;
-import org.apache.solr.search.function.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -70,7 +69,7 @@
protected final static Analyzer boolAnalyzer = new SolrAnalyzer() {
@Override
- public TokenStreamInfo getStream(String fieldName, Reader reader) {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new Tokenizer(reader) {
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
boolean done = false;
@@ -95,7 +94,7 @@
}
};
- return new TokenStreamInfo(tokenizer, tokenizer);
+ return new TokenStreamComponents(tokenizer);
}
};
Index: solr/core/src/java/org/apache/solr/analysis/SolrAnalyzer.java
===================================================================
--- solr/core/src/java/org/apache/solr/analysis/SolrAnalyzer.java (revision 1144761)
+++ solr/core/src/java/org/apache/solr/analysis/SolrAnalyzer.java (revision )
@@ -25,11 +25,11 @@
/**
*
*/
-public abstract class SolrAnalyzer extends Analyzer {
+public abstract class SolrAnalyzer extends ReusableAnalyzerBase {
- int posIncGap=0;
+ int posIncGap = 0;
-
+
public void setPositionIncrementGap(int gap) {
- posIncGap=gap;
+ posIncGap = gap;
}
@Override
@@ -38,43 +38,13 @@
}
/** wrap the reader in a CharStream, if appropriate */
+ @Deprecated
- public Reader charStream(Reader reader){
+ public Reader charStream(Reader reader) {
return reader;
}
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return getStream(fieldName, reader).getTokenStream();
+ protected Reader initReader(Reader reader) {
+ return charStream(reader);
}
-
- public static class TokenStreamInfo {
- private final Tokenizer tokenizer;
- private final TokenStream tokenStream;
- public TokenStreamInfo(Tokenizer tokenizer, TokenStream tokenStream) {
- this.tokenizer = tokenizer;
- this.tokenStream = tokenStream;
- }
+}
- public Tokenizer getTokenizer() { return tokenizer; }
- public TokenStream getTokenStream() { return tokenStream; }
- }
-
-
- public abstract TokenStreamInfo getStream(String fieldName, Reader reader);
-
- @Override
- public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
- // if (true) return tokenStream(fieldName, reader);
- TokenStreamInfo tsi = (TokenStreamInfo)getPreviousTokenStream();
- if (tsi != null) {
- tsi.getTokenizer().reset(charStream(reader));
- // the consumer will currently call reset() on the TokenStream to hit all the filters.
- // this isn't necessarily guaranteed by the APIs... but is currently done
- // by lucene indexing in DocInverterPerField, and in the QueryParser
- return tsi.getTokenStream();
- } else {
- tsi = getStream(fieldName, reader);
- setPreviousTokenStream(tsi);
- return tsi.getTokenStream();
- }
- }
-}
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (revision 1170424)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (revision )
@@ -33,7 +33,7 @@
String variedFieldValues[] = {"the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "boring", "dog"};
String repetitiveFieldValues[] = {"boring", "boring", "vaguelyboring"};
RAMDirectory dir;
- Analyzer appAnalyzer;
+ ReusableAnalyzerBase appAnalyzer;
IndexReader reader;
QueryAutoStopWordAnalyzer protectedAnalyzer;
Index: solr/core/src/java/org/apache/solr/schema/FieldType.java
===================================================================
--- solr/core/src/java/org/apache/solr/schema/FieldType.java (revision 1169470)
+++ solr/core/src/java/org/apache/solr/schema/FieldType.java (revision )
@@ -389,7 +389,7 @@
}
@Override
- public TokenStreamInfo getStream(String fieldName, Reader reader) {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer ts = new Tokenizer(reader) {
final char[] cbuf = new char[maxChars];
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@@ -406,7 +406,7 @@
}
};
- return new TokenStreamInfo(ts, ts);
+ return new TokenStreamComponents(ts);
}
}
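
The one-argument TokenStreamComponents constructor used above covers the
common case where the Tokenizer is both source and sink, which
TokenStreamInfo(ts, ts) spelled out explicitly; a sketch of the pattern in
any ReusableAnalyzerBase subclass (WhitespaceTokenizer and its core package
location are illustrative):

    import java.io.Reader;

    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.util.Version;

    public final class TokenizerOnlyAnalyzer extends ReusableAnalyzerBase {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // No filters: the tokenizer doubles as source and sink.
        return new TokenStreamComponents(
            new WhitespaceTokenizer(Version.LUCENE_40, reader));
      }
    }
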