Details
-
Bug
-
Status: Open
-
Major
-
Resolution: Unresolved
-
5.0
-
None
-
Linux vld091 3.2.0-4-amd64 #1 SMP Debian 3.2.51-1 x86_64 GNU/Linux
Description
Possibly related to issue 3245 (https://issues.apache.org/jira/browse/SOLR-3245). Symptoms are exactly the same.
HunspellStemFilterFactory with a Czech dictionary is hundreds of times slower than CzechStemFilterFactory.
Analyzer setup:
<!-- Field type "text_cs": text field with separate query-time and index-time analyzer chains. -->
<fieldtype name="text_cs" class="solr.TextField">

  <!-- Query-time chain: word/number parts are generated, nothing is catenated. -->
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            stemEnglishPossessive="0"/>
    <!-- Hunspell stemming driven by the cs_CZ dictionary and affix files. -->
    <filter class="solr.HunspellStemFilterFactory"
            dictionary="cs_CZ.dic"
            affix="cs_CZ.aff"
            ignoreCase="true"
            strictAffixParsing="true"/>
    <filter class="solr.ASCIIFoldingFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>

  <!-- Index-time chain: same filters as the query chain, except that
       catenateWords and catenateNumbers are set to 1 here. -->
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            stemEnglishPossessive="0"/>
    <!-- Hunspell stemming driven by the cs_CZ dictionary and affix files. -->
    <filter class="solr.HunspellStemFilterFactory"
            dictionary="cs_CZ.dic"
            affix="cs_CZ.aff"
            ignoreCase="true"
            strictAffixParsing="true"/>
    <filter class="solr.ASCIIFoldingFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>

</fieldtype>