Uploaded image for project: 'Solr'
  1. Solr
  2. SOLR-10363

ComplexPhrase WildCard Case Sensitivity problem

    XMLWordPrintableJSON

Details

    Description

      I encounter a problem with ComplexPhrase and TurkishLowerCaseFilterFactory.

      When I search like

      {!complexphrase}SContent_tinx:"6* YAŞINDA" or
      {!complexphrase}SContent_tinx:"6* yaşında", the results are correct.

      {
        "responseHeader":{
          "zkConnected":true,
          "status":0,
          "QTime":104,
          "params":{
            "q":"{!complexphrase}SContent_tinx:\"6* YAŞINDa\"",
            "debug":"query",
            "indent":"on",
            "rows":"0",
            "wt":"json",
            "_":"1490456571184"}},
        "response":{"numFound":15,"start":0,"maxScore":5972.9,"docs":[]
        },
        "debug":{
          "rawquerystring":"{!complexphrase}SContent_tinx:\"6* YAŞINDa\"",
          "querystring":"{!complexphrase}SContent_tinx:\"6* YAŞINDa\"",
          "parsedquery":"ComplexPhraseQuery(\"6* YAŞINDa\")",
          "parsedquery_toString":"\"6* YAŞINDa\"",
          "QParser":"ComplexPhraseQParser"}}
      

      But if I search like

      {!complexphrase}SContent_tinx:"60 YAŞIND*" or
      {!complexphrase}SContent_tinx:"60 yaşınd*", numFound is 0, or the results change depending on the letter case of the query term.

      {
        "responseHeader":{
          "zkConnected":true,
          "status":0,
          "QTime":10,
          "params":{
            "q":"{!complexphrase}SContent_tinx:\"60 YAŞIND*\"",
            "debug":"query",
            "indent":"on",
            "rows":"0",
            "wt":"json",
            "_":"1490456571184"}},
        "response":{"numFound":0,"start":0,"maxScore":0.0,"docs":[]
        },
        "debug":{
          "rawquerystring":"{!complexphrase}SContent_tinx:\"60 YAŞIND*\"",
          "querystring":"{!complexphrase}SContent_tinx:\"60 YAŞIND*\"",
          "parsedquery":"ComplexPhraseQuery(\"60 YAŞIND*\")",
          "parsedquery_toString":"\"60 YAŞIND*\"",
          "QParser":"ComplexPhraseQParser"}}
      

      When I search without ComplexPhraseQueryParser, the number of results is the same for 60 YAŞIND* and 60 yaşınd*.

      {
        "responseHeader":{
          "zkConnected":true,
          "status":0,
          "QTime":56,
          "params":{
            "q":"60 yaŞınd*",
            "indent":"on",
            "rows":"0",
            "wt":"json",
            "debugQuery":"on",
            "_":"1490456571184"}},
        "response":{"numFound":776,"start":0,"maxScore":7.633286,"docs":[]
        },
        "debug":{
          "rawquerystring":"60 yaŞınd*",
          "querystring":"60 yaŞınd*",
          "parsedquery":"SContent_tinx:60 SContent_tinx:yaşınd*",
          "parsedquery_toString":"SContent_tinx:60 SContent_tinx:yaşınd*",
          "QParser":"LuceneQParser",
          "explain":{}}}
      
      <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
               <analyzer type="index">
             <tokenizer class="solr.StandardTokenizerFactory"/>
      	<filter class="solr.ApostropheFilterFactory"/>
      		<filter class="solr.PatternReplaceFilterFactory"
                      pattern="[^a-zA-Z0-9üğşçıiöâÜĞŞÇIİÖÂ@# ]" replacement=" " replace="all"/>
      				<filter class="solr.TrimFilterFactory"/>
      		<filter class="solr.TurkishLowerCaseFilterFactory"/>
      		 <filter class="solr.ReversedWildcardFilterFactory" />
          <!-- <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
                 maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>-->
      	</analyzer>
            <analyzer type="query">
              <tokenizer class="solr.StandardTokenizerFactory"/>
      	<filter class="solr.ApostropheFilterFactory"/>
      <filter class="solr.PatternReplaceFilterFactory"
                      pattern="[^a-zA-Z0-9üğşçıiöâÜĞŞÇIİÖÂ@# ]" replacement=" " replace="all"/>
      				<filter class="solr.TrimFilterFactory"/>
        <filter class="solr.TurkishLowerCaseFilterFactory"/>
            </analyzer>
          </fieldType>
      

      Attachments

        1. TestComplexPhraseTurkish.java
          2 kB
          Mikhail Khludnev
        2. TestComplexPhraseTurkishECIL.java
          3 kB
          Eyyub ÇİL
        3. complexPhraseWildCardBug.zip
          22 kB
          Eyyub ÇİL
        4. SOLR_10363_20170328.patch
          20 kB
          Eyyub ÇİL

        Activity

          People

            Unassigned Unassigned
            ecil Eyyub ÇİL
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated: