Uploaded image for project: 'Solr'
  1. Solr
  2. SOLR-6299

Facet count on facet queries returns different results if #shards > 1

    XML | Word | Printable | JSON

Details

    • Bug
    • Status: Resolved
    • Major
    • Resolution: Not A Problem
    • 6.0
    • None
    • SolrCloud

    Description

      I am running facet counts on facet queries, and it looks like I get different counts when the SolrCloud cluster has more than one shard.

      Here is the upstream unit test:
      https://github.com/apache/lucene-solr/blob/trunk/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java#L173

      Setup:

      • Ingested 5 Solr documents:
        {
          "responseHeader": {
            "status": 0,
            "QTime": 22,
            "params": {
              "indent": "true",
              "q": "*:*",
              "_": "1406346687337",
              "wt": "json"
            }
          },
          "response": {
            "numFound": 5,
            "start": 0,
            "maxScore": 1,
            "docs": [
              {
                "id": 2004,
                "range_facet_l": [
                  2004
                ],
                "hotel_s1": "b",
                "airport_s1": "ams",
                "duration_i1": 5,
                "_version_": 1474661321774465000,
                "timestamp": "2014-07-26T03:50:27.975Z",
                "multiDefault": [
                  "muLti-Default"
                ],
                "intDefault": 42
              },
              {
                "id": 2000,
                "range_facet_l": [
                  2000
                ],
                "hotel_s1": "a",
                "airport_s1": "ams",
                "duration_i1": 5,
                "_version_": 1474661323604230100,
                "timestamp": "2014-07-26T03:50:29.734Z",
                "multiDefault": [
                  "muLti-Default"
                ],
                "intDefault": 42
              },
              {
                "id": 2003,
                "range_facet_l": [
                  2003
                ],
                "hotel_s1": "b",
                "airport_s1": "ams",
                "duration_i1": 5,
                "_version_": 1474661326312702000,
                "timestamp": "2014-07-26T03:50:32.317Z",
                "multiDefault": [
                  "muLti-Default"
                ],
                "intDefault": 42
              },
              {
                "id": 2001,
                "range_facet_l": [
                  2001
                ],
                "hotel_s1": "a",
                "airport_s1": "dus",
                "duration_i1": 10,
                "_version_": 1474661326389248000,
                "timestamp": "2014-07-26T03:50:32.375Z",
                "multiDefault": [
                  "muLti-Default"
                ],
                "intDefault": 42
              },
              {
                "id": 2002,
                "range_facet_l": [
                  2002
                ],
                "hotel_s1": "b",
                "airport_s1": "ams",
                "duration_i1": 10,
                "_version_": 1474661326464745500,
                "timestamp": "2014-07-26T03:50:32.446Z",
                "multiDefault": [
                  "muLti-Default"
                ],
                "intDefault": 42
              }
            ]
          }
        }
        

      Here is the query being run:

      Test code:
          assertQ(
              req(
                  "q", "*:*",
                  "fq", "id:[2000 TO 2004]",
                  "group", "true",
                  "group.facet", "true",
                  "group.field", "hotel_s1",
                  "facet", "true",
                  "facet.limit", facetLimit,
                  "facet.query", "airport_s1:ams"
              ),
              "//lst[@name='facet_queries']/int[@name='airport_s1:ams'][.='2']"
          );
      
      $ curl  "http://localhost:8983/solr/collection1/select?facet=true&facet.query=airport_s1%3Aams&q=*%3A*&facet.limit=-100&group.field=hotel_s1&group=true&group.facet=true&fq=id%3A%5B2000+TO+2004%5D&indent=true&wt=xml" 
      

      Now, if I issue the query on a 1-shard system, it works as expected (the facet count is 2):

      $ curl  "http://localhost:8983/solr/collection1/select?facet=true&facet.query=airport_s1%3Aams&q=*%3A*&facet.limit=-100&group.field=hotel_s1&group=true&group.facet=true&fq=id%3A%5B2000+TO+2004%5D&indent=true&wt=xml" 
      
      <?xml version="1.0" encoding="UTF-8"?>
      <response>
      
      <lst name="responseHeader">
        <int name="status">0</int>
        <int name="QTime">17</int>
        <lst name="params">
          <str name="facet">true</str>
          <str name="indent">true</str>
          <str name="facet.query">airport_s1:ams</str>
          <str name="q">*:*</str>
          <str name="facet.limit">-100</str>
          <str name="group.field">hotel_s1</str>
          <str name="group">true</str>
          <str name="wt">xml</str>
          <str name="fq">id:[2000 TO 2004]</str>
          <str name="group.facet">true</str>
        </lst>
      </lst>
      <lst name="grouped">
        <lst name="hotel_s1">
          <int name="matches">5</int>
          <arr name="groups">
            <lst>
              <str name="groupValue">a</str>
              <result name="doclist" numFound="2" start="0">
                <doc>
                  <int name="id">2001</int>
                  <arr name="range_facet_l">
                    <long>2001</long>
                  </arr>
                  <str name="hotel_s1">a</str>
                  <str name="airport_s1">dus</str>
                  <int name="duration_i1">10</int>
                  <long name="_version_">1474989437819551744</long>
                  <date name="timestamp">2014-07-29T18:45:43.819Z</date>
                  <arr name="multiDefault">
                    <str>muLti-Default</str>
                  </arr>
                  <int name="intDefault">42</int></doc>
              </result>
            </lst>
            <lst>
              <str name="groupValue">b</str>
              <result name="doclist" numFound="3" start="0">
                <doc>
                  <int name="id">2003</int>
                  <arr name="range_facet_l">
                    <long>2003</long>
                  </arr>
                  <str name="hotel_s1">b</str>
                  <str name="airport_s1">ams</str>
                  <int name="duration_i1">5</int>
                  <long name="_version_">1474989439611568128</long>
                  <date name="timestamp">2014-07-29T18:45:45.528Z</date>
                  <arr name="multiDefault">
                    <str>muLti-Default</str>
                  </arr>
                  <int name="intDefault">42</int></doc>
              </result>
            </lst>
          </arr>
        </lst>
      </lst>
      <lst name="facet_counts">
        <lst name="facet_queries">
          <int name="airport_s1:ams">2</int>
        </lst>
        <lst name="facet_fields"/>
        <lst name="facet_dates"/>
        <lst name="facet_ranges"/>
      </lst>
      </response>
      

      Now, if I run the same query on a 2-shard system, I see a facet count of 3 instead of 2.

      Solr result on 2 shard cluster:

      [systest@search-testing-c5-1 search]$ curl  "http://localhost:8983/solr/collection1/select?facet=true&facet.query=airport_s1%3Aams&q=*%3A*&facet.limit=-100&group.field=hotel_s1&group=true&group.facet=true&fq=id%3A%5B2000+TO+2004%5D&indent=true&wt=xml" 
      <?xml version="1.0" encoding="UTF-8"?>
      <response>
      
      <lst name="responseHeader">
        <int name="status">0</int>
        <int name="QTime">69</int>
        <lst name="params">
          <str name="facet">true</str>
          <str name="indent">true</str>
          <str name="facet.query">airport_s1:ams</str>
          <str name="q">*:*</str>
          <str name="facet.limit">-100</str>
          <str name="group.field">hotel_s1</str>
          <str name="group">true</str>
          <str name="wt">xml</str>
          <str name="fq">id:[2000 TO 2004]</str>
          <str name="group.facet">true</str>
        </lst>
      </lst>
      <lst name="grouped">
        <lst name="hotel_s1">
          <int name="matches">5</int>
          <arr name="groups">
            <lst>
              <str name="groupValue">b</str>
              <result name="doclist" numFound="3" start="0" maxScore="1.0">
                <doc>
                  <int name="id">2002</int>
                  <arr name="range_facet_l">
                    <long>2002</long>
                  </arr>
                  <str name="hotel_s1">b</str>
                  <str name="airport_s1">ams</str>
                  <int name="duration_i1">10</int>
                  <long name="_version_">1474661326464745472</long>
                  <date name="timestamp">2014-07-26T03:50:32.446Z</date>
                  <arr name="multiDefault">
                    <str>muLti-Default</str>
                  </arr>
                  <int name="intDefault">42</int></doc>
              </result>
            </lst>
            <lst>
              <str name="groupValue">a</str>
              <result name="doclist" numFound="2" start="0" maxScore="1.0">
                <doc>
                  <int name="id">2001</int>
                  <arr name="range_facet_l">
                    <long>2001</long>
                  </arr>
                  <str name="hotel_s1">a</str>
                  <str name="airport_s1">dus</str>
                  <int name="duration_i1">10</int>
                  <long name="_version_">1474661326389248000</long>
                  <date name="timestamp">2014-07-26T03:50:32.375Z</date>
                  <arr name="multiDefault">
                    <str>muLti-Default</str>
                  </arr>
                  <int name="intDefault">42</int></doc>
              </result>
            </lst>
          </arr>
        </lst>
      </lst>
      <lst name="facet_counts">
        <lst name="facet_queries">
          <int name="airport_s1:ams">3</int>
        </lst>
        <lst name="facet_fields"/>
        <lst name="facet_dates"/>
        <lst name="facet_ranges"/>
      </lst>
      </response>
      

      To reproduce this, simply run the above test on a system with more than one shard; the Solr response will differ from the single-shard result.

      Attachments

        Activity

          People

            Unassigned Unassigned
            vamsee Vamsee K. Yarlagadda
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: