Uploaded image for project: 'Apache Drill'
  1. Apache Drill
  2. DRILL-2380

TPC-DS Query 33 and simplified variants return wrong results

    XMLWordPrintableJSON

    Details

      Description

      TPC-DS query 33 returns wrong results.

      WITH ss 
           AS (SELECT i_manufact_id, 
                      Sum(ss_ext_sales_price) total_sales 
               FROM   store_sales, 
                      date_dim, 
                      customer_address, 
                      item 
               WHERE  i_manufact_id IN (SELECT i_manufact_id 
                                        FROM   item 
                                        WHERE  i_category IN ( 'Books' )) 
                      AND ss_item_sk = i_item_sk 
                      AND ss_sold_date_sk = d_date_sk 
                      AND d_year = 1999 
                      AND d_moy = 3 
                      AND ss_addr_sk = ca_address_sk 
                      AND ca_gmt_offset = -5 
               GROUP  BY i_manufact_id), 
           cs 
           AS (SELECT i_manufact_id, 
                      Sum(cs_ext_sales_price) total_sales 
               FROM   catalog_sales, 
                      date_dim, 
                      customer_address, 
                      item 
               WHERE  i_manufact_id IN (SELECT i_manufact_id 
                                        FROM   item 
                                        WHERE  i_category IN ( 'Books' )) 
                      AND cs_item_sk = i_item_sk 
                      AND cs_sold_date_sk = d_date_sk 
                      AND d_year = 1999 
                      AND d_moy = 3 
                      AND cs_bill_addr_sk = ca_address_sk 
                      AND ca_gmt_offset = -5 
               GROUP  BY i_manufact_id), 
           ws 
           AS (SELECT i_manufact_id, 
                      Sum(ws_ext_sales_price) total_sales 
               FROM   web_sales, 
                      date_dim, 
                      customer_address, 
                      item 
               WHERE  i_manufact_id IN (SELECT i_manufact_id 
                                        FROM   item 
                                        WHERE  i_category IN ( 'Books' )) 
                      AND ws_item_sk = i_item_sk 
                      AND ws_sold_date_sk = d_date_sk 
                      AND d_year = 1999 
                      AND d_moy = 3 
                      AND ws_bill_addr_sk = ca_address_sk 
                      AND ca_gmt_offset = -5 
               GROUP  BY i_manufact_id) 
      SELECT i_manufact_id, 
                     Sum(total_sales) total_sales 
      FROM   (SELECT i_manufact_id, total_sales 
              FROM   ss 
              UNION ALL 
              SELECT i_manufact_id, total_sales
              FROM   cs 
              UNION ALL 
              SELECT i_manufact_id, total_sales
              FROM   ws) tmp1 
      GROUP  BY i_manufact_id 
      ORDER  BY total_sales
      LIMIT 10;
      
      Drill Results:
      +---------------+-------------+
      | i_manufact_id | total_sales |
      +---------------+-------------+
      | 440           | 0.12        |
      | 434           | 13.16       |
      | 415           | 14.04       |
      | 449           | 15.63       |
      | 563           | 31.46       |
      | 357           | 49.50       |
      | 624           | 67.94       |
      | 192           | 74.40       |
      | 137           | 83.42       |
      | 240           | 85.26       |
      +---------------+-------------+
      10 rows selected (7.57 seconds)
      
      Postgres Results:
       i_manufact_id | total_sales 
      ---------------+-------------
                 930 |        1.18
                 818 |       41.86
                 913 |      141.90
                 784 |      184.90
                 488 |      275.08
                 993 |      301.60
                 700 |      340.52
                 895 |      802.30
                 766 |      839.76
                 858 |      859.18
      (10 rows)
      

      The following simplified variants also return wrong results:

      SELECT sum(x)
      FROM
      (SELECT ss_ext_sales_price x, ss_item_sk
      FROM  store_sales
       GROUP BY ss_item_sk, ss_ext_sales_price
      UNION ALL
      SELECT cs_ext_sales_price x, cs_item_sk
      FROM catalog_sales
      GROUP BY cs_item_sk, cs_ext_sales_price) tmp
      GROUP BY x
      LIMIT 10;
      
      Drill Results:
      +------------+
      |   EXPR$0   |
      +------------+
      | 14141.40   |
      | 28060.00   |
      | 30912.70   |
      | 43706.88   |
      | 38267.64   |
      | 10173.00   |
      | 37829.25   |
      | 5349.50    |
      | 107515.80  |
      | 4440.84    |
      +------------+
      10 rows selected (14.435 seconds)
      
      Postgres Results:
         sum    
      ----------   
       45234.00
        5735.31
        2275.60
        6921.32
        2590.46
        6615.09
       14080.77
       24819.76
       25127.20
      (10 rows)
      
      SELECT sum(x)
      FROM
      (SELECT sum(ss_ext_sales_price) x, ss_item_sk
      FROM  store_sales
       GROUP BY ss_item_sk
      UNION ALL
      SELECT sum(cs_ext_sales_price) x, cs_item_sk
      FROM catalog_sales
      GROUP BY cs_item_sk) tmp
      GROUP BY x
      LIMIT 10;
      
      Drill Results:
      +------------+
      |   EXPR$0   |
      +------------+
      | 211411.58  |
      | 347027.93  |
      | 534760.93  |
      | 203028.28  |
      | 500939.61  |
      | 248226.81  |
      | 242664.29  |
      | 597659.03  |
      | 258909.73  |
      | 223624.06  |
      +------------+
      10 rows selected (5.245 seconds)
      
      Postgres Results:
          sum    
      -----------
       252711.42
       173571.97
       206191.60
       249793.96
       170825.75
       127718.29
       220887.50
       119390.44
       217495.66
       284348.93
      (10 rows)
      
      SELECT x
      FROM
      (SELECT ss_ext_sales_price x, ss_item_sk
      FROM  store_sales
       GROUP BY ss_item_sk, ss_ext_sales_price
      UNION ALL
      SELECT cs_ext_sales_price x, cs_item_sk
      FROM catalog_sales
      GROUP BY cs_item_sk, cs_ext_sales_price) tmp
      GROUP BY x
      LIMIT 10;
      
      Drill Results:
      +------------+
      |     x      |
      +------------+
      | 271.95     |
      | 561.20     |
      | 391.30     |
      | 1821.12    |
      | 2125.98    |
      | 1695.50    |
      | 1513.17    |
      | 411.50     |
      | 4674.60    |
      | 193.08     |
      +------------+
      10 rows selected (9.518 seconds)
      
      Postgres Results:
          x    
      ---------
       9046.80
       5735.31
        568.90
       3460.66
       1295.23
       6615.09
       4693.59
       6204.94
       6281.80
      (10 rows)
      

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                seanhychu Sean Hsuan-Yi Chu
                Reporter:
                agirish Abhishek Girish
              • Votes:
                0 Vote for this issue
                Watchers:
                4 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: