Uploaded image for project: 'Calcite'
  1. Calcite
  2. CALCITE-5540

Wrong plan for query with COUNT(DISTINCT subQuery)

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Open
    • Major
    • Resolution: Unresolved
    • None
    • None
    • None
    • None

    Description

      A query with two "COUNT(DISTINCT (SELECT ...))" aggregate functions generates an incorrect or perhaps inefficient plan when SqlToRelConverter.expand=false.

      The query occurs in LatticeSuggesterTest (enabled only with slow tests) and the error is discussed in CALCITE-5537.

      peng713128 wrote:

      I think output is wrong. And I prefer to set expansion to true.
      One edge of LatticeSpace graph is missed when the subquery is not expanded.
      The missing case is as follow, the relationship of warehouse#warehouse_class_id and warehouse_class#warehouse_class_id is missing.
      I think it should add to the graph.

      foodmart query_id:6739

        select
        "store"."store_type" as "c0",
        count(
          distinct (
            select
              "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
            from
              "warehouse_class" AS "warehouse_class"
            where
              "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
              and "warehouse_class"."description" = 'Large Owned'
          )
        ) as "m0",
        count(
          distinct (
            select
              "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
            from
              "warehouse_class" AS "warehouse_class"
            where
              "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
              and "warehouse_class"."description" = 'Large Independent'
          )
        ) as "m1",
        count(
          (
            select
              "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
            from
              "warehouse_class" AS "warehouse_class"
            where
              "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
              and "warehouse_class"."description" = 'Large Independent'
          )
        ) as "m2",
        count(distinct "store_id" + "warehouse_id") as "m3",
        count("store_id" + "warehouse_id") as "m4",
        count("warehouse"."stores_id") as "m5"
      from
        "store" as "store",
        "warehouse" as "warehouse"
      where
        "warehouse"."stores_id" = "store"."store_id"
      group by
        "store"."store_type"

      when set SqlToRelConverter.Config.expand = true

      LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
        LogicalProject(c0=[$1], $f0=[$39], $f00=[$40], $f01=[$42], $f4=[+($0, $24)], stores_id=[$26])
          LogicalJoin(condition=[=($25, $41)], joinType=[left])
            LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$39], $f040=[$41])
              LogicalJoin(condition=[=($25, $40)], joinType=[left])
                LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$40])
                  LogicalJoin(condition=[=($25, $39)], joinType=[left])
                    LogicalJoin(condition=[=($26, $0)], joinType=[inner])
                      JdbcTableScan(table=[[foodmart, store]])
                      JdbcTableScan(table=[[foodmart, warehouse]])
                    LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
                      LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
                        LogicalFilter(condition=[=($1, 'Large Owned')])
                          JdbcTableScan(table=[[foodmart, warehouse_class]])
                LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
                  LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
                    LogicalFilter(condition=[=($1, 'Large Independent')])
                      JdbcTableScan(table=[[foodmart, warehouse_class]])
            LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
              LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
                LogicalFilter(condition=[=($1, 'Large Independent')])
                  JdbcTableScan(table=[[foodmart, warehouse_class]])

      when set SqlToRelConverter.Config.expand = false

      LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
        LogicalProject(c0=[$1], $f1=[$SCALAR_QUERY({
      LogicalProject(warehouse_class_id=[$0])
        LogicalFilter(condition=[AND(=($0, $cor0.warehouse_class_id), =($1, 'Large Owned'))])
          JdbcTableScan(table=[[foodmart, warehouse_class]])
      })], $f2=[$SCALAR_QUERY({
      LogicalProject(warehouse_class_id=[$0])
        LogicalFilter(condition=[AND(=($0, $cor1.warehouse_class_id), =($1, 'Large Independent'))])
          JdbcTableScan(table=[[foodmart, warehouse_class]])
      })], $f3=[$SCALAR_QUERY({
      LogicalProject(warehouse_class_id=[$0])
        LogicalFilter(condition=[AND(=($0, $cor2.warehouse_class_id), =($1, 'Large Independent'))])
          JdbcTableScan(table=[[foodmart, warehouse_class]])
      })], $f4=[+($0, $24)], stores_id=[$26])
          LogicalFilter(condition=[=($26, $0)])
            LogicalJoin(condition=[true], joinType=[inner])
              JdbcTableScan(table=[[foodmart, store]])
              JdbcTableScan(table=[[foodmart, warehouse]])

      Attachments

        Issue Links

          Activity

            People

              Unassigned Unassigned
              julianhyde Julian Hyde
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

                Created:
                Updated: