Uploaded image for project: 'Apache Drill'
  1. Apache Drill
  2. DRILL-1308

CannotPlanException for TPC-DS query 8 due to large IN-list and Intersect operation

    XMLWordPrintableJSON

Details

    Description

      On TPC-DS query 8 gives a CannotPlanException. This is due to 2 reasons:
      1. Large (>= 20 items) IN-list for which we need a ValuesOperator implementation
      2. INTERSECT operation which is not yet supported.

      select * from (select store.s_store_name
      ,sum(ss.ss_net_profit)
      from store_sales ss
      ,date_dim dd
      ,store,
      (select ca_zip
      from (
      (SELECT substr(ca.ca_zip,1,5) ca_zip
      FROM customer_address ca
      WHERE substr(ca.ca_zip,1,5) IN (
      '16733','50732','51878','16933','33177','55974',
      '21338','90455','63106','78712','45114',
      '51090','44881','35526','91360','34986',
      '31893','28853','84061','25483','84541',
      '39275','56211','51199','85189','24292',
      '27477','46388','77218','21137','43660',
      '36509','77925','11691','26790','35256',
      '59221','42491','39214','35273','27293',
      '74258','68798','50936','19136','25240',
      '89163','21667','30941','61680','10425',
      '96787','84569','37596','84291','44843',
      '31487','24949','31269','62115','79494',
      '32194','62531','61655','40724','29091',
      '81608','77126','32704','79045','19008',
      '81581','59693','24689','79355','19635',
      '52025','83585','56103','80150','26203',
      '81571','85657','39672','62868','33498',
      '69453','25748','44145','35695','57860',
      '59532','76967','81235','22004','34487',
      '48499','47318','63039','77728','89774',
      '91640','76501','70137','37512','48507',
      '51980','34851','54884','30905','12745',
      '60630','42798','39923','47591','82518',
      '32982','14233','56444','79278','57791',
      '37395','93812','14062','21556','58923',
      '13595','87261','79484','24492','10389',
      '89526','21733','85078','35187','68025',
      '45624','25243','42027','50749','13870',
      '47072','17847','46413','11259','20221',
      '32961','14173','96788','77001','65695',
      '52542','39550','21651','68063','48779',
      '55702','16612','15953','22707','83997',
      '61460','18919','27616','55164','54421',
      '47268','66355','86846','74968','95883',
      '92832','37009','66903','38063','95421',
      '45640','55118','22721','54787','29772',
      '79121','85462','28380','34680','85831',
      '60615','60763','87605','10096','69252',
      '28329','68812','47734','36851','24290',
      '39067','32242','97396','45999','37157',
      '88891','71571','17941','12910','28800',
      '47548','11514','49224','50161','27249',
      '29522','50098','20810','23683','51862',
      '57007','43224','98002','65238','30719',
      '15735','70127','33927','96245','56649',
      '44640','34914','18833','72797','18380',
      '17256','75124','36114','44696','35472',
      '76579','52537','82424','44424','32748',
      '77516','17985','57725','34893','42886',
      '98097','62869','24984','80539','19716',
      '87183','63517','60342','42577','88040',
      '46167','75108','41469','73674','13253',
      '66716','36315','16812','85084','70345',
      '16291','84204','38177','41416','75043',
      '85969','52544','13572','21899','22356',
      '16473','25488','46385','18400','17159',
      '74763','34719','18588','39471','47156',
      '28837','84535','69380','54019','57251',
      '51378','43170','51671','40569','81767',
      '59720','68739','28324','24144','96283',
      '53359','11880','52839','13744','21434',
      '24927','99581','87926','93557','34275',
      '12144','82294','39717','28926','89184',
      '29862','38378','91135','17811','57160',
      '74994','34074','51040','69828','65826',
      '84570','24660','15444','62133','83549',
      '15555','80929','27543','86821','98908',
      '89602','68316','69972','40191','97204',
      '42699','56262','69604','44040','48466',
      '55692','14302','38041','33734','47513',
      '46513','16039','81050','34048','30741',
      '18213','99574','27215','60005','47953',
      '29145','14682','50833','74174','86506',
      '57452','92971','70344','66483','99501',
      '78134','79445','82179','44114','19591',
      '20096','85999','52672','47030','74464',
      '30215','59015','42068','25463','26536',
      '53394','43508','41140','29335','37130',
      '43967','22686','78500','70281','20148',
      '54335','31575','79592','16787'))
      intersect
      (select ca_zip
      from (SELECT substr(ca.ca_zip,1,5) ca_zip,count cnt
      FROM customer_address ca, customer
      WHERE ca.ca_address_sk = customer.c_current_addr_sk and
      customer.c_preferred_cust_flag='Y'
      group by ca.ca_zip
      having count > 10)A1))A2) V1
      where ss.ss_store_sk = store.s_store_sk
      and ss.ss_sold_date_sk = dd.d_date_sk
      and dd.d_qoy = 1 and dd.d_year = 2001
      and (substr(store.s_zip,1,2) = substr(V1.ca_zip,1,2))
      group by store.s_store_name
      order by store.s_store_name
      ) limit 100;

      message: "Failure while parsing sql. < CannotPlanException:[ Node [rel#35871:Subset#27.LOGICAL.ANY([]).[]] could not be implemented; planner state:

      Root: rel#35871:Subset#27.LOGICAL.ANY([]).[]
      Original rel:
      AbstractConverter(subset=[rel#35871:Subset#27.LOGICAL.ANY([]).[]], convention=[LOGICAL], DrillDistributionTraitDef=[ANY([])], sort=[[]]): rowcount = 1.7976931348623157E308, cumulative cost =

      {inf}

      , id = 35872
      SortRel(subset=[rel#35870:Subset#27.NONE.ANY([]).[]], fetch=[100]): rowcount = 1.7976931348623157E308, cumulative cost =

      {Infinity rows, 1.7976931348623157E308 cpu, 0.0 io, 0.0 network}, id = 35869
      ProjectRel(subset=[rel#35868:Subset#26.NONE.ANY([]).[0]], s_store_name=[$0], EXPR$1=[$1]): rowcount = 1.7976931348623157E308, cumulative cost = {1.7976931348623157E308 rows, Infinity cpu, 0.0 io, 0.0 network}, id = 35867
      SortRel(subset=[rel#35866:Subset#25.NONE.ANY([]).[0]], sort0=[$0], dir0=[ASC]): rowcount = 1.7976931348623157E308, cumulative cost = {Infinity rows, 1.7976931348623157E308 cpu, 0.0 io, 0.0 network}

      , id = 35865
      AggregateRel(subset=[rel#35864:Subset#24.NONE.ANY([]).[]], group=[

      {0}

      ], EXPR$1=[SUM($1)]): rowcount = 1.7976931348623158E307, cumulative cost =

      {1.7976931348623158E307 rows, 0.0 cpu, 0.0 io, 0.0 network}

      , id = 35863
      ProjectRel(subset=[rel#35862:Subset#23.NONE.ANY([]).[]], s_store_name=[$9], ss_net_profit=[$3]): rowcount = 1.7976931348623157E308, cumulative cost =

      {1.7976931348623157E308 rows, Infinity cpu, 0.0 io, 0.0 network}

      , id = 35861
      FilterRel(subset=[rel#35860:Subset#22.NONE.ANY([]).[]], condition=[AND(=($1, $11), =($2, $7), =(CAST($5):INTEGER, 1), =(CAST($6):INTEGER, 2001), =(SUBSTR($10, 1, 2), SUBSTR($12, 1, 2)))]): rowcount = 1.3651232242860708E304, cumulative cost =

      {1.3651232242860708E304 rows, 1.7976931348623157E308 cpu, 0.0 io, 0.0 network}

      , id = 35859
      JoinRel(subset=[rel#35858:Subset#21.NONE.ANY([]).[]], condition=[true], joinType=[inner]): rowcount = 1.7976931348623157E308, cumulative cost =

      {1.7976931348623157E308 rows, 0.0 cpu, 0.0 io, 0.0 network}, id = 35857
      JoinRel(subset=[rel#35827:Subset#4.NONE.ANY([]).[]], condition=[true], joinType=[inner]): rowcount = 1.7976931348623157E308, cumulative cost = {1.7976931348623157E308 rows, 0.0 cpu, 0.0 io, 0.0 network}

      , id = 35826
      JoinRel(subset=[rel#35824:Subset#2.NONE.ANY([]).[]], condition=[true], joinType=[inner]): rowcount = 10000.0, cumulative cost =

      {10000.0 rows, 0.0 cpu, 0.0 io, 0.0 network}

      , id = 35823
      EnumerableTableAccessRel(subset=[rel#35821:Subset#0.ENUMERABLE.ANY([]).[]], table=[[dfs, tpcds, store_sales]]): rowcount = 100.0, cumulative cost =

      {100.0 rows, 101.0 cpu, 0.0 io, 0.0 network}, id = 35792
      EnumerableTableAccessRel(subset=[rel#35822:Subset#1.ENUMERABLE.ANY([]).[]], table=[[dfs, tpcds, date_dim]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io, 0.0 network}

      , id = 35793
      EnumerableTableAccessRel(subset=[rel#35825:Subset#3.ENUMERABLE.ANY([]).[]], table=[[dfs, tpcds, store]]): rowcount = 100.0, cumulative cost =

      {100.0 rows, 101.0 cpu, 0.0 io, 0.0 network}, id = 35795
      ProjectRel(subset=[rel#35856:Subset#20.NONE.ANY([]).[]], ca_zip=[$0]): rowcount = 1.7976931348623157E308, cumulative cost = {1.7976931348623157E308 rows, 1.7976931348623157E308 cpu, 0.0 io, 0.0 network}, id = 35855
      IntersectRel(subset=[rel#35854:Subset#19.NONE.ANY([]).[]], all=[false]): rowcount = 4.4942328371557893E307, cumulative cost = {4.4942328371557893E307 rows, 4.4942328371557893E307 cpu, 0.0 io, 0.0 network}, id = 35853
      ProjectRel(subset=[rel#35837:Subset#10.NONE.ANY([]).[]], ca_zip=[SUBSTR($1, 1, 5)]): rowcount = 1.7976931348623157E308, cumulative cost = {1.7976931348623157E308 rows, 1.7976931348623157E308 cpu, 0.0 io, 0.0 network}, id = 35836
      JoinRel(subset=[rel#35835:Subset#9.NONE.ANY([]).[]], condition=[=($3, $4)], joinType=[inner]): rowcount = 1.7976931348623157E308, cumulative cost = {1.7976931348623157E308 rows, 0.0 cpu, 0.0 io, 0.0 network}, id = 35834
      ProjectRel(subset=[rel#35830:Subset#6.NONE.ANY([]).[]], $f0=[$0], $f1=[$1], $f2=[$2], $f3=[SUBSTR($1, 1, 5)]): rowcount = 100.0, cumulative cost = {100.0 rows, 400.0 cpu, 0.0 io, 0.0 network}, id = 35829
      EnumerableTableAccessRel(subset=[rel#35828:Subset#5.ENUMERABLE.ANY([]).[]], table=[[dfs, tpcds, customer_address]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io, 0.0 network}

      , id = 35797

      Attachments

        Issue Links

          Activity

            People

              seanhychu Sean Hsuan-Yi Chu
              amansinha100 Aman Sinha
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: