Uploaded image for project: 'Apache Drill'
  1. Apache Drill
  2. DRILL-801

merge joins fail with ArrayIndexOutOfBoundsException en masse

    XML Word Printable JSON

Details

    • Bug
    • Status: Resolved
    • Major
    • Resolution: Fixed
    • None
    • 0.4.0
    • Execution - Flow
    • None

    Description

      Datasources: TPCH (10MB), three-way split parquet files
      git.commit.id.abbrev=5d7e3d3
      git.commit.id=5d7e3d3ab548eb2b23607df46ea843a9c1532b72

      All of the join queries in the smoke test suite with merge-join fail with ArrayIndexOutOfBoundsException. An example follows:

      0: jdbc:drill:schema=dfs.TpcHMulti> alter session set `planner.enable_hashjoin` = false;
      +------+----------------------------------+
      | ok   | summary                          |
      +------+----------------------------------+
      | true | planner.enable_hashjoin updated. |
      +------+----------------------------------+
      1 row selected (0.024 seconds)
      0: jdbc:drill:schema=dfs.TpcHMulti> select o.O_TOTALPRICE, c.C_NAME
      . . . . . . . . . . . . . . . . . > from orders o, customer c
      . . . . . . . . . . . . . . . . . > where o.C_CUSTKEY = c.C_CUSTKEY and o.O_TOTALPRICE > 400000.00
      . . . . . . . . . . . . . . . . . > order by o.O_TOTALPRICE;
      Query failed: org.apache.drill.exec.rpc.RpcException: Remote failure while running query.[error_id: "3914508b-6c56-4598-a5aa-5d3f51885ded"
      endpoint {
      address: "perfnode104.perf.lab"
      user_port: 31010
      control_port: 31011
      data_port: 31012
      }
      error_type: 0
      message: "Failure while running fragment. < ArrayIndexOutOfBoundsException:[ 16666 ]"
      ]
      Error: exception while executing query (state=,code=0)

      Physical plan:

      0: jdbc:drill:schema=dfs.TpcHMulti> explain plan for select o.O_TOTALPRICE, c.C_NAME
      . . . . . . . . . . . . . . . . . > from orders o, customer c
      . . . . . . . . . . . . . . . . . > where o.C_CUSTKEY = c.C_CUSTKEY and o.O_TOTALPRICE > 400000.00
      . . . . . . . . . . . . . . . . . > order by o.O_TOTALPRICE ;
      +------+------+
      | text | json |
      +------+------+

      ScreenPrel
        SingleMergeExchangePrel(sort0=[0 ASC])
          SelectionVectorRemoverPrel
            SortPrel(sort0=[$0], dir0=[ASC])
              HashToRandomExchangePrel(dist0=[[$0]])
                ProjectPrel(O_TOTALPRICE=[$2], C_NAME=[$5])
                  MergeJoinPrel(condition=[=($1, $4)], joinType=[inner])
                    SelectionVectorRemoverPrel
                      SortPrel(sort0=[$1], dir0=[ASC])
                        HashToRandomExchangePrel(dist0=[[$1]])
                          FilterPrel(condition=[>($2, 400000.00)])
                            ScanPrel(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=maprfs:/drill/testdata/tpch-multi/orders]], selectionRoot=/drill/testdata/tpch-multi/orders, columns=[SchemaPath [`C_CUSTKEY`], SchemaPath [`O_TOTALPRICE`]]]])
                    SelectionVectorRemoverPrel
                      SortPrel(sort0=[$1], dir0=[ASC])
                        HashToRandomExchangePrel(dist0=[[$1]])
                          ScanPrel(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=maprfs:/drill/testdata/tpch-multi/customer]], selectionRoot=/drill/testdata/tpch-multi/customer, columns=[SchemaPath [`C_CUSTKEY`], SchemaPath [`C_NAME`]]]])
      {
      "head" :
      Unknown macro: { "version" }

      ,
      "graph" : [ {
      "pop" : "parquet-scan",
      "@id" : 1,
      "entries" : [

      { "path" : "maprfs:/drill/testdata/tpch-multi/customer" }

      ],
      "storage" : {
      "type" : "file",
      "connection" : "maprfs:///",
      "workspaces" :

      Unknown macro: { "root" }

      ,
      "formats" :

      Unknown macro: { "psv" }


      },
      "format" :

      { "type" : "parquet" }

      ,
      "columns" : [ "`C_CUSTKEY`", "`C_NAME`" ],
      "selectionRoot" : "/drill/testdata/tpch-multi/customer"
      },

      { "pop" : "hash-to-random-exchange", "@id" : 2, "child" : 1, "expr" : "hash(`C_CUSTKEY`) ", "initialAllocation" : 1000000, "maxAllocation" : 10000000000 }

      ,

      Unknown macro: { "pop" }

      ,

      { "pop" : "selection-vector-remover", "@id" : 4, "child" : 3, "initialAllocation" : 1000000, "maxAllocation" : 10000000000 }

      ,

      Unknown macro: { "pop" }

      , {
      "pop" : "parquet-scan",
      "@id" : 6,
      "entries" : [

      { "path" : "maprfs:/drill/testdata/tpch-multi/orders" }

      ],
      "storage" : {
      "type" : "file",
      "connection" : "maprfs:///",
      "workspaces" :

      Unknown macro: { "root" }

      ,
      "formats" :

      Unknown macro: { "psv" }


      },
      "format" :

      { "type" : "parquet" }

      ,
      "columns" : [ "`C_CUSTKEY`", "`O_TOTALPRICE`" ],
      "selectionRoot" : "/drill/testdata/tpch-multi/orders"
      },

      { "pop" : "filter", "@id" : 7, "child" : 6, "expr" : "greater_than(`O_TOTALPRICE`, 400000.0) ", "initialAllocation" : 1000000, "maxAllocation" : 10000000000 }

      ,

      { "pop" : "hash-to-random-exchange", "@id" : 8, "child" : 7, "expr" : "hash(`C_CUSTKEY`) ", "initialAllocation" : 1000000, "maxAllocation" : 10000000000 }

      ,

      Unknown macro: { "pop" }

      ,

      { "pop" : "selection-vector-remover", "@id" : 10, "child" : 9, "initialAllocation" : 1000000, "maxAllocation" : 10000000000 }

      ,

      Unknown macro: { "pop" }

      ,

      Unknown macro: { "pop" }

      ,

      { "pop" : "hash-to-random-exchange", "@id" : 13, "child" : 12, "expr" : "hash(`O_TOTALPRICE`) ", "initialAllocation" : 1000000, "maxAllocation" : 10000000000 }

      ,

      Unknown macro: { "pop" }

      ,

      { "pop" : "selection-vector-remover", "@id" : 15, "child" : 14, "initialAllocation" : 1000000, "maxAllocation" : 10000000000 }

      ,

      Unknown macro: { "pop" }

      ,

      { "pop" : "screen", "@id" : 17, "child" : 16, "initialAllocation" : 1000000, "maxAllocation" : 10000000000 }

      ]
      }

      ----------------------+
      1 row selected (0.151 seconds)

      Attachments

        Activity

          People

            jnadeau Jacques Nadeau
            zhiyongliu Zhiyong Liu
            Votes:
            0 Vote for this issue
            Watchers:
            6 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: