Uploaded image for project: 'Apache AsterixDB'
  1. Apache AsterixDB
  2. ASTERIXDB-1340

Index does not have a valid resource ID

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Resolved
    • Critical
    • Resolution: Fixed
    • None
    • None
    • None

    Description

      I created a 3-NC cluster on a single machine, using the attached cluster configuration (local3.xml) and instance configuration (asterix-configuration.xml). The data files for the datasets (lineitem.tbl and orders.tbl) are attached. Then I ran the following DDL, DML, and query statements, in that order.

      DDL:

      drop dataverse tpch if exists;
      create dataverse tpch;
      
      use dataverse tpch;
      
      create type LineItemType as closed {
        l_orderkey: int64,
        l_partkey: int64,
        l_suppkey: int64,
        l_linenumber: int64,
        l_quantity: int64,
        l_extendedprice: double,
        l_discount: double,
        l_tax: double,
        l_returnflag: string,
        l_linestatus: string,
        l_shipdate: string,
        l_commitdate: string,
        l_receiptdate: string,
        l_shipinstruct: string,
        l_shipmode: string,
        l_comment: string
      }
      
      create type OrderType as closed {
        o_orderkey: int64,
        o_custkey: int64,
        o_orderstatus: string,
        o_totalprice: double,
        o_orderdate: string,
        o_orderpriority: string,
        o_clerk: string,
        o_shippriority: int64,
        o_comment: string
      }
      
      create dataset LineItem(LineItemType)
        primary key l_orderkey, l_linenumber;
      create dataset Orders(OrderType)
        primary key o_orderkey;
      

      DML:

      use dataverse tpch;
      
      load dataset LineItem 
      using "org.apache.asterix.external.dataset.adapter.NCFileSystemAdapter"
      (("path"="asterix_nc1:///data/lineitem.tbl"),("format"="delimited-text"),("delimiter"="|"));
      
      load dataset Orders 
      using "org.apache.asterix.external.dataset.adapter.NCFileSystemAdapter"
      (("path"="asterix_nc1:///data/orders.tbl"),("format"="delimited-text"),("delimiter"="|"));
      

      Query:

      use dataverse tpch;
      
      declare function tmp()
      {
        for $l in dataset('LineItem')
        where $l.l_commitdate < $l.l_receiptdate
        distinct by $l.l_orderkey
        return { "o_orderkey": $l.l_orderkey }
      }
      
      for $o in dataset('Orders')
      for $t in tmp()
      where $o.o_orderkey = $t.o_orderkey and 
        $o.o_orderdate >= '1993-07-01' and $o.o_orderdate < '1993-10-01' 
      group by $o_orderpriority := $o.o_orderpriority with $o
      order by $o_orderpriority
      return {
        "order_priority": $o_orderpriority,
        "count": count($o)
      }
      

      The query fails with the following exception:

      org.apache.hyracks.api.exceptions.HyracksDataException: java.util.concurrent.ExecutionException: org.apache.hyracks.api.exceptions.HyracksDataException: Index does not have a valid resource ID. Has it been created yet?
              at org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.runInParallel(SuperActivityOperatorNodePushable.java:218)
              at org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.initialize(SuperActivityOperatorNodePushable.java:83)
              at org.apache.hyracks.control.nc.Task.run(Task.java:261)
              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
              at java.lang.Thread.run(Thread.java:745)
      Caused by: java.util.concurrent.ExecutionException: org.apache.hyracks.api.exceptions.HyracksDataException: Index does not have a valid resource ID. Has it been created yet?
              at java.util.concurrent.FutureTask.report(FutureTask.java:122)
              at java.util.concurrent.FutureTask.get(FutureTask.java:192)
              at org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.runInParallel(SuperActivityOperatorNodePushable.java:212)
              ... 5 more
      Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: Index does not have a valid resource ID. Has it been created yet?
              at org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelper.open(IndexDataflowHelper.java:108)
              at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:111)
              at org.apache.hyracks.algebricks.runtime.operators.std.EmptyTupleSourceRuntimeFactory$1.open(EmptyTupleSourceRuntimeFactory.java:51)
              at org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor$1.initialize(AlgebricksMetaOperatorDescriptor.java:109)
              at org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable.lambda$initialize$0(SuperActivityOperatorNodePushable.java:83)
              at org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$$Lambda$4/1452854179.runAction(Unknown Source)
              at org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$1.call(SuperActivityOperatorNodePushable.java:205)
              at org.apache.hyracks.api.rewriter.runtime.SuperActivityOperatorNodePushable$1.call(SuperActivityOperatorNodePushable.java:202)
              at java.util.concurrent.FutureTask.run(FutureTask.java:266)
              ... 3 more
      

      The issue seems to be related to the "distinct by" clause. I also tried the following query, which uses "group by" in place of "distinct by" inside tmp(), and it works:

      use dataverse tpch;
      
      declare function tmp()
      {
        for $l in dataset('LineItem')
        where $l.l_commitdate < $l.l_receiptdate
        group by $l_orderkey := $l.l_orderkey with $l
        return { "o_orderkey": $l_orderkey }
      }
      
      for $o in dataset('Orders')
      for $t in tmp()
      where $o.o_orderkey = $t.o_orderkey and 
        $o.o_orderdate >= '1993-07-01' and $o.o_orderdate < '1993-10-01' 
      group by $o_orderpriority := $o.o_orderpriority with $o
      order by $o_orderpriority
      return {
        "order_priority": $o_orderpriority,
        "count": count($o)
      }
      

      However, I do not know why "distinct by" would have anything to do with the resource ID.
      Also, the original query (with "distinct by") works when the cluster has only two NCs.

      Attachments

        1. asterix-configuration.xml
          7 kB
          Yingyi Bu
        2. lineitem.tbl
          691 kB
          Yingyi Bu
        3. local3.xml
          2 kB
          Yingyi Bu
        4. orders.tbl
          159 kB
          Yingyi Bu

        Activity

          People

            mhubail Murtadha Makki Al Hubail
            buyingyi Yingyi Bu
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: