Uploaded image for project: 'Hive'
  1. Hive
  2. HIVE-24124

NPE occurs when bucket_version different bucket tables are joined

    XMLWordPrintableJSON

Details

    • Bug
    • Status: In Progress
    • Major
    • Resolution: Unresolved
    • None
    • None
    • Hive
    • None

    Description

      create table z_tab_1(
          task_id  string,    
          data_date  string,  
          accno  string,      
          curr_type  string,  
          ifrs9_pd12_value  double,
          ifrs9_ccf_value  double,
          ifrs9_lgd_value  double
      )partitioned by(pt_dt string)
      STORED AS ORCFILE
      TBLPROPERTIES ('bucketing_version'='1');
      
      alter table z_tab_1 add partition(pt_dt = '2020-7-31');
      
      insert into z_tab_1 partition(pt_dt = '2020-7-31') values
      ('123','2020-7-31','accno-xxxx','curr_type-xxxxx', 0.1, 0.2 ,0.3),
      ('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3),
      ('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3),
      ('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3),
      ('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3),
      ('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3),
      ('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3),
      ('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3),
      ('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3),
      ('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3);
      drop table if exists z_tab_2;
      CREATE TABLE z_tab_2(              
          task_id  string,                
          data_date  string,              
          accno  string,                  
          curr_type  string,              
          ifrs9_pd12_value  double,       
          ifrs9_ccf_value  double,        
          ifrs9_lgd_value  double
      ) 
      CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE)  SORTED by (TASK_ID, ACCNO, CURR_TYPE) INTO 2000 BUCKETS 
      ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
      STORED AS ORCFILE;
      
      set hive.enforce.bucketing=true;
      
      INSERT OVERWRITE TABLE z_tab_2
      SELECT  DCCR.TASK_ID
             ,DCCR.DATA_DATE
             ,DCCR.ACCNO
             ,DCCR.CURR_TYPE
             ,DCCR.IFRS9_PD12_VALUE
             ,DCCR.IFRS9_CCF_VALUE
             ,DCCR.IFRS9_LGD_VALUE 
      FROM z_tab_1 DCCR
      WHERE pt_dt = '2020-7-31';
      
      Caused by: java.lang.NullPointerException  
      at org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072)  
      at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988)  
      at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995)  
      at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941)  
      at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928)  
      at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)  
      at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:237)  
      ... 7 more

      Attachments

        Activity

          People

            ashish-kumar-sharma Ashish Sharma
            luguangming GuangMing Lu
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated: