Uploaded image for project: 'Hive'
  1. Hive
  2. HIVE-21054

union all and join result wrong

    XMLWordPrintableJSON

    Details

    • Type: Bug
    • Status: Open
    • Priority: Major
    • Resolution: Unresolved
    • Affects Version/s: 3.1.1
    • Fix Version/s: None
    • Component/s: Query Planning
    • Labels:
      None

      Description

       i make 3 tables 

       

      // 코드 자리 표시자
      CREATE TABLE `testdb`.`tab` ( 
      stn1 STRING COMMENT '' ,
      int2 BIGINT COMMENT '' ,
      float3 FLOAT COMMENT '' ,
      long4 STRING COMMENT '' ,
      double5 DOUBLE COMMENT '' ,
      boolean6 BOOLEAN COMMENT '' ,
      timestamp7 TIMESTAMP COMMENT '' ,
      words8 STRING COMMENT '' ,
      email9 STRING COMMENT '' ,
      time10 STRING COMMENT '' ,
      int11 BIGINT COMMENT '' ,
      float12 FLOAT COMMENT '' ,
      long13 STRING COMMENT '' ,
      double14 DOUBLE COMMENT '' ,
      boolean15 BOOLEAN COMMENT '' ,
      timestamp16 TIMESTAMP COMMENT '' ,
      email17 STRING COMMENT '' ,
      time18 STRING COMMENT '' ,
      list19 STRING COMMENT '' ,
      id20 STRING COMMENT '' ,
      sqltime21 TIMESTAMP COMMENT '' ,
      ctype22 INT COMMENT '' ,
      reg23 STRING COMMENT '' 
      )
      
      CREATE TABLE `testdb`.`space` ( 
      stn1 STRING COMMENT '' ,
      int2 BIGINT COMMENT '' ,
      float3 FLOAT COMMENT '' ,
      long4 STRING COMMENT '' ,
      double5 DOUBLE COMMENT '' ,
      boolean6 BOOLEAN COMMENT '' ,
      timestamp7 TIMESTAMP COMMENT '' ,
      words8 STRING COMMENT '' ,
      email9 STRING COMMENT '' ,
      time10 STRING COMMENT '' ,
      int11 BIGINT COMMENT '' ,
      float12 FLOAT COMMENT '' ,
      long13 STRING COMMENT '' ,
      double14 DOUBLE COMMENT '' ,
      boolean15 BOOLEAN COMMENT '' ,
      timestamp16 TIMESTAMP COMMENT '' ,
      email17 STRING COMMENT '' ,
      time18 STRING COMMENT '' ,
      list19 STRING COMMENT '' ,
      id20 STRING COMMENT '' ,
      sqltime21 TIMESTAMP COMMENT '' ,
      ctype22 INT COMMENT '' ,
      reg23 STRING COMMENT '' 
      )
      
      CREATE TABLE `testdb`.`colon` ( 
      stn1 STRING COMMENT '' ,
      int2 BIGINT COMMENT '' ,
      float3 FLOAT COMMENT '' ,
      long4 STRING COMMENT '' ,
      double5 DOUBLE COMMENT '' ,
      boolean6 BOOLEAN COMMENT '' ,
      timestamp7 TIMESTAMP COMMENT '' ,
      words8 STRING COMMENT '' ,
      email9 STRING COMMENT '' ,
      time10 STRING COMMENT '' ,
      int11 BIGINT COMMENT '' ,
      float12 FLOAT COMMENT '' ,
      long13 STRING COMMENT '' ,
      double14 DOUBLE COMMENT '' ,
      boolean15 BOOLEAN COMMENT '' ,
      timestamp16 TIMESTAMP COMMENT '' ,
      email17 STRING COMMENT '' ,
      time18 STRING COMMENT '' ,
      list19 STRING COMMENT '' ,
      id20 STRING COMMENT '' ,
      sqltime21 TIMESTAMP COMMENT '' ,
      ctype22 INT COMMENT '' ,
      reg23 STRING COMMENT '' 
      )
      

       

       

      and execute query at below : 

      // 
      set hive.vectorized.execution.enabled=FALSE;
      
      SELECT distinct t1.ctype, t1.id_all , t2.list19
      FROM (
      SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list 
      FROM tab WHERE stn1='20130101010100'
      AND ctype22 BETWEEN 2 AND 5   --result 45 row
      
      UNION ALL 
      
      SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list 
      FROM space 
      WHERE stn1='20130102010100'
      AND ctype22 BETWEEN 2 AND 5   --result empty
      
      ) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join matched 3 rows
      

      expected result 

      ctype id_all list19
      2 104 ITEM30
      2 683 ITEM30
      1 970 ITEM30

      but, actual result empty..

       

       

      instead make table "union all" query result

      // 코드 자리 표시자
      set hive.vectorized.execution.enabled=FALSE;
      
      create table unionalltbl as
      SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list 
      FROM tab WHERE stn1='20130101010100'
      AND ctype22 BETWEEN 2 AND 5 
      
      UNION ALL 
      
      SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list 
      FROM space 
      WHERE stn1='20130102010100'
      AND ctype22 BETWEEN 2 AND 5
      

       

      and retry query 

      // 코드 자리 표시자
      set hive.vectorized.execution.enabled=FALSE;
      
      select DISTINCT t1.close_type, t1.id_all, t2.list19
      from unionalltbl t1 JOIN colon t2 ON t1.id_all=t2.id20 and t2.list19='ITEM30'
      

      i got expected result 

       

      and i compare two query explain
      first query

      // 코드 자리 표시자
      rel#18659:HiveProject.HIVE.[](input=rel#18657:HiveAggregate.HIVE.[]
      (
      input=rel#18655:HiveProject.HIVE.[]
      (
      input=rel#18653:HiveJoin.HIVE.[]
      (
      left=rel#18650:HiveProject.HIVE.[]
      (
      input=rel#18648:HiveUnion.HIVE.[]
      (
      input#0=rel#18641:HiveProject.HIVE.[]
      (
      input=rel#18639:HiveFilter.HIVE.[]
      (
      input=rel#18611:HiveTableScan.HIVE.[](table=[testdb.tab],table:alias=tab)[false]
      ,condition=AND(=($0, _UTF-16LE'20130101010100'), BETWEEN(false, $21, 2, 5))
      )
      ,close_type=$21,id_all=$19,item_list=$18
      )
      ,input#1=rel#18646:HiveProject.HIVE.[]
      ( 
      input=rel#18644:HiveFilter.HIVE.[]
      (
      input=rel#18614:HiveTableScan.HIVE.[](table=[testdb.space],table:alias=space)[false]
      ,condition=AND(=($0, _UTF-16LE'20130102010100'), BETWEEN(false, $21, 2, 5))
      ),close_type=$21,id_all=$19,item_list=$18
      )
      ,all=true
      )
      ,close_type=$0,id_all=$1,item_list=$2
      ),
      right=rel#18619:HiveTableScan.HIVE.[]
      (
      table=[testdb.colon],table:alias=t2
      )
      [false],condition=AND
      (
      =($1, $22), =($21, _UTF-16LE'ITEM30')
      )
      ,joinType=inner,algorithm=none,cost=not available
      )
      ,$f0=$0,$f1=$1,$f2=$21
      )
      ,group={0, 1, 2}
      )
      ,close_type=$0,id_all=$1,list19=$2)
      

      second query

      // 코드 자리 표시자
      rel#17893:HiveProject.HIVE.[]
      (
      input=rel#17891:HiveAggregate.HIVE.[]
      (
      input=rel#17889:HiveProject.HIVE.[]
      (
      input=rel#17887:HiveJoin.HIVE.[]
      (
      left=rel#17872:HiveTableScan.HIVE.[]
      (able=[testdb.unionalltbl],table:alias=t1)[false],
      
      right=rel#17873:HiveTableScan.HIVE.[](table=[testdb.colon],table:alias=t2)[false],
      condition=AND(=($1, $25), =($24, _UTF-16LE'ITEM30')),
      joinType=inner,algorithm=none,cost=not available
      ),
      $f0=$0,$f1=$1,$f2=$24
      ),
      group={0, 1, 2}
      )
      ,close_type=$0,id_all=$1,list19=$2
      )
      

      why right table 'colon' condition column number different? 

       

        Attachments

          Activity

            People

            • Assignee:
              Unassigned
              Reporter:
              dave.kim daveKim
            • Votes:
              0 Vote for this issue
              Watchers:
              1 Start watching this issue

              Dates

              • Created:
                Updated: