Uploaded image for project: 'IMPALA'
  1. IMPALA
  2. IMPALA-4385

Left join doesn't work properly for array

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Closed
    • Major
    • Resolution: Duplicate
    • Impala 2.6.0
    • None
    • Frontend
    • None

    Description

      There is a bug with left join for array. Problem is that left join doesn't distinguish 'where' and 'on' statement.

      Let's consider equivalent information which is saved first in common SQL structure, second in array. Left join should return same result, but it doesn't.

      Please let me know if anything is not clear

      Compare results

      -- checking left join result without condition in where part - result is the same as expected
      select a.x from example_2 a
      left join example_2_details b on a.x=b.x and b.y='noon'
      order by 1;
      
      select a.x from example a
      left join a.y b on b.item='noon'
      order by 1;
      

      RESULTS: 1,2,3,4,5,6,7 vs. 1,2,3,4,5,6,7

      -- checking left anti join result - result is the same as expected
      select a.x from example_2 a
      left anti join example_2_details b on a.x=b.x and b.y='noon';
      
      select a.x from example a
      left anti join a.y b on b.item='noon'
      order by 1;
      

      RESULTS: 2,3,7 vs. 2,3,7

      -- checking left join result with condition in where- result is NOT the same, bug
      select a.x from example_2 a
      left join example_2_details b on a.x=b.x and b.y='noon'
      where b.y is null
      order by 1;
      
      select a.x from example a
      left join a.y b on b.item='noon'
      where b.item is null
      order by 1;
      

      RESULTS: 2,3,7 vs. 1,2,3,4,5,6,7

      Input data and tables

      /*common SQL input data */
      create table example_2 (number string, x string);
      insert into table example_2 values ('one','1'),('two','2'),('three','3'),('four','4'),('five','5'),('six','6'),('seven','7');
      
      
      create table example_2_details (x string, y string);
      
      insert into table example_2_details values ('1','morning');
      insert into table example_2_details values ('1','noon');
      insert into table example_2_details values ('1','evening');
      insert into table example_2_details values ('2','morning');
      insert into table example_2_details values ('2','evening');
      insert into table example_2_details values ('3','morning');
      insert into table example_2_details values ('4','morning');
      insert into table example_2_details values ('4','noon');
      insert into table example_2_details values ('5','evening');
      insert into table example_2_details values ('5','noon');
      insert into table example_2_details values ('6','noon');
      insert into table example_2_details values ('7','evening');
      
      /*data stored in array, code is from hive*/
      
      create table example (x string, y array<string>) stored as parquet ;
      
      INSERT INTO table example  SELECT '1', array('morning','noon','evening') from dum limit 1;
      INSERT INTO table example  SELECT '2', array('morning','evening') from dum limit 1;
      INSERT INTO table example  SELECT '3', array('morning') from dum limit 1;
      INSERT INTO table example  SELECT '4', array('morning','noon') from dum limit 1;
      INSERT INTO table example  SELECT '5', array('noon','evening') from dum limit 1;
      INSERT INTO table example  SELECT '6', array('noon') from dum limit 1;
      INSERT INTO table example  SELECT '7', array('evening') from dum limit 1;
      
      /*we can check that informations in both structures are equvalent */
      select count(1), min(card), max(card) from
      (
      select a.x, b.y, count(1) over () card from example_2 a
      join example_2_details b on a.x=b.x
      union
      select a.x, b.item, count(1) over () card from example a
      join a.y b) t
      

      Attachments

        Issue Links

          Activity

            People

              Unassigned Unassigned
              brejcak@centrum.cz Peter Brejcak
              Votes:
              0 Vote for this issue
              Watchers:
              2 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: