Uploaded image for project: 'Hive'
  1. Hive
  2. HIVE-17796

PTF in a view disables PPD

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Open
    • Major
    • Resolution: Unresolved
    • None
    • None
    • None
    • None

    Description

      I disabled constant propagation to make logging cleaner. It is the same if it is enabled. See truncated path to alias;
      Simple view with partition columns and filter outside of the view: PPD works.
      View with PTF and filter included in the view: PPD works.
      View with PTF and filter outside of the view: PPD breaks.

      I looked at the logs for some time, it looks like the predicate is already null in this case when passed to partition pruner; not sure why this is happening for now.
      View can also be partitioned.

      set hive.mapred.mode=nonstrict;
      set hive.explain.user=false;
      set hive.auto.convert.join=true;
      set hive.auto.convert.join.noconditionaltask=true;
      set hive.auto.convert.join.noconditionaltask.size=10000;
      set hive.metastore.aggregate.stats.cache.enabled=false;
      set hive.stats.fetch.column.stats=false;
      set hive.cbo.enable=false;
      
      
      create table dim (c2 string) partitioned by (pc1 string, pc2 string);
      create table fact (c1 string, c3 string) partitioned by (pc1 string, pc2 string);
      
      insert overwrite table dim partition (pc1='aaa', pc2='aaa') select key from src;
      insert overwrite table dim partition (pc1='ccc', pc2='ccc') select key from src;
      insert overwrite table dim partition (pc1='ddd', pc2='ddd') select key from src;
      insert overwrite table fact partition (pc1='aaa', pc2='aaa') select key, key from src;
      insert overwrite table fact partition (pc1='bbb', pc2='bbb') select key, key from src;
      insert overwrite table fact partition (pc1='ccc', pc2='ccc') select key, key from src;
      
      create view vw_ptf as select a1.*,
      (cast((row_number() over (partition by a1.pc1, a1.pc2)) as bigint) + b1.c2) as unique_key
      from fact a1 join dim b1 on a1.pc1 = b1.pc1 and a1.pc2 = b1.pc2;
      
      create view vw_simple as select a1.*, b1.c2
      from fact a1 join dim b1 on a1.pc1 = b1.pc1 and a1.pc2 = b1.pc2;
      
      create view vw_ppd as select a1.*,
      (cast((row_number() over (partition by a1.pc1, a1.pc2)) as bigint) + b1.c2) as Unique_Key
      from fact a1 join dim b1 on a1.pc1 = b1.pc1 and a1.pc2 = b1.pc2
      where a1.pc1 = 'ccc' and a1.pc2='ccc';
      
      set hive.optimize.constant.propagation=false;
      
      
      explain extended
      select a.* from vw_simple a WHERE 1 = 1 AND (a.pc1 = 'ccc' and a.pc2='ccc'); 
      explain extended
      select a.* from vw_ppd a WHERE 1 = 1 AND (a.pc1 = 'ccc' and a.pc2='ccc');
      explain extended
      select a.* from vw_ptf a WHERE 1 = 1 AND (a.pc1 = 'ccc' and a.pc2='ccc');
      

      Attachments

        Activity

          People

            Unassigned Unassigned
            sershe Sergey Shelukhin
            Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

            Dates

              Created:
              Updated: