Uploaded image for project: 'Kylin'
  1. Kylin
  2. KYLIN-4342 Build Global Dict by MR/Hive New Version
  3. KYLIN-4344

Build Global Dict by MR/Hive, Extract Fact Table Distinct Columns Step

    XMLWordPrintableJSON

Details

    • Sub-task
    • Status: Resolved
    • Major
    • Resolution: Fixed
    • None
    • None
    • None
    • None
    • Sprint 50

    Description

      This step is the first step for Build Global Dict by MR/Hive, mainly realizes the following functions by HQL:

      • Create global dict hive table of the cube if not exists, default globalDictDatabase.cubeName_global_dict;
      • Create extract distinct value intermediate hive table, default flatTableName__group_by;
      • Create global dict hive intermediate table for this segment , defalut flatTableName_global_dict;
      • Insert data into flatTableName__group_by hive table.

      SQL Example

       

      //One cube only have only one global_dict table
      CREATE TABLE IF NOT EXISTS db.kylin_sales_cube_mr_notopn_global_dict(  dict_key STRING COMMENT '',  dict_val INT COMMENT '' ) COMMENT '' PARTITIONED BY (dict_column string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE; 
      
      //Each segment build job has a flatTableName__group_by table
      DROP TABLE IF EXISTS
      kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115__group_by; CREATE TABLE IF NOT EXISTS kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115__group_by (  dict_key STRING COMMENT '' ) COMMENT '' PARTITIONED BY (dict_column string) STORED AS TEXTFILE ;
      
      //Each segment build job has a flatTableName_global_dict table
      DROP TABLE IF EXISTS kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115_global_dict; CREATE TABLE IF NOT EXISTS kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115_global_dict (  dict_key STRING COMMENT '' ,  dict_val STRING COMMENT '' ) COMMENT '' PARTITIONED BY (dict_column string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE ;
      
      //Insert data into flatTableName__group_by table
      //global dict columns partitons
      INSERT OVERWRITE TABLE kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115__group_by PARTITION (dict_column = 'KYLIN_SALES_SELLER_ID') SELECT a.DICT_KEY FROM  (SELECT KYLIN_SALES_SELLER_ID  as DICT_KEY FROM kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115 GROUP BY KYLIN_SALES_SELLER_ID) a   LEFT JOIN  (SELECT  DICT_KEY FROM db.kylin_sales_cube_mr_notopn_global_dict WHERE DICT_COLUMN = 'KYLIN_SALES_SELLER_ID') b  ON a.DICT_KEY = b.DICT_KEY WHERE   b.DICT_KEY IS NULL ;
      
      INSERT OVERWRITE TABLE kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115__group_by PARTITION (dict_column = 'KYLIN_SALES_BUYER_ID') SELECT a.DICT_KEY FROM  (SELECT KYLIN_SALES_BUYER_ID  as DICT_KEY FROM kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115 GROUP BY KYLIN_SALES_BUYER_ID) a   LEFT JOIN  (SELECT  DICT_KEY FROM db.kylin_sales_cube_mr_notopn_global_dict WHERE DICT_COLUMN = 'KYLIN_SALES_BUYER_ID') b     ON a.DICT_KEY = b.DICT_KEY WHERE b.DICT_KEY IS NULL;
      // KYLIN_MAX_DISTINCT_COUNT special partition
      INSERT OVERWRITE TABLE kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115__group_by PARTITION (DICT_COLUMN = 'KYLIN_MAX_DISTINCT_COUNT') SELECT  CONCAT_WS(',', tc.dict_column, cast(tc.total_distinct_val AS String), if(tm.max_dict_val is null, '0', cast(max_dict_val as string))) FROM  (SELECT  dict_column,count(1) total_distinct_val FROM kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115__group_by where DICT_COLUMN != 'KYLIN_MAX_DISTINCT_COUNT' group by dict_column) tc  LEFT JOIN  (SELECT  dict_column,if(max(dict_val) is null, 0, max(dict_val)) as max_dict_val FROM db.kylin_sales_cube_mr_notopn_global_dict group by dict_column) tm  ON  tc.dict_column = tm.dict_column;
      

      Attachments

        Issue Links

          Activity

            People

              wangxiaojing wangxiaojing
              wangxiaojing wangxiaojing
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: