Details
-
Sub-task
-
Status: Resolved
-
Major
-
Resolution: Fixed
-
None
-
None
-
None
-
None
-
Sprint 50
Description
This step is the first step for Build Global Dict by MR/Hive, mainly realizes the following functions by HQL:
- Create global dict hive table of the cube if not exists, default globalDictDatabase.cubeName_global_dict;
- Create extract distinct value intermediate hive table, default flatTableName__group_by;
- Create global dict hive intermediate table for this segment , defalut flatTableName_global_dict;
- Insert data into flatTableName__group_by hive table.
SQL Example
//One cube only have only one global_dict table CREATE TABLE IF NOT EXISTS db.kylin_sales_cube_mr_notopn_global_dict( dict_key STRING COMMENT '', dict_val INT COMMENT '' ) COMMENT '' PARTITIONED BY (dict_column string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE; //Each segment build job has a flatTableName__group_by table DROP TABLE IF EXISTS kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115__group_by; CREATE TABLE IF NOT EXISTS kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115__group_by ( dict_key STRING COMMENT '' ) COMMENT '' PARTITIONED BY (dict_column string) STORED AS TEXTFILE ; //Each segment build job has a flatTableName_global_dict table DROP TABLE IF EXISTS kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115_global_dict; CREATE TABLE IF NOT EXISTS kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115_global_dict ( dict_key STRING COMMENT '' , dict_val STRING COMMENT '' ) COMMENT '' PARTITIONED BY (dict_column string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE ; //Insert data into flatTableName__group_by table //global dict columns partitons INSERT OVERWRITE TABLE kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115__group_by PARTITION (dict_column = 'KYLIN_SALES_SELLER_ID') SELECT a.DICT_KEY FROM (SELECT KYLIN_SALES_SELLER_ID as DICT_KEY FROM kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115 GROUP BY KYLIN_SALES_SELLER_ID) a LEFT JOIN (SELECT DICT_KEY FROM db.kylin_sales_cube_mr_notopn_global_dict WHERE DICT_COLUMN = 'KYLIN_SALES_SELLER_ID') b ON a.DICT_KEY = b.DICT_KEY WHERE b.DICT_KEY IS NULL ; INSERT OVERWRITE TABLE kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115__group_by PARTITION (dict_column = 'KYLIN_SALES_BUYER_ID') SELECT a.DICT_KEY FROM (SELECT KYLIN_SALES_BUYER_ID as DICT_KEY FROM kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115 GROUP BY KYLIN_SALES_BUYER_ID) a LEFT JOIN (SELECT DICT_KEY FROM db.kylin_sales_cube_mr_notopn_global_dict WHERE DICT_COLUMN = 'KYLIN_SALES_BUYER_ID') b ON a.DICT_KEY = b.DICT_KEY WHERE b.DICT_KEY IS NULL; // KYLIN_MAX_DISTINCT_COUNT special partition INSERT OVERWRITE TABLE kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115__group_by PARTITION (DICT_COLUMN = 'KYLIN_MAX_DISTINCT_COUNT') SELECT CONCAT_WS(',', tc.dict_column, cast(tc.total_distinct_val AS String), if(tm.max_dict_val is null, '0', cast(max_dict_val as string))) FROM (SELECT dict_column,count(1) total_distinct_val FROM kylin_intermediate_kylin_sales_cube_mr_6222c210_ce2d_e8ce_dd0f_f12c38fa9115__group_by where DICT_COLUMN != 'KYLIN_MAX_DISTINCT_COUNT' group by dict_column) tc LEFT JOIN (SELECT dict_column,if(max(dict_val) is null, 0, max(dict_val)) as max_dict_val FROM db.kylin_sales_cube_mr_notopn_global_dict group by dict_column) tm ON tc.dict_column = tm.dict_column;
Attachments
Issue Links
- links to