From 91882b8dcefcb3c8339aa3971ce12467c54a9595 Mon Sep 17 00:00:00 2001 From: shaofengshi Date: Fri, 5 Dec 2014 10:04:10 +0800 Subject: [PATCH 1/2] =?UTF-8?q?Extract=20=E2=80=9Ctable=E2=80=9D=20from=20?= =?UTF-8?q?=E2=80=9Ccolumn=E2=80=9D=20in=20Dimension=20description.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kylinolap/cube/dataGen/FactTableGenerator.java | 5 --- .../com/kylinolap/cube/model/DimensionDesc.java | 38 ++++------------------ .../cube_desc/test_kylin_cube_with_slr_desc.json | 18 ++++++---- .../test_kylin_cube_with_slr_left_join_desc.json | 21 ++++++++---- .../test_kylin_cube_without_slr_desc.json | 15 ++++++--- ...test_kylin_cube_without_slr_left_join_desc.json | 15 ++++++--- .../com/kylinolap/metadata/model/TableDesc.java | 2 +- 7 files changed, 53 insertions(+), 61 deletions(-) diff --git a/cube/src/main/java/com/kylinolap/cube/dataGen/FactTableGenerator.java b/cube/src/main/java/com/kylinolap/cube/dataGen/FactTableGenerator.java index 15d435d..7781e61 100644 --- a/cube/src/main/java/com/kylinolap/cube/dataGen/FactTableGenerator.java +++ b/cube/src/main/java/com/kylinolap/cube/dataGen/FactTableGenerator.java @@ -305,11 +305,6 @@ private String generate() throws Exception { if (jDesc == null) { // column on fact table used directly as a dimension for (String aColumn : dim.getColumn()) { - int lastIndexOfDot = aColumn.lastIndexOf("."); - if (lastIndexOfDot >= 0) { - aColumn = aColumn.substring(lastIndexOfDot + 1); - } - if (!factTableCol2LookupCol.containsKey(aColumn)) usedCols.add(aColumn); } diff --git a/cube/src/main/java/com/kylinolap/cube/model/DimensionDesc.java b/cube/src/main/java/com/kylinolap/cube/model/DimensionDesc.java index 39e8fb1..348b24a 100644 --- a/cube/src/main/java/com/kylinolap/cube/model/DimensionDesc.java +++ b/cube/src/main/java/com/kylinolap/cube/model/DimensionDesc.java @@ -47,7 +47,6 @@ private boolean isHierarchy; @JsonProperty("table") private String table; - private String database; @JsonProperty("column") private String[] column; @JsonProperty("derived") @@ -78,11 +77,7 @@ public boolean isHierarchy() { * @return */ public String getTable() { - if (database == null) { - return ("DEFAULT." + table).toUpperCase(); - } else { - return (database + "." + table).toUpperCase(); - } + return table; } public int getId() { @@ -182,6 +177,7 @@ public String toString() { * parse column to get db name and table name * @return an array carries db name + table name * @throws IllegalStateException if the column name or name is incorrect or inaccurate + * @deprecated */ private String[] parseTableDBName(String thisColumn, Map> columnTableMap, Map> tableDatabaseMap) { String tableName = null, dbName = null; @@ -228,35 +224,13 @@ public String toString() { public void init(CubeDesc cubeDesc, Map tables, Map> columnTableMap, Map> tableDatabaseMap) { if (name != null) name = name.toUpperCase(); - - this.table = null; - this.database = null; - this.join = null; - - for (int i = 0, n = this.column.length; i < n; i++) { - String thisColumn = this.column[i].toUpperCase(); - - if (this.table == null || this.database == null) { - String[] dbTableNames = parseTableDBName(thisColumn, columnTableMap, tableDatabaseMap); - - if (database == null) { - database = dbTableNames[0]; - } else if (!database.equals(dbTableNames[0])) { - throw new IllegalStateException("One dimension can only refer to the tables in the same db: '" + database + "' and '" + dbTableNames[0] + "'."); - } - - if (table == null) { - table = dbTableNames[1]; - } else if (!table.equalsIgnoreCase(dbTableNames[1])) { - throw new IllegalStateException("One dimension can only refer to the columns on the same table: '" + table + "' and '" + dbTableNames[1] + "'."); - } - } - - } + + if (table != null) + table = table.toUpperCase(); tableDesc = tables.get(this.getTable()); if (tableDesc == null) - throw new IllegalStateException("Can't find table " + table + " on dimension " + name); + throw new IllegalStateException("Can't find table " + table + " for dimension " + name); for (LookupDesc lookup : cubeDesc.getModel().getLookups()) { if (lookup.getTable().equalsIgnoreCase(this.getTable())) { diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json index 4189067..d152df3 100644 --- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json +++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_desc.json @@ -11,32 +11,38 @@ "dimensions": [ { "name": "cal_dt", - "column": ["edw.test_cal_dt.cal_dt"], + "table": "edw.test_cal_dt", + "column": ["cal_dt"], "derived": ["week_beg_dt"] }, { "name": "category", - "column": ["default.test_category_groupings.meta_categ_name", "default.test_category_groupings.categ_lvl2_name", "default.test_category_groupings.categ_lvl3_name"], + "table": "default.test_category_groupings", + "column": ["meta_categ_name", "categ_lvl2_name", "categ_lvl3_name"], "derived": ["USER_DEFINED_FIELD1", "USER_DEFINED_FIELD3", "UPD_DATE", "UPD_USER"], "hierarchy": true }, { "name": "lstg_format_name", - "column": ["default.test_kylin_fact.lstg_format_name"] + "table": "default.test_kylin_fact", + "column": ["lstg_format_name"] }, { "name": "site_id", - "column": ["edw.test_sites.site_id"], + "table": "edw.test_sites", + "column": ["site_id"], "derived": ["site_name", "cre_user"] }, { "name": "seller_type_cd", - "column": ["edw.test_seller_type_dim.seller_type_cd"], + "table": "edw.test_seller_type_dim", + "column": ["seller_type_cd"], "derived": ["seller_type_desc"] }, { "name": "seller_id", - "column": ["default.test_kylin_fact.seller_id"] + "table": "default.test_kylin_fact", + "column": ["seller_id"] } ], "measures": [ diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_left_join_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_left_join_desc.json index 59a0f3e..b67ab38 100644 --- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_left_join_desc.json +++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_with_slr_left_join_desc.json @@ -11,36 +11,43 @@ "dimensions": [ { "name": "cal_dt", - "column": ["edw.test_cal_dt.cal_dt"], + "table": "edw.test_cal_dt", + "column": ["cal_dt"], "derived": ["week_beg_dt"] }, { "name": "category", - "column": ["default.test_category_groupings.meta_categ_name", "default.test_category_groupings.categ_lvl2_name", "default.test_category_groupings.categ_lvl3_name"], + "table": "default.test_category_groupings", + "column": ["meta_categ_name", "categ_lvl2_name", "categ_lvl3_name"], "hierarchy": true }, { "name": "category_derived", - "column": ["default.test_category_groupings.leaf_categ_id", "default.test_category_groupings.site_id"], + "table": "default.test_category_groupings", + "column": ["leaf_categ_id", "site_id"], "derived": ["USER_DEFINED_FIELD1", "USER_DEFINED_FIELD3", "UPD_DATE", "UPD_USER"] }, { "name": "lstg_format_name", - "column": ["default.test_kylin_fact.lstg_format_name"] + "table": "default.test_kylin_fact", + "column": ["lstg_format_name"] }, { "name": "site_id", - "column": ["edw.test_sites.site_id"], + "table": "edw.test_sites", + "column": ["site_id"], "derived": ["site_name", "cre_user"] }, { "name": "seller_type_cd", - "column": ["edw.test_seller_type_dim.seller_type_cd"], + "table": "edw.test_seller_type_dim", + "column": ["seller_type_cd"], "derived": ["seller_type_desc"] }, { "name": "seller_id", - "column": ["default.test_kylin_fact.seller_id"] + "table": "default.test_kylin_fact", + "column": ["seller_id"] } ], "measures": [ diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json index 362127d..e109fbe 100644 --- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json +++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json @@ -12,14 +12,16 @@ "dimensions": [ { "name": "cal_dt", - "column": ["edw.test_cal_dt.cal_dt"], + "table": "edw.test_cal_dt", + "column": ["cal_dt"], "derived": [ "week_beg_dt" ] }, { "name": "category", - "column": ["default.test_category_groupings.meta_categ_name", "default.test_category_groupings.categ_lvl2_name", "default.test_category_groupings.categ_lvl3_name"], + "table": "default.test_category_groupings", + "column": ["meta_categ_name", "categ_lvl2_name", "categ_lvl3_name"], "derived": [ "USER_DEFINED_FIELD1", "USER_DEFINED_FIELD3", @@ -30,11 +32,13 @@ }, { "name": "lstg_format_name", - "column": ["default.test_kylin_fact.lstg_format_name"] + "table": "default.test_kylin_fact", + "column": ["lstg_format_name"] }, { "name": "site_id", - "column": ["edw.test_sites.site_id"], + "table": "edw.test_sites", + "column": ["site_id"], "derived": [ "site_name", "cre_user" @@ -42,7 +46,8 @@ }, { "name": "seller_type_cd", - "column": ["edw.test_seller_type_dim.seller_type_cd"], + "table": "edw.test_seller_type_dim", + "column": ["seller_type_cd"], "derived": [ "seller_type_desc" ] diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json index d9e832e..9ab5eed 100644 --- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json +++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json @@ -10,14 +10,16 @@ "dimensions": [ { "name": "cal_dt", - "column": ["edw.test_cal_dt.cal_dt"], + "table": "edw.test_cal_dt", + "column": ["cal_dt"], "derived": [ "week_beg_dt" ] }, { "name": "category", - "column": ["default.test_category_groupings.meta_categ_name", "default.test_category_groupings.categ_lvl2_name", "default.test_category_groupings.categ_lvl3_name"], + "table": "default.test_category_groupings", + "column": ["meta_categ_name", "categ_lvl2_name", "categ_lvl3_name"], "derived": [ "USER_DEFINED_FIELD1", "USER_DEFINED_FIELD3", @@ -28,11 +30,13 @@ }, { "name": "lstg_format_name", - "column": ["default.test_kylin_fact.lstg_format_name"] + "table": "default.test_kylin_fact", + "column": ["lstg_format_name"] }, { "name": "site_id", - "column": ["edw.test_sites.site_id"], + "table": "edw.test_sites", + "column": ["site_id"], "derived": [ "site_name", "cre_user" @@ -40,7 +44,8 @@ }, { "name": "seller_type_cd", - "column": ["edw.test_seller_type_dim.seller_type_cd"], + "table": "edw.test_seller_type_dim", + "column": ["seller_type_cd"], "derived": [ "seller_type_desc" ] diff --git a/metadata/src/main/java/com/kylinolap/metadata/model/TableDesc.java b/metadata/src/main/java/com/kylinolap/metadata/model/TableDesc.java index 45bb2b2..1b15337 100644 --- a/metadata/src/main/java/com/kylinolap/metadata/model/TableDesc.java +++ b/metadata/src/main/java/com/kylinolap/metadata/model/TableDesc.java @@ -41,7 +41,7 @@ private DatabaseDesc database; public ColumnDesc findColumnByName(String name) { - //ignore the db name and table name + //ignore the db name and table name if exists int lastIndexOfDot = name.lastIndexOf("."); if (lastIndexOfDot >= 0) { name = name.substring(lastIndexOfDot + 1); From deb484eaff5e6dba16ed8741e3cf98eceada2a6c Mon Sep 17 00:00:00 2001 From: shaofengshi Date: Fri, 5 Dec 2014 13:03:09 +0800 Subject: [PATCH 2/2] =?UTF-8?q?Add=20=E2=80=9Ctable=E2=80=9D=20attribute?= =?UTF-8?q?=20on=20=E2=80=9Cdimensions=E2=80=9D=20description,=20and=20rem?= =?UTF-8?q?ove=20the=20table=20from=20=E2=80=9Ccolumn=E2=80=9D.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/kylinolap/cube/model/CubePartitionDesc.java | 13 +++++++++---- job/src/main/java/com/kylinolap/job/JoinedFlatTable.java | 3 ++- .../com/kylinolap/job/hadoop/hive/JoinedFlatTableDesc.java | 7 +++++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/cube/src/main/java/com/kylinolap/cube/model/CubePartitionDesc.java b/cube/src/main/java/com/kylinolap/cube/model/CubePartitionDesc.java index bc3c680..4444807 100644 --- a/cube/src/main/java/com/kylinolap/cube/model/CubePartitionDesc.java +++ b/cube/src/main/java/com/kylinolap/cube/model/CubePartitionDesc.java @@ -47,14 +47,19 @@ public void init(Map> columnMap) { if (null != partitionDateColumn) { partitionDateColumn = partitionDateColumn.toUpperCase(); - + String[] columns = StringSplitter.split(partitionDateColumn, "."); if (null != columns && columns.length == 3) { - Map cols = columnMap.get(columns[0].toUpperCase() + "." + columns[1].toUpperCase()); - if (cols != null) + String tableName = columns[0].toUpperCase() + "." + columns[1].toUpperCase(); + Map cols = columnMap.get(tableName); + if (cols != null) { partitionDateColumnRef = cols.get(columns[2].toUpperCase()); - + } else { + throw new IllegalStateException("The table '" + tableName + "' provided in 'partition_date_column' doesn't exist."); + } + } else { + throw new IllegalStateException("The 'partition_date_column' format is invalid: " + partitionDateColumn + ", it should be {db}.{table}.{column}."); } } } diff --git a/job/src/main/java/com/kylinolap/job/JoinedFlatTable.java b/job/src/main/java/com/kylinolap/job/JoinedFlatTable.java index f13e6d5..ccb218a 100644 --- a/job/src/main/java/com/kylinolap/job/JoinedFlatTable.java +++ b/job/src/main/java/com/kylinolap/job/JoinedFlatTable.java @@ -123,12 +123,13 @@ public static String generateInsertDataStatement(JoinedFlatTableDesc intermediat public static String generateSelectDataStatement(JoinedFlatTableDesc intermediateTableDesc) { StringBuilder sql = new StringBuilder(); sql.append("SELECT" + "\n"); + String tableAlias; for (int i = 0; i < intermediateTableDesc.getColumnList().size(); i++) { IntermediateColumnDesc col = intermediateTableDesc.getColumnList().get(i); if (i > 0) { sql.append(","); } - String tableAlias = intermediateTableDesc.getTableAlias(col.getTableName()); + tableAlias = intermediateTableDesc.getTableAlias(col.getTableName()); sql.append(tableAlias + "." + col.getColumnName() + "\n"); } appendJoinStatement(intermediateTableDesc, sql); diff --git a/job/src/main/java/com/kylinolap/job/hadoop/hive/JoinedFlatTableDesc.java b/job/src/main/java/com/kylinolap/job/hadoop/hive/JoinedFlatTableDesc.java index 56770a7..92cf2a0 100644 --- a/job/src/main/java/com/kylinolap/job/hadoop/hive/JoinedFlatTableDesc.java +++ b/job/src/main/java/com/kylinolap/job/hadoop/hive/JoinedFlatTableDesc.java @@ -44,6 +44,9 @@ // Map for table alais; key: table name; value: alias; private Map tableAliasMap; + + public static final String FACT_TABLE_ALIAS = "FACT_TABLE"; + public static final String LOOKUP_TABLE_ALAIS_PREFIX = "LOOKUP_"; public JoinedFlatTableDesc(CubeDesc cubeDesc, CubeSegment cubeSegment) { this.cubeDesc = cubeDesc; @@ -124,13 +127,13 @@ private void parseCubeDesc() { private void buileTableAliasMap() { tableAliasMap = new HashMap(); - tableAliasMap.put(cubeDesc.getFactTable(), "FACT_TABLE"); + tableAliasMap.put(cubeDesc.getFactTable(), FACT_TABLE_ALIAS); int i=1; for (DimensionDesc dim : cubeDesc.getDimensions()) { JoinDesc join = dim.getJoin(); if(join != null) { - tableAliasMap.put(dim.getTable(), "LOOKUP_" + i); + tableAliasMap.put(dim.getTable(), LOOKUP_TABLE_ALAIS_PREFIX + i); i++; }