From ed08fdc6320d3748f8e3c196bc78f4a0f0eaece3 Mon Sep 17 00:00:00 2001
From: gwang3
Date: Tue, 19 Dec 2017 19:19:27 +0800
Subject: [PATCH] KYLIN-2903:support cardinality calculation for Hive view

When the table is a Hive view (table.isView()), build the first step of
the cardinality job as a ShellExecutable that runs a Hive statement
instead of the HiveColumnCardinalityJob MapReduce step. The statement
is an INSERT OVERWRITE DIRECTORY into the job's cardinality output path
that selects, for every column, the column name as a string literal
followed by COUNT (DISTINCT column); fields are tab-delimited and every
column label after the first is prefixed with '\n'.

Tables that are not views keep the existing MapReduceExecutable step.
The HadoopShellExecutable second step is unchanged.

Adds TableServiceTest, which checks the generated HQL for a
three-column table.
---
 .../apache/kylin/rest/service/TableService.java    | 54 ++++++++++++++++++++--
 .../kylin/rest/service/TableServiceTest.java       | 47 +++++++++++++++++++
 2 files changed, 96 insertions(+), 5 deletions(-)
 create mode 100644 server-base/src/test/java/org/apache/kylin/rest/service/TableServiceTest.java

diff --git a/server-base/src/main/java/org/apache/kylin/rest/service/TableService.java b/server-base/src/main/java/org/apache/kylin/rest/service/TableService.java
index 901ac46e0..d8dbc191a 100644
--- a/server-base/src/main/java/org/apache/kylin/rest/service/TableService.java
+++ b/server-base/src/main/java/org/apache/kylin/rest/service/TableService.java
@@ -31,10 +31,12 @@ import java.util.UUID;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.kylin.common.util.HadoopUtil;
+import org.apache.kylin.common.util.HiveCmdBuilder;
 import org.apache.kylin.common.util.Pair;
 import org.apache.kylin.cube.CubeManager;
 import org.apache.kylin.engine.mr.common.HadoopShellExecutable;
 import org.apache.kylin.engine.mr.common.MapReduceExecutable;
+import org.apache.kylin.job.common.ShellExecutable;
 import org.apache.kylin.job.execution.DefaultChainedExecutable;
 import org.apache.kylin.job.execution.ExecutableManager;
 import org.apache.kylin.job.execution.ExecutableState;
@@ -401,13 +403,24 @@ public class TableService extends BasicService {
         String outPath = getConfig().getHdfsWorkingDirectory() + "cardinality/" + job.getId() + "/" + tableName;
         String param = "-table " + tableName + " -output " + outPath + " -project " + prj;
 
-        MapReduceExecutable step1 = new MapReduceExecutable();
+        if (table.isView()) {
+            logger.info("table {} is view, calculate cardinality with HQL.", table.getIdentity());
+            ShellExecutable step1 = new ShellExecutable();
 
-        step1.setMapReduceJobClass(HiveColumnCardinalityJob.class);
-        step1.setMapReduceParams(param);
-        step1.setParam("segmentId", tableName);
+            String hql = createCalCardinalityHQL(table, outPath);
+            HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
+            hiveCmdBuilder.addStatement(hql);
+            step1.setCmd(hiveCmdBuilder.build());
 
-        job.addTask(step1);
+            job.addTask(step1);
+        } else {
+            MapReduceExecutable step1 = new MapReduceExecutable();
+            step1.setMapReduceJobClass(HiveColumnCardinalityJob.class);
+            step1.setMapReduceParams(param);
+            step1.setParam("segmentId", tableName);
+
+            job.addTask(step1);
+        }
 
         HadoopShellExecutable step2 = new HadoopShellExecutable();
 
@@ -425,4 +438,35 @@ public class TableService extends BasicService {
         String[] dbTableName = HadoopUtil.parseHiveTableName(tableName);
         return (dbTableName[0] + "." + dbTableName[1]).toUpperCase();
     }
+
+    String createCalCardinalityHQL(TableDesc table, String outPath) {
+        ColumnDesc[] columnDescs = table.getColumns();
+        int len = columnDescs.length;
+
+        StringBuilder sb = new StringBuilder();
+        sb.append("INSERT OVERWRITE DIRECTORY '");
+        sb.append(outPath);
+        sb.append("' ");
+        sb.append("ROW FORMAT DELIMITED ");
+        sb.append("FIELDS TERMINATED BY '\\t' ");
+        sb.append("SELECT ");
+        for(int i = 0; i < len; i++) {
+            String columnName = columnDescs[i].getName();
+            if(i == 0) {
+                sb.append("'" + columnName + "',");
+            } else {
+                sb.append("'\\n" + columnName + "',");
+            }
+            sb.append("COUNT (DISTINCT ");
+            sb.append(columnName);
+            if(i == len - 1) {
+                sb.append(")");
+            } else {
+                sb.append("),");
+            }
+        }
+        sb.append(" FROM ");
+        sb.append(table.getIdentity());
+        return sb.toString();
+    }
 }
diff --git a/server-base/src/test/java/org/apache/kylin/rest/service/TableServiceTest.java b/server-base/src/test/java/org/apache/kylin/rest/service/TableServiceTest.java
new file mode 100644
index 000000000..2c4af7f57
--- /dev/null
+++ b/server-base/src/test/java/org/apache/kylin/rest/service/TableServiceTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.rest.service;
+
+import org.apache.kylin.metadata.model.ColumnDesc;
+import org.apache.kylin.metadata.model.TableDesc;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TableServiceTest {
+
+    @Test
+    public void testCreateCalCardinalityHQL() {
+        TableDesc tableDesc = new TableDesc();
+        tableDesc.setName("test_db.test_table");
+
+        ColumnDesc columnDesc1 = new ColumnDesc("column_1", "column_1", "int", null, null, "1", null);
+        ColumnDesc columnDesc2 = new ColumnDesc("column_2", "column_2", "string", null, null, "2", null);
+        ColumnDesc columnDesc3 = new ColumnDesc("column_3", "column_3", "int", null, null, "3", null);
+        ColumnDesc[] columnDescs = {columnDesc1, columnDesc2, columnDesc3};
+        tableDesc.setColumns(columnDescs);
+
+        String outPath = "/temp";
+        String expected = "INSERT OVERWRITE DIRECTORY '/temp' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' SELECT " +
+                "'column_1',COUNT (DISTINCT column_1)," +
+                "'\\ncolumn_2',COUNT (DISTINCT column_2)," +
+                "'\\ncolumn_3',COUNT (DISTINCT column_3)" +
+                " FROM test_db.test_table";
+        Assert.assertEquals(expected.toUpperCase(), new TableService().createCalCardinalityHQL(tableDesc, outPath).toUpperCase());
+    }
+}
-- 
2.14.3 (Apple Git-98)