From 1f0bb3e182115ff2a1daab4d0ba03a9088dede60 Mon Sep 17 00:00:00 2001 From: shaofengshi Date: Sat, 3 Jan 2015 22:01:08 +0800 Subject: [PATCH] Enhancement on cardinality calculation --- .../cardinality/ColumnCardinalityMapper.java | 38 +++++++++------------- .../cardinality/HiveColumnCardinalityJob.java | 3 -- .../com/kylinolap/rest/service/CubeService.java | 2 +- 3 files changed, 16 insertions(+), 27 deletions(-) diff --git a/job/src/main/java/com/kylinolap/job/hadoop/cardinality/ColumnCardinalityMapper.java b/job/src/main/java/com/kylinolap/job/hadoop/cardinality/ColumnCardinalityMapper.java index c576f2f..a93900d 100644 --- a/job/src/main/java/com/kylinolap/job/hadoop/cardinality/ColumnCardinalityMapper.java +++ b/job/src/main/java/com/kylinolap/job/hadoop/cardinality/ColumnCardinalityMapper.java @@ -20,6 +20,7 @@ import java.nio.ByteBuffer; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Map; import org.apache.hadoop.hbase.util.Bytes; @@ -42,41 +43,32 @@ private Map hllcMap = new HashMap(); public static final String DEFAULT_DELIM = ","; - + private int counter = 0; @Override public void map(T key, HCatRecord value, Context context) throws IOException, InterruptedException { HCatSchema schema = HCatInputFormat.getTableSchema(context.getConfiguration()); - Integer columnSize = context.getConfiguration().getInt(HiveColumnCardinalityJob.KEY_TABLE_COLUMN_NUMBER, 100); - - Iterator it = schema.getFields().iterator(); + + List fieldList = schema.getFields(); HCatFieldSchema field; Object fieldValue; - int m = 0; - while(it.hasNext()) { - field = it.next(); + Integer columnSize = fieldList.size(); + for (int m = 0; m < columnSize; m++) { + field = fieldList.get(m); fieldValue = value.get(field.getName(), schema); - if(fieldValue == null) - continue; + if (fieldValue == null) + fieldValue = "NULL"; - if(counter <5 && m <3) { - System.out.println("Get row " + counter + " column " + m + " value: " + fieldValue.toString()); - } - getHllc(m).add(Bytes.toBytes(fieldValue.toString())); - m++; - } - - /* - for (int m = 0; m < columnSize; m++) { - Object cell = value.get(m); - if(counter <5 && m <3) { - System.out.println("Get row " + counter + " column " + m + " value: " + cell.toString()); + if (counter < 5 && m < 10) { + System.out.println("Get row " + counter + " column '" + field.getName() + "' value: " + fieldValue); } - getHllc(m).add(Bytes.toBytes(cell.toString())); + + if (fieldValue != null) + getHllc(m).add(Bytes.toBytes(fieldValue.toString())); } - */ + counter++; } diff --git a/job/src/main/java/com/kylinolap/job/hadoop/cardinality/HiveColumnCardinalityJob.java b/job/src/main/java/com/kylinolap/job/hadoop/cardinality/HiveColumnCardinalityJob.java index 14a907c..4280f71 100644 --- a/job/src/main/java/com/kylinolap/job/hadoop/cardinality/HiveColumnCardinalityJob.java +++ b/job/src/main/java/com/kylinolap/job/hadoop/cardinality/HiveColumnCardinalityJob.java @@ -46,7 +46,6 @@ protected static final Option OPTION_TABLE = OptionBuilder.withArgName("table name").hasArg().isRequired(true).withDescription("The hive table name").create("table"); public static final String KEY_INPUT_DELIM = "INPUT_DELIM"; - public static final String KEY_TABLE_COLUMN_NUMBER = "TABLE_COLUMN_NUMBER"; public static final String OUTPUT_PATH = "/tmp/cardinality"; /** @@ -149,8 +148,6 @@ public int run(String[] args) throws Exception { table); System.out.println("Set input format as HCat on table '" + table + "'"); - HCatSchema tableSchema = HCatInputFormat.getTableSchema(job.getConfiguration()); - job.getConfiguration().set(KEY_TABLE_COLUMN_NUMBER, String.valueOf(tableSchema.size())); job.setInputFormatClass(HCatInputFormat.class); job.setMapperClass(ColumnCardinalityMapper.class); diff --git a/server/src/main/java/com/kylinolap/rest/service/CubeService.java b/server/src/main/java/com/kylinolap/rest/service/CubeService.java index 6051d91..702d60c 100644 --- a/server/src/main/java/com/kylinolap/rest/service/CubeService.java +++ b/server/src/main/java/com/kylinolap/rest/service/CubeService.java @@ -472,7 +472,7 @@ public void generateCardinality(String tableName, String format, String delimite } */ String jarPath = getKylinConfig().getKylinJobJarPath(); - String outPath = HiveColumnCardinalityJob.OUTPUT_PATH + "/" + tableName; + String outPath = HiveColumnCardinalityJob.OUTPUT_PATH + "/" + tableName.toUpperCase(); String[] args = new String[] {"-table", tableName, "-output", outPath }; HiveColumnCardinalityJob job = new HiveColumnCardinalityJob(jarPath, null);