From b573e39b3ed0cdfab23073b920c8fa35c34f4931 Mon Sep 17 00:00:00 2001 From: shaofengshi Date: Sat, 3 Jan 2015 20:20:48 +0800 Subject: [PATCH] Another try: use field schema object to get the field value. --- .../cardinality/ColumnCardinalityMapper.java | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/job/src/main/java/com/kylinolap/job/hadoop/cardinality/ColumnCardinalityMapper.java b/job/src/main/java/com/kylinolap/job/hadoop/cardinality/ColumnCardinalityMapper.java index 919b682..c576f2f 100644 --- a/job/src/main/java/com/kylinolap/job/hadoop/cardinality/ColumnCardinalityMapper.java +++ b/job/src/main/java/com/kylinolap/job/hadoop/cardinality/ColumnCardinalityMapper.java @@ -27,6 +27,9 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; import com.kylinolap.common.hll.HyperLogLogPlusCounter; import com.kylinolap.cube.kv.RowConstants; @@ -45,7 +48,27 @@ @Override public void map(T key, HCatRecord value, Context context) throws IOException, InterruptedException { + HCatSchema schema = HCatInputFormat.getTableSchema(context.getConfiguration()); Integer columnSize = context.getConfiguration().getInt(HiveColumnCardinalityJob.KEY_TABLE_COLUMN_NUMBER, 100); + + Iterator it = schema.getFields().iterator(); + HCatFieldSchema field; + Object fieldValue; + int m = 0; + while(it.hasNext()) { + field = it.next(); + fieldValue = value.get(field.getName(), schema); + if(fieldValue == null) + continue; + + if(counter <5 && m <3) { + System.out.println("Get row " + counter + " column " + m + " value: " + fieldValue.toString()); + } + getHllc(m).add(Bytes.toBytes(fieldValue.toString())); + m++; + } + + /* for (int m = 0; m < columnSize; m++) { Object cell = value.get(m); if(counter <5 && m <3) { @@ -53,6 +76,7 @@ public void map(T key, HCatRecord value, Context context) throws IOException, In } getHllc(m).add(Bytes.toBytes(cell.toString())); } + */ counter++; }