commit 36369f7b01c5521c2bd0db601f411120f143b5fe
Author: Yu Li
Date:   Tue Jan 26 13:53:11 2016 +0800

    HBASE-15171 Avoid counting duplicated kv and generating lots of small hfiles in PutSortReducer

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
index 0a27056..d18ea34 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/PutSortReducer.java
@@ -22,12 +22,12 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.TreeSet;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.mapreduce.Reducer;
@@ -68,9 +68,11 @@ public class PutSortReducer extends
           for (Cell cell: cells) {
             KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
             map.add(kv);
-            curSize += kv.heapSize();
           }
         }
+        for(KeyValue kv: map){
+          curSize +=kv.heapSize();
+        }
       }
       context.setStatus("Read " + map.size() + " entries of " + map.getClass()
           + "(" + StringUtils.humanReadableInt(curSize) + ")");