diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
index 409de7c..d13f4f5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
@@ -22,6 +22,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
 
 class ColumnStatisticsImpl implements ColumnStatistics {
 
@@ -335,8 +336,8 @@ public String toString() {
 
   protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
       implements StringColumnStatistics {
-    private String minimum = null;
-    private String maximum = null;
+    private Text minimum = null;
+    private Text maximum = null;
     private long sum = 0;
 
     StringStatisticsImpl() {
@@ -346,10 +347,10 @@ public String toString() {
       super(stats);
       OrcProto.StringStatistics str = stats.getStringStatistics();
       if (str.hasMaximum()) {
-        maximum = str.getMaximum();
+        maximum = new Text(str.getMaximum());
       }
       if (str.hasMinimum()) {
-        minimum = str.getMinimum();
+        minimum = new Text(str.getMinimum());
       }
       if(str.hasSum()) {
         sum = str.getSum();
@@ -365,16 +366,15 @@ void reset() {
     }
 
     @Override
-    void updateString(String value) {
+    void updateString(Text value) {
       if (minimum == null) {
-        minimum = value;
-        maximum = value;
+        maximum = minimum = new Text(value);
       } else if (minimum.compareTo(value) > 0) {
-        minimum = value;
+        minimum = new Text(value);
       } else if (maximum.compareTo(value) < 0) {
-        maximum = value;
+        maximum = new Text(value);
       }
-      sum += value.length();
+      sum += value.getLength();
     }
 
     @Override
@@ -382,13 +382,21 @@ void merge(ColumnStatisticsImpl other) {
       super.merge(other);
       StringStatisticsImpl str = (StringStatisticsImpl) other;
       if (minimum == null) {
-        minimum = str.minimum;
-        maximum = str.maximum;
+        if (str.minimum != null) {
+          maximum = new Text(str.maximum);
+          minimum = new Text(str.minimum);
+        } else {
+          /* both are empty */
+          maximum = minimum = null;
+        }
       } else if (str.minimum != null) {
+        // Check both bounds independently: the merged-in range can extend
+        // past this one at both ends, which an else-if would miss.
         if (minimum.compareTo(str.minimum) > 0) {
-          minimum = str.minimum;
-        } else if (maximum.compareTo(str.maximum) < 0) {
-          maximum = str.maximum;
+          minimum = new Text(str.minimum);
+        }
+        if (maximum.compareTo(str.maximum) < 0) {
+          maximum = new Text(str.maximum);
         }
       }
       sum += str.sum;
@@ -400,8 +408,8 @@ void merge(ColumnStatisticsImpl other) {
       OrcProto.StringStatistics.Builder str =
         OrcProto.StringStatistics.newBuilder();
       if (getNumberOfValues() != 0) {
-        str.setMinimum(minimum);
-        str.setMaximum(maximum);
+        str.setMinimum(getMinimum());
+        str.setMaximum(getMaximum());
         str.setSum(sum);
       }
       result.setStringStatistics(str);
@@ -410,12 +418,12 @@ void merge(ColumnStatisticsImpl other) {
 
     @Override
     public String getMinimum() {
-      return minimum;
+      return minimum == null ? null : minimum.toString();
     }
 
     @Override
     public String getMaximum() {
-      return maximum;
+      return maximum == null ? null : maximum.toString();
     }
 
     @Override
@@ -428,9 +436,9 @@ public String toString() {
       StringBuilder buf = new StringBuilder(super.toString());
       if (getNumberOfValues() != 0) {
         buf.append(" min: ");
-        buf.append(minimum);
+        buf.append(getMinimum());
         buf.append(" max: ");
-        buf.append(maximum);
+        buf.append(getMaximum());
         buf.append(" sum: ");
         buf.append(sum);
       }
@@ -733,7 +741,7 @@ void updateDouble(double value) {
     throw new UnsupportedOperationException("Can't update double");
   }
 
-  void updateString(String value) {
+  void updateString(Text value) {
     throw new UnsupportedOperationException("Can't update string");
   }
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
index 25fa0eb..6094175 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
@@ -38,7 +38,11 @@ public StringRedBlackTree(int initialCapacity) {
 
   public int add(String value) {
     newKey.set(value);
-    // if the key is new, add it to our byteArray and store the offset & length
+    return addNewKey();
+  }
+
+  private int addNewKey() {
+    // if the newKey is actually new, add it to our byteArray and store the offset & length
     if (add()) {
       int len = newKey.getLength();
       keyOffsets.add(byteArray.add(newKey.getBytes(), 0, len));
@@ -46,6 +50,11 @@ public int add(String value) {
     return lastAdd;
   }
 
+  public int add(Text value) {
+    newKey.set(value);
+    return addNewKey();
+  }
+
   @Override
   protected int compareValue(int position) {
     int start = keyOffsets.get(position);
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index ba69246..29379dc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -1027,20 +1027,20 @@ void recordPosition(PositionRecorder recorder) throws IOException {
     }
 
     /**
-     * Method to retrieve string values from the value object, which can be overridden
+     * Method to retrieve text values from the value object, which can be overridden
      * by subclasses.
      * @param obj value
-     * @return String value from obj
+     * @return Text text value from obj
      */
-    String getStringValue(Object obj) {
-      return ((StringObjectInspector) inspector).getPrimitiveJavaObject(obj);
+    Text getTextValue(Object obj) {
+      return ((StringObjectInspector) inspector).getPrimitiveWritableObject(obj);
     }
 
     @Override
     void write(Object obj) throws IOException {
       super.write(obj);
       if (obj != null) {
-        String val = getStringValue(obj);
+        Text val = getTextValue(obj);
         rows.add(dictionary.add(val));
         indexStatistics.updateString(val);
       }
@@ -1189,9 +1189,9 @@ long estimateMemory() {
      * Override base class implementation to support char values.
      */
     @Override
-    String getStringValue(Object obj) {
+    Text getTextValue(Object obj) {
       return (((HiveCharObjectInspector) inspector)
-          .getPrimitiveJavaObject(obj)).getValue();
+          .getPrimitiveWritableObject(obj)).getTextValue();
     }
   }
 
@@ -1211,9 +1211,9 @@ String getStringValue(Object obj) {
      * Override base class implementation to support varchar values.
      */
     @Override
-    String getStringValue(Object obj) {
+    Text getTextValue(Object obj) {
       return (((HiveVarcharObjectInspector) inspector)
-          .getPrimitiveJavaObject(obj)).getValue();
+          .getPrimitiveWritableObject(obj)).getTextValue();
     }
   }
 