diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
index 956fd7b..cd4dc38 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
@@ -25,6 +25,8 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 
 /**
  * Calculate the length of the strings in the input column vector, and store
@@ -74,11 +76,15 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
     // We do not need to do a column reset since we are carefully changing the output.
     outputColVector.isRepeating = false;
 
+    // We should not count trailing spaces for the CHAR type.
+    PrimitiveTypeInfo typeInfo = (PrimitiveTypeInfo) this.inputTypeInfos[0];
+    PrimitiveCategory category = typeInfo.getPrimitiveCategory();
+
     if (inputColVector.isRepeating) {
       if (inputColVector.noNulls || !inputIsNull[0]) {
         // Set isNull before call in case it changes it mind.
         outputIsNull[0] = false;
-        resultLen[0] = utf8StringLength(vector[0], start[0], length[0]);
+        resultLen[0] = utf8StringLength(vector[0], start[0], length[0], category);
       } else {
         outputIsNull[0] = true;
         outputColVector.noNulls = false;
@@ -97,12 +103,12 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
           final int i = sel[j];
           // Set isNull before call in case it changes it mind.
           outputIsNull[i] = false;
-          resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+          resultLen[i] = utf8StringLength(vector[i], start[i], length[i], category);
         }
       } else {
         for(int j = 0; j != n; j++) {
           final int i = sel[j];
-          resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+          resultLen[i] = utf8StringLength(vector[i], start[i], length[i], category);
         }
       }
     } else {
@@ -114,7 +120,7 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
         outputColVector.noNulls = true;
       }
       for(int i = 0; i != n; i++) {
-        resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+        resultLen[i] = utf8StringLength(vector[i], start[i], length[i], category);
       }
     }
   } else /* there are nulls in the inputColVector */ {
@@ -127,7 +133,7 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
         int i = sel[j];
         outputColVector.isNull[i] = inputColVector.isNull[i];
         if (!inputColVector.isNull[i]) {
-          resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+          resultLen[i] = utf8StringLength(vector[i], start[i], length[i], category);
         }
       }
       outputColVector.isRepeating = false;
@@ -135,7 +141,7 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
       for(int i = 0; i != n; i++) {
         outputColVector.isNull[i] = inputColVector.isNull[i];
         if (!inputColVector.isNull[i]) {
-          resultLen[i] = utf8StringLength(vector[i], start[i], length[i]);
+          resultLen[i] = utf8StringLength(vector[i], start[i], length[i], category);
         }
       }
     }
@@ -146,7 +152,7 @@
   /*
    * Return length in characters of UTF8 string in byte array
    * beginning at start that is len bytes long.
    */
-  static long utf8StringLength(byte[] s, int start, int len) {
+  static long utf8StringLength(byte[] s, int start, int len, PrimitiveCategory category) {
     long resultLength = 0;
     for (int i = start; i < start + len; i++) {
@@ -158,6 +164,18 @@ static long utf8StringLength(byte[] s, int start, int len) {
         resultLength++;
       }
     }
+
+    // Adjust the length if the column type is CHAR: trailing spaces are padding.
+    if (category == PrimitiveCategory.CHAR) {
+      for (int i = start + len - 1; i >= start; i--) {
+        if (s[i] == 32) {
+          resultLength--;
+        } else {
+          break;
+        }
+      }
+    }
+
     return resultLength;
   }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
index 3d61c33..c833027 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
@@ -453,7 +453,7 @@ public CharDataWriter(HiveCharObjectInspector inspector) {
 
     @Override
     public void write(Object value) {
-      String v = inspector.getPrimitiveJavaObject(value).getStrippedValue();
+      String v = inspector.getPrimitiveJavaObject(value).getPaddedValue();
       recordConsumer.addBinary(Binary.fromString(v));
     }
   }
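For context, here is a standalone sketch of the semantics the patch implements: CHAR values are stored blank-padded, so LENGTH() must not count the trailing padding, while STRING/VARCHAR values count every character. This is an illustration only, not Hive code; CharLengthSketch and utf8Length are hypothetical names, a boolean stands in for PrimitiveCategory, and the UTF-8 counting mirrors the non-continuation-byte loop in utf8StringLength above.

import java.nio.charset.StandardCharsets;

public class CharLengthSketch {

  // Count UTF-8 code points by counting bytes that are not
  // continuation bytes (10xxxxxx), as the patched method does.
  static long utf8Length(byte[] s, int start, int len, boolean isChar) {
    long resultLength = 0;
    for (int i = start; i < start + len; i++) {
      if ((s[i] & 0xc0) != 0x80) {
        resultLength++;
      }
    }
    // For CHAR, walk back over the trailing ASCII-space padding
    // and discount it from the character count.
    if (isChar) {
      for (int i = start + len - 1; i >= start && s[i] == ' '; i--) {
        resultLength--;
      }
    }
    return resultLength;
  }

  public static void main(String[] args) {
    // A CHAR(8)-style value: five characters plus three pad spaces.
    byte[] v = "héllo   ".getBytes(StandardCharsets.UTF_8);
    System.out.println(utf8Length(v, 0, v.length, false)); // 8, STRING semantics
    System.out.println(utf8Length(v, 0, v.length, true));  // 5, CHAR semantics
  }
}

The DataWritableWriter change is the complementary half of the fix: writing getPaddedValue() instead of getStrippedValue() keeps the bytes stored in Parquet consistent with the blank-padded CHAR representation that the adjusted utf8StringLength() now assumes.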