diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java index cc485ff71c..b6d3184ffe 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java @@ -200,8 +200,12 @@ public static void rightTrim(BytesColumnVector outV, int i, byte[] bytes, int st * return the new byte length. */ public static int truncate(byte[] bytes, int start, int length, int maxLength) { - int end = start + length; + if (length <= maxLength) { + // no change in length + return length; + } + int end = start + length; // count characters forward int j = start; int charCount = 0; @@ -223,24 +227,10 @@ public static int truncate(byte[] bytes, int start, int length, int maxLength) { * place the result into element i of a vector. */ public static void truncate(BytesColumnVector outV, int i, byte[] bytes, int start, int length, int maxLength) { - int end = start + length; - - // count characters forward - int j = start; - int charCount = 0; - while(j < end) { - // UTF-8 continuation bytes have 2 high bits equal to 0x80. - if ((bytes[j] & 0xc0) != 0x80) { - if (charCount == maxLength) { - break; - } - ++charCount; - } - j++; - } + final int newLength = truncate(bytes, start, length, maxLength); // set output vector - outV.setVal(i, bytes, start, (j - start)); + outV.setVal(i, bytes, start, newLength); } /* @@ -248,25 +238,12 @@ public static void truncate(BytesColumnVector outV, int i, byte[] bytes, int sta * return a byte array with only truncated bytes. */ public static byte[] truncateScalar(byte[] bytes, int maxLength) { - int end = bytes.length; + int newLength = truncate(bytes, 0, bytes.length, maxLength); - // count characters forward - int j = 0; - int charCount = 0; - while(j < end) { - // UTF-8 continuation bytes have 2 high bits equal to 0x80. - if ((bytes[j] & 0xc0) != 0x80) { - if (charCount == maxLength) { - break; - } - ++charCount; - } - j++; - } - if (j == end) { + if (newLength == bytes.length) { return bytes; } else { - return Arrays.copyOf(bytes, j); + return Arrays.copyOf(bytes, newLength); } } @@ -275,36 +252,15 @@ public static void truncate(BytesColumnVector outV, int i, byte[] bytes, int sta * return the new byte length. */ public static int rightTrimAndTruncate(byte[] bytes, int start, int length, int maxLength) { - int end = start + length; + int newLength = truncate(bytes, start, length, maxLength); - // count characters forward and watch for final run of pads - int j = start; - int charCount = 0; - int padRunStart = -1; - while(j < end) { - // UTF-8 continuation bytes have 2 high bits equal to 0x80. - if ((bytes[j] & 0xc0) != 0x80) { - if (charCount == maxLength) { - break; - } - if (bytes[j] == 0x20) { - if (padRunStart == -1) { - padRunStart = j; - } - } else { - padRunStart = -1; - } - ++charCount; - } else { - padRunStart = -1; + for (int i = start + newLength - 1; i >= start; i--) { + if (bytes[i] != 0x20) { + return i - start + 1; } - j++; - } - if (padRunStart != -1) { - return (padRunStart - start); - } else { - return (j - start); } + + return 0; } /* @@ -312,37 +268,10 @@ public static int rightTrimAndTruncate(byte[] bytes, int start, int length, int * place the result into element i of a vector. */ public static void rightTrimAndTruncate(BytesColumnVector outV, int i, byte[] bytes, int start, int length, int maxLength) { - int end = start + length; + final int newLength = rightTrimAndTruncate(bytes, start, length, maxLength); - // count characters forward and watch for final run of pads - int j = start; - int charCount = 0; - int padRunStart = -1; - while(j < end) { - // UTF-8 continuation bytes have 2 high bits equal to 0x80. - if ((bytes[j] & 0xc0) != 0x80) { - if (charCount == maxLength) { - break; - } - if (bytes[j] == 0x20) { - if (padRunStart == -1) { - padRunStart = j; - } - } else { - padRunStart = -1; - } - ++charCount; - } else { - padRunStart = -1; - } - j++; - } // set output vector - if (padRunStart != -1) { - outV.setVal(i, bytes, start, (padRunStart - start)); - } else { - outV.setVal(i, bytes, start, (j - start) ); - } + outV.setVal(i, bytes, start, newLength); } /* @@ -350,37 +279,12 @@ public static void rightTrimAndTruncate(BytesColumnVector outV, int i, byte[] by * return a byte array with only the trimmed and truncated bytes. */ public static byte[] rightTrimAndTruncateScalar(byte[] bytes, int maxLength) { - int end = bytes.length; + int newLength = rightTrimAndTruncate(bytes, 0, bytes.length, maxLength); - // count characters forward and watch for final run of pads - int j = 0; - int charCount = 0; - int padRunStart = -1; - while(j < end) { - // UTF-8 continuation bytes have 2 high bits equal to 0x80. - if ((bytes[j] & 0xc0) != 0x80) { - if (charCount == maxLength) { - break; - } - if (bytes[j] == 0x20) { - if (padRunStart == -1) { - padRunStart = j; - } - } else { - padRunStart = -1; - } - ++charCount; - } else { - padRunStart = -1; - } - j++; - } - if (padRunStart != -1) { - return Arrays.copyOf(bytes, padRunStart); - } else if (j == end) { + if (newLength == bytes.length) { return bytes; } else { - return Arrays.copyOf(bytes, j); + return Arrays.copyOf(bytes, newLength); } }