diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java
index c50af8d..bada657 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java
@@ -233,16 +233,7 @@ public String getOutputType() {
     }
 
     public boolean check(byte[] byteS, int start, int len) {
-      int lenSub = byteSub.length;
-      if (len != lenSub) {
-        return false;
-      }
-      for (int i = start, j = 0; j < len; i++, j++) {
-        if (byteS[i] != byteSub[j]) {
-          return false;
-        }
-      }
-      return true;
+      return StringExpr.equal(byteSub, 0, byteSub.length, byteS, start, len);
     }
   }
 
@@ -261,11 +252,7 @@ public boolean check(byte[] byteS, int start, int len) {
     }
 
     public boolean check(byte[] byteS, int start, int len) {
-      int lenSub = byteSub.length;
-      if (len < byteSub.length) {
-        return false;
-      }
-      return StringExpr.equal(byteSub, 0, lenSub, byteS, start, lenSub);
+      return len >= byteSub.length && StringExpr.equal(byteSub, 0, byteSub.length, byteS, start, byteSub.length);
     }
   }
 
@@ -284,11 +271,7 @@ public boolean check(byte[] byteS, int start, int len) {
     }
 
     public boolean check(byte[] byteS, int start, int len) {
-      int lenSub = byteSub.length;
-      if (len < lenSub) {
-        return false;
-      }
-      return StringExpr.equal(byteSub, 0, lenSub, byteS, start + len - lenSub, lenSub);
+      return len >= byteSub.length && StringExpr.equal(byteSub, 0, byteSub.length, byteS, start + len - byteSub.length, byteSub.length);
     }
   }
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
index 9ac9548..bb7fb9d 100755
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
@@ -26,6 +26,7 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectStringColLikeStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.Text;
 
@@ -163,15 +164,11 @@ private static boolean find(Text s, Text sub, int startS, int endS) {
     int lenSub = sub.getLength();
     boolean match = false;
     for (int i = startS; (i < endS - lenSub + 1) && (!match); i++) {
-      match = true;
-      for (int j = 0; j < lenSub; j++) {
-        if (byteS[j + i] != byteSub[j]) {
-          match = false;
-          break;
-        }
+      if (StringExpr.equal(byteS, i, lenSub, byteSub, 0, lenSub)) {
+        return true;
       }
     }
-    return match;
+    return false;
   }
 
   public BooleanWritable evaluate(Text s, Text likePattern) {
diff --git serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java
index 7916a6f..515dc1f 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java
@@ -19,13 +19,10 @@
 package org.apache.hadoop.hive.serde2;
 
 import java.io.IOException;
-import java.util.Arrays;
 
 import org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream;
 import org.apache.hadoop.hive.common.io.NonSyncByteArrayOutputStream;
-import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.io.WritableComparator;
 
 /**
  * Extensions to bytearrayinput/output streams.
@@ -107,12 +104,7 @@ public boolean arraysEquals(Output output) {
       if (count != output.count) {
         return false;
       }
-      for (int i = 0; i < count; i++) {
-        if (buf[i] != output.buf[i]) {
-          return false;
-        }
-      }
-      return true;
+      return WritableComparator.compareBytes(buf, 0, count, output.buf, 0, output.count) == 0;
     }
   }
 
diff --git serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java
index a4ecd9f..271c6d3 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java
@@ -23,6 +23,7 @@
 import org.apache.hadoop.hive.serde2.ByteStream.RandomAccessOutput;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
 
+import org.apache.hadoop.io.WritableComparator;
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.hive.common.util.HashCodeUtil;
 
@@ -259,12 +260,7 @@ public boolean isEqual(long leftOffset, int leftLength, long rightOffset, int ri
         leftFrom = getOffset(leftOffset), rightFrom = getOffset(rightOffset);
     byte[] leftBuffer = writeBuffers.get(leftIndex), rightBuffer = writeBuffers.get(rightIndex);
     if (leftFrom + leftLength <= wbSize && rightFrom + rightLength <= wbSize) {
-      for (int i = 0; i < leftLength; ++i) {
-        if (leftBuffer[leftFrom + i] != rightBuffer[rightFrom + i]) {
-          return false;
-        }
-      }
-      return true;
+      return WritableComparator.compareBytes(leftBuffer, leftFrom, leftLength, rightBuffer, rightFrom, rightLength) == 0;
     }
     for (int i = 0; i < leftLength; ++i) {
       if (leftFrom == wbSize) {
@@ -292,27 +288,28 @@ private final boolean isEqual(byte[] left, int leftOffset, int rightIndex, int r
     // rightOffset is within the buffers
     byte[] rightBuffer = writeBuffers.get(rightIndex);
     if (rightFrom + length <= wbSize) {
-      // TODO: allow using unsafe optionally.
       // bounds check first, to trigger bugs whether the first byte matches or not
       if (left[leftOffset + length - 1] != rightBuffer[rightFrom + length - 1]) {
         return false;
       }
-      for (int i = 0; i < length; ++i) {
-        if (left[leftOffset + i] != rightBuffer[rightFrom + i]) {
+      return WritableComparator.compareBytes(left, leftOffset, length, rightBuffer, rightFrom, length) == 0;
+    }
+    int i = 0;
+    while (i < length) {
+      int remaining = length - i;
+      if (rightFrom + remaining <= wbSize) {
+        return WritableComparator.compareBytes(left, leftOffset + i, remaining, rightBuffer, rightFrom, remaining) == 0;
+      } else {
+        int step = wbSize - rightFrom;
+        if (WritableComparator.compareBytes(left, leftOffset + i, step, rightBuffer, rightFrom, step) != 0) {
           return false;
         }
-      }
-      return true;
-    }
-    for (int i = 0; i < length; ++i) {
-      if (rightFrom == wbSize) {
-        ++rightIndex;
+        i += step;
+
+        rightIndex++;
         rightBuffer = writeBuffers.get(rightIndex);
         rightFrom = 0;
       }
-      if (left[leftOffset + i] != rightBuffer[rightFrom++]) {
-        return false;
-      }
     }
     return true;
   }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
index 264335c..2a15340 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
@@ -25,6 +25,7 @@
 import java.sql.Date;
 import java.util.Arrays;
 
+import org.apache.hadoop.io.WritableComparator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
@@ -321,12 +322,7 @@ private boolean checkNull(byte[] bytes, int start, int len) {
       return bytes[start] == nullSequenceBytes[0] && bytes[start+1] == nullSequenceBytes[1]
           && bytes[start+2] == nullSequenceBytes[2] && bytes[start+3] == nullSequenceBytes[3];
     default:
-      for (int i = 0; i < nullSequenceBytes.length; i++) {
-        if (bytes[start + i] != nullSequenceBytes[i]) {
-          return false;
-        }
-      }
-      return true;
+      return WritableComparator.compareBytes(bytes, start, nullSequenceBytes.length, nullSequenceBytes, 0, nullSequenceBytes.length) == 0;
     }
   }
 
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
index 90817a5..aea08c2 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
@@ -21,6 +21,7 @@
 import java.util.Arrays;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.io.WritableComparator;
 
 /**
  * String expression evaluation helper functions.
 */
@@ -35,15 +36,7 @@
    * positive if arg1 > arg2.
    */
   public static int compare(byte[] arg1, int start1, int len1, byte[] arg2, int start2, int len2) {
-    for (int i = 0; i < len1 && i < len2; i++) {
-      // Note the "& 0xff" is just a way to convert unsigned bytes to signed integer.
-      int b1 = arg1[i + start1] & 0xff;
-      int b2 = arg2[i + start2] & 0xff;
-      if (b1 != b2) {
-        return b1 - b2;
-      }
-    }
-    return len1 - len2;
+    return WritableComparator.compareBytes(arg1, start1, len1, arg2, start2, len2);
   }
 
   /* Determine if two strings are equal from two byte arrays each
@@ -66,35 +59,7 @@
       return false;
     }
 
-    if (len1 == len2) {
-      // prove invariant to the compiler: len1 = len2
-      // all array access between (start1, start1+len1)
-      // and (start2, start2+len2) are valid
-      // no more OOB exceptions are possible
-      final int step = 8;
-      final int remainder = len1 % step;
-      final int wlen = len1 - remainder;
-      // suffix first
-      for (int i = wlen; i < len1; i++) {
-        if (arg1[start1 + i] != arg2[start2 + i]) {
-          return false;
-        }
-      }
-      // SIMD loop
-      for (int i = 0; i < wlen; i += step) {
-        final int s1 = start1 + i;
-        final int s2 = start2 + i;
-        boolean neq = false;
-        for (int j = 0; j < step; j++) {
-          neq = (arg1[s1 + j] != arg2[s2 + j]) || neq;
-        }
-        if (neq) {
-          return false;
-        }
-      }
-    }
-
-    return true;
+    return WritableComparator.compareBytes(arg1, start1, len1, arg2, start2, len2) == 0;
   }
 
   public static int characterCount(byte[] bytes) {
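
Reviewer note, not part of the patch: every hunk above funnels byte comparison into org.apache.hadoop.io.WritableComparator.compareBytes(b1, s1, l1, b2, s2, l2), which orders byte ranges lexicographically as unsigned values and returns 0 only when both ranges have the same length and content. A minimal sketch of those semantics follows; the class name CompareBytesDemo is illustrative, and only hadoop-common is assumed on the classpath.

import org.apache.hadoop.io.WritableComparator;

public class CompareBytesDemo {
  public static void main(String[] args) {
    byte[] a = "abcdef".getBytes();
    byte[] b = "abc".getBytes();

    // Identical ranges compare as 0.
    System.out.println(WritableComparator.compareBytes(a, 0, 3, b, 0, 3));       // 0

    // Ranges of unequal length never compare as 0; the shorter one sorts first.
    System.out.println(WritableComparator.compareBytes(a, 0, 6, b, 0, 3) > 0);   // true

    // Bytes compare as unsigned values (0x80 > 0x7f), matching the "& 0xff"
    // conversion the removed StringExpr.compare loop performed by hand.
    byte[] hi = { (byte) 0x80 };
    byte[] lo = { (byte) 0x7f };
    System.out.println(WritableComparator.compareBytes(hi, 0, 1, lo, 0, 1) > 0); // true
  }
}

Because unequal lengths never compare as 0 (and StringExpr.equal returns false outright on a length mismatch), the prefix and suffix LIKE checkers must keep an explicit len >= byteSub.length guard and pass byteSub.length for both ranges, as the AbstractFilterStringColLikeStringScalar hunks above now do.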
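The WriteBuffers.isEqual rewrite compares the chunked right-hand side in runs bounded by the current buffer instead of byte by byte. Below is a self-contained sketch of that traversal under hypothetical names (ChunkedEqualsSketch and chunkSize are not Hive APIs); note the direct-return branch tests <= so a range that ends exactly on a chunk boundary returns without stepping past the last chunk.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.io.WritableComparator;

public class ChunkedEqualsSketch {
  // Compare `length` bytes of `left` (from leftOffset) against a logical byte
  // sequence stored as fixed-size chunks, starting at (chunkIndex, chunkFrom).
  static boolean isEqual(byte[] left, int leftOffset, List<byte[]> chunks,
      int chunkIndex, int chunkFrom, int length, int chunkSize) {
    int i = 0;
    while (i < length) {
      int remaining = length - i;
      byte[] chunk = chunks.get(chunkIndex);
      if (chunkFrom + remaining <= chunkSize) {
        // The rest fits in the current chunk: one final bulk compare.
        return WritableComparator.compareBytes(
            left, leftOffset + i, remaining, chunk, chunkFrom, remaining) == 0;
      }
      // Compare up to the chunk boundary, then move to the next chunk.
      int step = chunkSize - chunkFrom;
      if (WritableComparator.compareBytes(
          left, leftOffset + i, step, chunk, chunkFrom, step) != 0) {
        return false;
      }
      i += step;
      chunkIndex++;
      chunkFrom = 0;
    }
    return true;
  }

  public static void main(String[] args) {
    byte[] data = "hello, write buffers".getBytes();
    // Split the 20 bytes into 8-byte chunks (last chunk is short).
    List<byte[]> chunks = Arrays.asList(
        Arrays.copyOfRange(data, 0, 8),
        Arrays.copyOfRange(data, 8, 16),
        Arrays.copyOfRange(data, 16, 20));
    System.out.println(isEqual(data, 0, chunks, 0, 0, data.length, 8));      // true
    System.out.println(isEqual("hello-".getBytes(), 0, chunks, 0, 0, 6, 8)); // false
  }
}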