diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java
index a3b8f76..1f9cead 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java
@@ -48,6 +48,10 @@ public LazyByte(LazyByte copy) {
 
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
+    if (!LazyUtils.isNumberMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       data.set(parseByte(bytes.getData(), start, length, 10));
       isNull = false;
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java
index 05ca4e9..35c2141 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java
@@ -46,6 +46,10 @@ public LazyDouble(LazyDouble copy) {
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
     String byteData = null;
+    if (!LazyUtils.isNumberMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       byteData = Text.decode(bytes.getData(), start, length);
       data.set(Double.parseDouble(byteData));
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java
index ad82ebf..22742aa 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java
@@ -51,6 +51,10 @@ public LazyInteger(LazyInteger copy) {
 
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
+    if (!LazyUtils.isNumberMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       data.set(parseInt(bytes.getData(), start, length, 10));
       isNull = false;
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java
index a9779a0..c0d52b9 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java
@@ -51,6 +51,10 @@ public LazyLong(LazyLong copy) {
 
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
+    if (!LazyUtils.isNumberMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       data.set(parseLong(bytes.getData(), start, length, 10));
       isNull = false;
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java
index f04e131..b8b9488 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java
@@ -48,6 +48,10 @@ public LazyShort(LazyShort copy) {
 
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
+    if (!LazyUtils.isNumberMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       data.set(parseShort(bytes.getData(), start, length));
       isNull = false;
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
index 5c58f6b..36e1f45 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
@@ -81,6 +81,31 @@ public static int digit(int b, int radix) {
   }
 
   /**
+   * Cheap pre-check: returns false only when the bytes obviously cannot be
+   * parsed as a base-10 number (Long or Double), i.e. len is 0, or len is 1
+   * or 2 and none of those bytes is a digit. Every other length returns true
+   * uninspected, so true is no guarantee; callers must still attempt a parse.
+   */
+
+  public static boolean isNumberMaybe(byte[] buf, int offset, int len) {
+    if (len == 0) {
+      // empty input: nothing to parse
+      return false;
+    }
+    if (len == 1) {
+      // lone byte (usually a space): plausible only if it is a digit
+      return Character.isDigit(buf[offset]);
+    }
+    if (len == 2) {
+      // two bytes: rejects \N but lets -1 through, since one byte is a digit
+      return Character.isDigit(buf[offset + 1])
+          || Character.isDigit(buf[offset + 0]);
+    }
+    // might be valid - too expensive to tell without a full parse
+    return true;
+  }
+
+  /**
    * Returns -1 if the first byte sequence is lexicographically less than the
    * second; returns +1 if the second byte sequence is lexicographically less
    * than the first; otherwise return 0.