diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/LazySimpleSerDeBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/LazySimpleSerDeBench.java
index a1b63d5..826bf53 100644
--- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/LazySimpleSerDeBench.java
+++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/LazySimpleSerDeBench.java
@@ -15,16 +15,19 @@
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.sql.Date;
 import java.util.Random;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyByte;
+import org.apache.hadoop.hive.serde2.lazy.LazyDate;
 import org.apache.hadoop.hive.serde2.lazy.LazyDouble;
 import org.apache.hadoop.hive.serde2.lazy.LazyFloat;
 import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
 import org.apache.hadoop.hive.serde2.lazy.LazyLong;
 import org.apache.hadoop.hive.serde2.lazy.LazyShort;
+import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -53,15 +56,14 @@
  * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.serde.LazySimpleSerDeBench
  *
  */
+  public static final int DEFAULT_ITER_TIME = 1000000;
+  public static final int DEFAULT_DATA_SIZE = 4096;
   @BenchmarkMode(Mode.AverageTime)
   @Fork(1)
   @State(Scope.Thread)
   @OutputTimeUnit(TimeUnit.NANOSECONDS)
   public static abstract class AbstractDeserializer {
-    public static final int DEFAULT_ITER_TIME = 1000000;
-
-    public static final int DEFAULT_DATA_SIZE = 4096;
 
     public int[] offsets = new int[DEFAULT_DATA_SIZE];
     public int[] sizes = new int[DEFAULT_DATA_SIZE];
@@ -445,6 +447,171 @@ public void bench() {
     }
   }
 
+  @BenchmarkMode(Mode.AverageTime)
+  @Fork(1)
+  @State(Scope.Thread)
+  @OutputTimeUnit(TimeUnit.NANOSECONDS)
+  public static class GoodLazyDate {
+
+    final LazyDate obj = new LazyDate(
+        LazyPrimitiveObjectInspectorFactory.LAZY_DATE_OBJECT_INSPECTOR);
+
+    public int[] offsets = new int[DEFAULT_DATA_SIZE];
+    public int[] sizes = new int[DEFAULT_DATA_SIZE];
+    protected final ByteArrayRef ref = new ByteArrayRef();
+
+    @Setup
+    public void setup() {
+      sizes = new int[DEFAULT_DATA_SIZE];
+      offsets = new int[sizes.length];
+      ByteArrayOutputStream bos = new ByteArrayOutputStream();
+      Random r = new Random();
+      int len = 0;
+      final long base = -320000000L * 1000L; // 1959
+      for (int i = 0; i < DEFAULT_DATA_SIZE; i++) {
+        // -ve dates are also valid dates - the dates are within 1959 to 2027
+        Date dt = new Date(base + (Math.abs(r.nextLong()) % (Integer.MAX_VALUE * 1000L)));
+        byte[] ds = dt.toString().getBytes();
+        sizes[i] = ds.length;
+        offsets[i] = len;
+        len += ds.length;
+        try {
+          bos.write(ds);
+        } catch (IOException e) {
+          e.printStackTrace();
+          throw new RuntimeException(e);
+        }
+      }
+      ref.setData(bos.toByteArray());
+    }
+
+    @Benchmark
+    @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS)
+    @Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS)
+    public void bench() {
+      for (int i = 0; i < DEFAULT_ITER_TIME; i++) {
+        obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]);
+      }
+    }
+  }
+
+  public static class RandomLazyDate extends RandomDataInitializer {
+
+    final LazyDate obj = new LazyDate(
+        LazyPrimitiveObjectInspectorFactory.LAZY_DATE_OBJECT_INSPECTOR);
+
+    public RandomLazyDate() {
+      super(4);
+    }
+
+    @Override
+    public void bench() {
+      for (int i = 0; i < DEFAULT_ITER_TIME; i++) {
+        obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]);
+      }
+    }
+  }
+
+  public static class WorstLazyDate extends RandomDataInitializer {
+
+    final LazyDate obj = new LazyDate(
+        LazyPrimitiveObjectInspectorFactory.LAZY_DATE_OBJECT_INSPECTOR);
+
+    public WorstLazyDate() {
+      super(8);
+    }
+
+    @Override
+    public void bench() {
+      for (int i = 0; i < DEFAULT_ITER_TIME; i++) {
+        obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]);
+      }
+    }
+  }
+
+  @BenchmarkMode(Mode.AverageTime)
+  @Fork(1)
+  @State(Scope.Thread)
+  @OutputTimeUnit(TimeUnit.NANOSECONDS)
+  public static class GoodLazyTimestamp {
+
+    final LazyTimestamp obj = new LazyTimestamp(
+        LazyPrimitiveObjectInspectorFactory.LAZY_TIMESTAMP_OBJECT_INSPECTOR);
+
+    public int[] offsets = new int[DEFAULT_DATA_SIZE];
+    public int[] sizes = new int[DEFAULT_DATA_SIZE];
+    protected final ByteArrayRef ref = new ByteArrayRef();
+
+    @Setup
+    public void setup() {
+      sizes = new int[DEFAULT_DATA_SIZE];
+      offsets = new int[sizes.length];
+      ByteArrayOutputStream bos = new ByteArrayOutputStream();
+      Random r = new Random();
+      int len = 0;
+      final long base = -320000000L * 1000L; // 1959
+      for (int i = 0; i < DEFAULT_DATA_SIZE; i++) {
+        // -ve dates are also valid Timestamps - dates are within 1959 to 2027
+        Date dt = new Date(base + (Math.abs(r.nextLong()) % (Integer.MAX_VALUE * 1000L)));
+        byte[] ds = String.format("%s 00:00:01", dt.toString()).getBytes();
+        sizes[i] = ds.length;
+        offsets[i] = len;
+        len += ds.length;
+        try {
+          bos.write(ds);
+        } catch (IOException e) {
+          e.printStackTrace();
+          throw new RuntimeException(e);
+        }
+      }
+      ref.setData(bos.toByteArray());
+    }
+
+    @Benchmark
+    @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS)
+    @Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS)
+    public void bench() {
+      for (int i = 0; i < DEFAULT_ITER_TIME; i++) {
+        obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]);
+      }
+    }
+  }
+
+  public static class RandomLazyTimestamp extends RandomDataInitializer {
+
+    final LazyTimestamp obj = new LazyTimestamp(
+        LazyPrimitiveObjectInspectorFactory.LAZY_TIMESTAMP_OBJECT_INSPECTOR);
+
+    public RandomLazyTimestamp() {
+      super(4);
+    }
+
+    @Override
+    public void bench() {
+      for (int i = 0; i < DEFAULT_ITER_TIME; i++) {
+        obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]);
+      }
+    }
+  }
+
+  public static class WorstLazyTimestamp extends RandomDataInitializer {
+
+    final LazyTimestamp obj = new LazyTimestamp(
+        LazyPrimitiveObjectInspectorFactory.LAZY_TIMESTAMP_OBJECT_INSPECTOR);
+
+    public WorstLazyTimestamp() {
+      super(8);
+    }
+
+    @Override
+    public void bench() {
+      for (int i = 0; i < DEFAULT_ITER_TIME; i++) {
+        obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]);
+      }
+    }
+  }
+
   public static void main(String[] args) throws RunnerException {
     Options opt = new OptionsBuilder().include(
         ".*" + LazySimpleSerDeBench.class.getSimpleName() + ".*").build();
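The three variants per type above exercise distinct input shapes: the Good benchmarks feed well-formed date strings, while the Random (4-byte) and Worst (8-byte) variants built on RandomDataInitializer feed arbitrary bytes that can never parse. Note that the generator relies on Date.toString() producing the yyyy-mm-dd form that Date.valueOf() can parse back, which is java.sql.Date behavior (java.util.Date would yield the long "EEE MMM dd ..." form and quietly turn the Good case into an error-path benchmark); the import above assumes that intent. A minimal standalone sketch of the generated inputs:

    import java.sql.Date;
    import java.util.Random;

    public class DateInputShapes {
      public static void main(String[] args) {
        Random r = new Random(42);
        final long base = -320000000L * 1000L; // ~1959, as in the setup() methods
        // "Good": a well-formed date between 1959 and 2027
        Date dt = new Date(base + (Math.abs(r.nextLong()) % (Integer.MAX_VALUE * 1000L)));
        System.out.println("good input: " + dt);  // e.g. 2004-11-09
        // "Random"/"Worst": 4 or 8 arbitrary bytes, never a parseable date
        byte[] junk = new byte[4];
        r.nextBytes(junk);
        System.out.println("junk input: " + junk.length + " random bytes");
      }
    }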
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
index 2e8331a..e92d7b1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
@@ -20,6 +20,7 @@
 import java.io.EOFException;
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.List;
 
 import org.slf4j.Logger;
@@ -125,6 +126,7 @@ private VectorDeserializeRow() {
   private void allocateArrays(int count) {
     isConvert = new boolean[count];
     projectionColumnNums = new int[count];
+    Arrays.fill(projectionColumnNums, -1);
     sourceCategories = new Category[count];
     sourcePrimitiveCategories = new PrimitiveCategory[count];
     maxLengths = new int[count];
@@ -344,7 +346,10 @@ private void deserializeRowColumn(VectorizedRowBatch batch, int batchIndex,
       int logicalColumnIndex) throws IOException {
     final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
     if (deserializeRead.readCheckNull()) {
-      VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+      if (projectionColumnNum != -1) {
+        // important: readCheckNull skips the actual data column
+        VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+      }
       return;
     }
 
@@ -482,7 +487,10 @@ private void deserializeConvertRowColumn(VectorizedRowBatch batch, int batchInde
       int logicalColumnIndex) throws IOException {
     final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
     if (deserializeRead.readCheckNull()) {
-      VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+      if (projectionColumnNum != -1) {
+        // important: readCheckNull skips the actual data column
+        VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+      }
       return;
     }
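The Arrays.fill(-1) plus the != -1 guards implement a sentinel: a freshly allocated int[] is all zeros, so a logical column that never received a projection target would otherwise alias output column 0 and have its null flag written into the wrong vector, while readCheckNull() must still run so the field's bytes are consumed. A minimal sketch of the pattern (illustrative names, not the Hive classes):

    import java.util.Arrays;

    public class ProjectionSentinel {
      public static void main(String[] args) {
        int[] projection = new int[4];   // logical column -> output column slot
        Arrays.fill(projection, -1);     // without this, unset slots read as 0
        projection[0] = 2;               // only logical column 0 is projected
        for (int logical = 0; logical < projection.length; logical++) {
          int out = projection[logical];
          if (out == -1) {
            continue; // field consumed from the input, but no output vector to touch
          }
          System.out.println("logical " + logical + " -> output column " + out);
        }
      }
    }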
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index 6979956..0cab25e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -28,7 +28,6 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
 import org.apache.hadoop.hive.ql.exec.AbstractMapOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
@@ -45,7 +44,6 @@
 import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc;
 import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
@@ -56,9 +54,9 @@
 import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -400,9 +398,13 @@ private void determineColumnsToInclude(Configuration hconf) {
 
     columnsToIncludeTruncated = null;
 
-    List<Integer> columnsToIncludeTruncatedList = ColumnProjectionUtils.getReadColumnIDs(hconf);
-    if (columnsToIncludeTruncatedList != null &&
-        columnsToIncludeTruncatedList.size() > 0 && columnsToIncludeTruncatedList.size() < dataColumnCount) {
+    List<Integer> columnsToIncludeTruncatedList = null;
+
+    if (oneRootOperator instanceof TableScanOperator) {
+      columnsToIncludeTruncatedList = ((TableScanOperator) oneRootOperator).getNeededColumnIDs();
+    }
+
+    if (columnsToIncludeTruncatedList != null) {
 
       // Partitioned columns will not be in the include list.
 
@@ -421,9 +423,6 @@ private void determineColumnsToInclude(Configuration hconf) {
           break;
         }
       }
-      if (highestWantedColumnNum == -1) {
-        throw new RuntimeException("No columns to include?");
-      }
       int newColumnCount = highestWantedColumnNum + 1;
       if (newColumnCount == dataColumnCount) {
         columnsToIncludeTruncated = columnsToInclude;
@@ -431,7 +430,7 @@ private void determineColumnsToInclude(Configuration hconf) {
         columnsToIncludeTruncated = Arrays.copyOf(columnsToInclude, newColumnCount);
       }
     }
-  } 
+  }
 
   /** Kryo ctor. */
   public VectorMapOperator() {
@@ -479,9 +478,11 @@ private void internalSetChildren(Configuration hconf) throws Exception {
     // so set it here to none.
     currentReadType = VectorMapOperatorReadType.NONE;
 
-    determineColumnsToInclude(hconf);
-
     batchContext = conf.getVectorizedRowBatchCtx();
+    dataColumnCount = batchContext.getDataColumnCount();
+    partitionColumnCount = batchContext.getPartitionColumnCount();
+    partitionValues = new Object[partitionColumnCount];
+    determineColumnsToInclude(hconf);
 
     /*
      * Use a different batch for vectorized Input File Format readers so they can do their work
@@ -499,10 +500,6 @@ private void internalSetChildren(Configuration hconf) throws Exception {
 
     batchCounter = 0;
 
-    dataColumnCount = batchContext.getDataColumnCount();
-    partitionColumnCount = batchContext.getPartitionColumnCount();
-    partitionValues = new Object[partitionColumnCount];
-
    /*
     * Create table related objects
     */
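Taking the needed column IDs from the TableScanOperator instead of ColumnProjectionUtils makes an empty list a legal outcome (a scan that needs no data columns at all), so highestWantedColumnNum can now legitimately stay -1 and the truncated include array comes out empty; that is why the "No columns to include?" RuntimeException was removed. Note also that determineColumnsToInclude() now runs after dataColumnCount is populated from the batch context, which it reads. A rough re-implementation of the truncation logic in isolation (hypothetical names, not the Hive method):

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class IncludeTruncation {
      static boolean[] truncate(List<Integer> neededIds, int dataColumnCount) {
        boolean[] include = new boolean[dataColumnCount];
        for (int id : neededIds) {
          include[id] = true;           // partition columns never appear here
        }
        int highest = -1;               // stays -1 when nothing is needed
        for (int i = dataColumnCount - 1; i >= 0; i--) {
          if (include[i]) {
            highest = i;
            break;
          }
        }
        int newCount = highest + 1;
        return newCount == dataColumnCount ? include : Arrays.copyOf(include, newCount);
      }

      public static void main(String[] args) {
        // [true, false, true] -- everything past column 2 is dropped
        System.out.println(Arrays.toString(truncate(Arrays.asList(0, 2), 5)));
        // [] -- no needed columns is no longer an error
        System.out.println(Arrays.toString(truncate(Collections.<Integer>emptyList(), 5)));
      }
    }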
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDate.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDate.java
index 0579ff2..c00faac 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDate.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDate.java
@@ -59,6 +59,10 @@ public LazyDate(LazyDate copy) {
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
     String s = null;
+    if (!LazyUtils.isDateMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       s = Text.decode(bytes.getData(), start, length);
       data.set(Date.valueOf(s));
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java
index 8f0c3d2..56945d1 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java
@@ -59,6 +59,10 @@ public LazyTimestamp(LazyTimestamp copy) {
   @Override
   public void init(ByteArrayRef bytes, int start, int length) {
     String s = null;
+    if (!LazyUtils.isDateMaybe(bytes.getData(), start, length)) {
+      isNull = true;
+      return;
+    }
     try {
       s = new String(bytes.getData(), start, length, "US-ASCII");
     } catch (UnsupportedEncodingException e) {
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
index 6d7369b..73c72e1 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
@@ -109,6 +109,18 @@ public static boolean isNumberMaybe(byte[] buf, int offset, int len) {
   }
 
   /**
+   * Returns false when the bytes definitely cannot be parsed into a date/timestamp.
+   *
+   * Y2K and dash requirements mean the string has to be at least
+   * yyyy-m-d = 8 bytes; a Timestamp needs to be at least 1 byte longer.
+   * The check is necessary, but not sufficient.
+   */
+  public static boolean isDateMaybe(byte[] buf, int offset, int len) {
+    // maybe valid - too expensive to check without a parse
+    return len >= 8;
+  }
+
+  /**
    * Returns -1 if the first byte sequence is lexicographically less than the
    * second; returns +1 if the second byte sequence is lexicographically less
    * than the first; otherwise return 0.
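isDateMaybe is deliberately permissive: it rejects anything shorter than the shortest legal form yyyy-m-d before paying for Text.decode() and Date.valueOf(), but longer junk still goes through the full parse, which is why the javadoc calls the check necessary but not sufficient. A tiny demonstration, with the length test reproduced inline so it runs without Hive on the classpath:

    public class DateMaybe {
      static boolean isDateMaybe(byte[] buf, int offset, int len) {
        return len >= 8; // shortest valid form: yyyy-m-d
      }

      public static void main(String[] args) {
        // "19590101" passes the cheap check but still fails the real parse;
        // "junk" and other short garbage never reaches the expensive path.
        String[] samples = { "1959-1-1", "2016-06-28", "19590101", "junk" };
        for (String s : samples) {
          byte[] b = s.getBytes();
          System.out.println(s + " -> maybe date: " + isDateMaybe(b, 0, b.length));
        }
      }
    }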
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
index b375d26..c300b8b 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
@@ -21,6 +21,7 @@
 import java.io.UnsupportedEncodingException;
 import java.nio.charset.CharacterCodingException;
 import java.sql.Date;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -57,26 +58,25 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
   public static final Logger LOG = LoggerFactory.getLogger(LazySimpleDeserializeRead.class.getName());
 
-  private int[] startPosition;
+  private final int[] startPosition;
 
-  private byte separator;
-  private boolean isEscaped;
-  private byte escapeChar;
-  private byte[] nullSequenceBytes;
-  private boolean isExtendedBooleanLiteral;
-  private boolean lastColumnTakesRest;
+  private final byte separator;
+  private final boolean isEscaped;
+  private final byte escapeChar;
+  private final byte[] nullSequenceBytes;
+  private final boolean isExtendedBooleanLiteral;
+  private final boolean lastColumnTakesRest;
 
   private byte[] bytes;
   private int start;
-  private int offset;
   private int end;
   private int fieldCount;
   private int fieldIndex;
-  private int fieldStart;
-  private int fieldLength;
+  private boolean hasEscapes;
+  private int minFieldCount;
 
-  private Text tempText;
-  private TimestampParser timestampParser;
+  private final Text tempText;
+  private final TimestampParser timestampParser;
 
   private boolean extraFieldWarned;
   private boolean missingFieldWarned;
@@ -97,9 +97,17 @@ public LazySimpleDeserializeRead(TypeInfo[] typeInfos,
     lastColumnTakesRest = lazyParams.isLastColumnTakesRest();
 
     fieldCount = typeInfos.length;
-    tempText = new Text();
+    if (isEscaped) {
+      tempText = new Text();
+    } else {
+      tempText = null;
+    }
     extraFieldWarned = false;
     missingFieldWarned = false;
+
+    timestampParser = new TimestampParser();
+    // setup min fields
+    setColumnsToInclude(columnsToInclude);
   }
 
   public LazySimpleDeserializeRead(TypeInfo[] typeInfos, LazySerDeParameters lazyParams) {
@@ -107,8 +115,22 @@
   }
 
   // Not public since we must have the field count so every 8 fields NULL bytes can be navigated.
+  @SuppressWarnings("unused")
   private LazySimpleDeserializeRead() {
     super();
+    minFieldCount = fieldCount = -1;
+    separator = 0;
+    tempText = null;
+    extraFieldWarned = false;
+    missingFieldWarned = false;
+    timestampParser = null;
+
+    isEscaped = false;
+    escapeChar = '\0';
+    nullSequenceBytes = null;
+    isExtendedBooleanLiteral = false;
+    lastColumnTakesRest = false;
+    startPosition = null;
   }
 
   /*
@@ -117,10 +139,24 @@ private LazySimpleDeserializeRead() {
    */
   @Override
   public void set(byte[] bytes, int offset, int length) {
     this.bytes = bytes;
-    this.offset = offset;
-    start = offset;
-    end = offset + length;
-    fieldIndex = -1;
+    this.start = offset;
+    this.end = offset + length;
+    this.fieldIndex = -1;
+  }
+
+  @Override
+  public void setColumnsToInclude(boolean[] columnsToInclude) {
+    super.setColumnsToInclude(columnsToInclude);
+    int lastUsedField = fieldCount - 1; // last index
+    if (columnsToInclude != null) {
+      lastUsedField = -1; // all items are false in columnsToInclude
+      for (int i = 0; i < columnsToInclude.length; i++) {
+        if (columnsToInclude[i]) {
+          lastUsedField = i;
+        }
+      }
+    }
+    minFieldCount = lastUsedField + 1;
   }
 
   /**
@@ -131,7 +167,7 @@ public void set(byte[] bytes, int offset, int length) {
    */
   private void parse() {
-    int structByteEnd = end;
+    final int structByteEnd = end;
     int fieldId = 0;
     int fieldByteBegin = start;
     int fieldByteEnd = start;
@@ -145,7 +181,7 @@ private void parse() {
       }
       startPosition[fieldId] = fieldByteBegin;
       fieldId++;
-      if (fieldId == fieldCount || fieldByteEnd == structByteEnd) {
+      if (fieldId == minFieldCount || fieldByteEnd == structByteEnd) {
         // All fields have been parsed, or bytes have been parsed.
         // We need to set the startPosition of fields.length to ensure we
        // can use the same formula to calculate the length of each field.
@@ -176,11 +212,33 @@ private void parse() {
     }
 
     // Missing fields?
-    if (!missingFieldWarned && fieldId < fieldCount) {
+    if (!missingFieldWarned && fieldId < minFieldCount) {
       doMissingFieldWarned(fieldId);
     }
   }
 
+  private boolean checkNull(byte[] bytes, int start, int len) {
+    if (len != nullSequenceBytes.length) {
+      return false;
+    }
+    switch (len) {
+    case 0:
+      return true;
+    case 2:
+      return bytes[start] == nullSequenceBytes[0] && bytes[start+1] == nullSequenceBytes[1];
+    case 4:
+      return bytes[start] == nullSequenceBytes[0] && bytes[start+1] == nullSequenceBytes[1]
+          && bytes[start+2] == nullSequenceBytes[2] && bytes[start+3] == nullSequenceBytes[3];
+    default:
+      for (int i = 0; i < nullSequenceBytes.length; i++) {
+        if (bytes[start + i] != nullSequenceBytes[i]) {
+          return false;
+        }
+      }
+      return true;
+    }
+  }
+
   /*
    * Reads the NULL information for a field.
    *
@@ -189,10 +247,15 @@
    */
   @Override
   public boolean readCheckNull() {
+
+    if (minFieldCount == 0) {
+      return true;
+    }
+
     if (fieldIndex == -1) {
       parse();
       fieldIndex = 0;
-    } else if (fieldIndex + 1 >= fieldCount) {
+    } else if (fieldIndex + 1 >= minFieldCount) {
       return true;
     } else {
       fieldIndex++;
@@ -203,25 +266,16 @@ public boolean readCheckNull() {
       return true;
     }
 
-    fieldStart = startPosition[fieldIndex];
-    fieldLength = startPosition[fieldIndex + 1] - startPosition[fieldIndex] - 1;
+    final int fieldStart = startPosition[fieldIndex];
+    final int fieldLength = startPosition[fieldIndex + 1] - startPosition[fieldIndex] - 1;
     if (fieldLength < 0) {
       return true;
     }
 
     // Is the field the configured string representing NULL?
     if (nullSequenceBytes != null) {
-      if (fieldLength == nullSequenceBytes.length) {
-        int i = 0;
-        while (true) {
-          if (bytes[fieldStart + i] != nullSequenceBytes[i]) {
-            break;
-          }
-          i++;
-          if (i >= fieldLength) {
-            return true;
-          }
-        }
+      if (checkNull(bytes, fieldStart, fieldLength)) {
+        return true;
       }
     }
 
@@ -270,6 +324,9 @@ public boolean readCheckNull() {
       }
       break;
     case BYTE:
+      if (!LazyUtils.isNumberMaybe(bytes, fieldStart, fieldLength)) {
+        return true;
+      }
       try {
         currentByte = LazyByte.parseByte(bytes, fieldStart, fieldLength, 10);
       } catch (NumberFormatException e) {
@@ -278,6 +335,9 @@ public boolean readCheckNull() {
       }
       break;
     case SHORT:
+      if (!LazyUtils.isNumberMaybe(bytes, fieldStart, fieldLength)) {
+        return true;
+      }
       try {
         currentShort = LazyShort.parseShort(bytes, fieldStart, fieldLength, 10);
       } catch (NumberFormatException e) {
@@ -286,6 +346,9 @@ public boolean readCheckNull() {
       }
       break;
     case INT:
+      if (!LazyUtils.isNumberMaybe(bytes, fieldStart, fieldLength)) {
+        return true;
+      }
       try {
         currentInt = LazyInteger.parseInt(bytes, fieldStart, fieldLength, 10);
       } catch (NumberFormatException e) {
@@ -294,6 +357,9 @@ public boolean readCheckNull() {
       }
       break;
     case LONG:
+      if (!LazyUtils.isNumberMaybe(bytes, fieldStart, fieldLength)) {
+        return true;
+      }
       try {
         currentLong = LazyLong.parseLong(bytes, fieldStart, fieldLength, 10);
       } catch (NumberFormatException e) {
@@ -303,6 +369,9 @@ public boolean readCheckNull() {
       break;
     case FLOAT:
       {
+        if (!LazyUtils.isNumberMaybe(bytes, fieldStart, fieldLength)) {
+          return true;
+        }
         String byteData = null;
         try {
           byteData = Text.decode(bytes, fieldStart, fieldLength);
@@ -319,6 +388,9 @@ public boolean readCheckNull() {
       break;
     case DOUBLE:
       {
+        if (!LazyUtils.isNumberMaybe(bytes, fieldStart, fieldLength)) {
+          return true;
+        }
         String byteData = null;
         try {
           byteData = Text.decode(bytes, fieldStart, fieldLength);
@@ -337,7 +409,7 @@ public boolean readCheckNull() {
     case STRING:
     case CHAR:
     case VARCHAR:
-      if (isEscaped) {
+      if (isEscaped && hasEscapes) {
         LazyUtils.copyAndEscapeStringDataToText(bytes, fieldStart, fieldLength, escapeChar, tempText);
         currentBytes = tempText.getBytes();
         currentBytesStart = 0;
@@ -363,6 +435,9 @@ public boolean readCheckNull() {
       break;
     case DATE:
       {
+        if (!LazyUtils.isDateMaybe(bytes, fieldStart, fieldLength)) {
+          return true;
+        }
         String s = null;
         try {
           s = Text.decode(bytes, fieldStart, fieldLength);
@@ -375,6 +450,9 @@ public boolean readCheckNull() {
       break;
     case TIMESTAMP:
       {
+        if (!LazyUtils.isDateMaybe(bytes, fieldStart, fieldLength)) {
+          return true;
+        }
         String s = null;
         try {
           s = new String(bytes, fieldStart, fieldLength, "US-ASCII");
@@ -388,9 +466,6 @@ public boolean readCheckNull() {
           return true;
         } else {
           try {
-            if (timestampParser == null) {
-              timestampParser = new TimestampParser();
-            }
             currentTimestampWritable.set(timestampParser.parseTimestamp(s));
           } catch (IllegalArgumentException e) {
             logExceptionMessage(bytes, fieldStart, fieldLength, "TIMESTAMP");
@@ -401,6 +476,9 @@ public boolean readCheckNull() {
       break;
     case INTERVAL_YEAR_MONTH:
       {
+        if (fieldLength == 0) {
+          return true;
+        }
         String s = null;
         try {
           s = Text.decode(bytes, fieldStart, fieldLength);
@@ -413,6 +491,9 @@ public boolean readCheckNull() {
       break;
     case INTERVAL_DAY_TIME:
      {
+        if (fieldLength == 0) {
+          return true;
+        }
         String s = null;
         try {
           s = Text.decode(bytes, fieldStart, fieldLength);
@@ -425,6 +506,9 @@ public boolean readCheckNull() {
       break;
     case DECIMAL:
       {
+        if (!LazyUtils.isNumberMaybe(bytes, fieldStart, fieldLength)) {
+          return true;
+        }
         String byteData = null;
         try {
           byteData = Text.decode(bytes, fieldStart, fieldLength);
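The changes above all serve one idea: setColumnsToInclude() records the last projected field in minFieldCount, parse() stops scanning for separators once that field has been located, and readCheckNull() answers NULL for everything past it, so trailing columns are never even decoded. A standalone sketch of that early-exit split (illustrative code, not the Hive class; assumes a single-byte separator and a row that actually contains the requested fields):

    public class TruncatedSplit {
      // Returns start offsets of the first minFields fields; ignores the rest.
      static int[] split(byte[] row, byte sep, int minFields) {
        int[] starts = new int[minFields + 1];
        int field = 0, pos = 0;
        starts[0] = 0;
        while (field < minFields && pos <= row.length) {
          if (pos == row.length || row[pos] == sep) {
            starts[++field] = pos + 1;   // field boundary found
            if (field == minFields) {
              break;                     // early exit: remaining bytes never scanned
            }
          }
          pos++;
        }
        return starts;
      }

      public static void main(String[] args) {
        byte[] row = "a|bb|ccc|dddd|eeeee".getBytes();
        int[] s = split(row, (byte) '|', 2); // only the first two fields are needed
        for (int i = 0; i < 2; i++) {
          System.out.println(new String(row, s[i], s[i + 1] - s[i] - 1));
        }
      }
    }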