diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java index 159dd0f..efae82d 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.lazy.LazyByte; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -166,9 +167,11 @@ public ByteWritable evaluate(Text i) { if (i == null) { return null; } else { + if (!LazyUtils.isNumberMaybe(i.getBytes(), 0, i.getLength())) { + return null; + } try { - byteWritable - .set(LazyByte.parseByte(i.getBytes(), 0, i.getLength(), 10)); + byteWritable.set(LazyByte.parseByte(i.getBytes(), 0, i.getLength(), 10)); return byteWritable; } catch (NumberFormatException e) { // MySQL returns 0 if the string is not a well-formed numeric value. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java index 5763947..e932f11 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -164,6 +165,9 @@ public DoubleWritable evaluate(Text i) { if (i == null) { return null; } else { + if (!LazyUtils.isNumberMaybe(i.getBytes(), 0, i.getLength())) { + return null; + } try { doubleWritable.set(Double.valueOf(i.toString())); return doubleWritable; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java index e2183f4..119eaca 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -165,6 +166,9 @@ public FloatWritable evaluate(Text i) { if (i == null) { return null; } else { + if (!LazyUtils.isNumberMaybe(i.getBytes(), 0, i.getLength())) { + return null; + } try { floatWritable.set(Float.valueOf(i.toString())); return floatWritable; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java index 5f5d1fe..fc6540e 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.lazy.LazyInteger; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -167,6 +168,9 @@ public IntWritable evaluate(Text i) { if (i == null) { return null; } else { + if (!LazyUtils.isNumberMaybe(i.getBytes(), 0, i.getLength())) { + return null; + } try { intWritable.set(LazyInteger .parseInt(i.getBytes(), 0, i.getLength(), 10)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java index 3eeabea..3d85abd 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.lazy.LazyLong; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -177,6 +178,9 @@ public LongWritable evaluate(Text i) { if (i == null) { return null; } else { + if (!LazyUtils.isNumberMaybe(i.getBytes(), 0, i.getLength())) { + return null; + } try { longWritable .set(LazyLong.parseLong(i.getBytes(), 0, i.getLength(), 10)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java index b9065b2..24533d6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.lazy.LazyShort; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -167,6 +168,9 @@ public ShortWritable evaluate(Text i) { if (i == null) { return null; } else { + if (!LazyUtils.isNumberMaybe(i.getBytes(), 0, i.getLength())) { + return null; + } try { shortWritable.set(LazyShort.parseShort(i.getBytes(), 0, i.getLength(), 10)); diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out index 684d4d7..b0df617 100644 --- a/ql/src/test/results/clientpositive/pcr.q.out +++ b/ql/src/test/results/clientpositive/pcr.q.out @@ -5359,7 +5359,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 columns key,value columns.comments 'default','default' @@ -5547,7 +5547,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 columns key,value columns.comments 'default','default' @@ -5593,7 +5593,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 columns key,value columns.comments 'default','default' @@ -5775,7 +5775,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 columns key,value columns.comments 'default','default' @@ -5821,7 +5821,7 @@ STAGE PLANS: ds 2008-04-09 hr 11 properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 columns key,value columns.comments 'default','default' diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out index fb17e72..b675577 100644 --- a/ql/src/test/results/clientpositive/pointlookup2.q.out +++ b/ql/src/test/results/clientpositive/pointlookup2.q.out @@ -168,7 +168,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + predicate: (struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) @@ -1593,7 +1593,7 @@ STAGE PLANS: Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE Filter Operator isSamplingPred: false - predicate: (struct(_col0,_col3)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + predicate: (struct(_col3,_col0)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false