diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/TestToInteger.java ql/src/test/org/apache/hadoop/hive/ql/udf/TestToInteger.java index a5f8a15..f4f676c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/TestToInteger.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/TestToInteger.java @@ -25,6 +25,6 @@ public void testTextToInteger() throws Exception{ Text t4 = new Text("1.1"); IntWritable i4 = ti.evaluate(t4); - assertNull(i4); + assertEquals(1, i4.get()); } } diff --git ql/src/test/queries/clientpositive/cast_to_int.q ql/src/test/queries/clientpositive/cast_to_int.q new file mode 100644 index 0000000..729ffdc --- /dev/null +++ ql/src/test/queries/clientpositive/cast_to_int.q @@ -0,0 +1,30 @@ +-- cast string floats to integer types +select + cast('1' as float), + cast('1.4' as float), + cast('1.6' as float), + cast('1' as int), + cast('1.4' as int), + cast('1.6' as int), + cast('1' as tinyint), + cast('1.4' as tinyint), + cast('1.6' as tinyint), + cast('1' as smallint), + cast('1.4' as smallint), + cast('1.6' as smallint), + cast('1' as bigint), + cast('1.4' as bigint), + cast('1.6' as bigint), + cast (cast('1' as float) as int), + cast(cast ('1.4' as float) as int), + cast(cast ('1.6' as float) as int), + cast('+1e5' as int), + cast('2147483647' as int), + cast('-2147483648' as int), + cast('32767' as smallint), + cast('-32768' as smallint), + cast('-128' as tinyint), + cast('127' as tinyint), + cast('1.0a' as int), + cast('-1.-1' as int) +from src limit 1; diff --git ql/src/test/results/clientpositive/cast_to_int.q.out ql/src/test/results/clientpositive/cast_to_int.q.out new file mode 100644 index 0000000..04da595 --- /dev/null +++ ql/src/test/results/clientpositive/cast_to_int.q.out @@ -0,0 +1,67 @@ +PREHOOK: query: -- cast string floats to integer types +select + cast('1' as float), + cast('1.4' as float), + cast('1.6' as float), + cast('1' as int), + cast('1.4' as int), + cast('1.6' as int), + cast('1' as tinyint), + cast('1.4' as tinyint), + cast('1.6' as tinyint), + cast('1' as smallint), + cast('1.4' as smallint), + cast('1.6' as smallint), + cast('1' as bigint), + cast('1.4' as bigint), + cast('1.6' as bigint), + cast (cast('1' as float) as int), + cast(cast ('1.4' as float) as int), + cast(cast ('1.6' as float) as int), + cast('+1e5' as int), + cast('2147483647' as int), + cast('-2147483648' as int), + cast('32767' as smallint), + cast('-32768' as smallint), + cast('-128' as tinyint), + cast('127' as tinyint), + cast('1.0a' as int), + cast('-1.-1' as int) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- cast string floats to integer types +select + cast('1' as float), + cast('1.4' as float), + cast('1.6' as float), + cast('1' as int), + cast('1.4' as int), + cast('1.6' as int), + cast('1' as tinyint), + cast('1.4' as tinyint), + cast('1.6' as tinyint), + cast('1' as smallint), + cast('1.4' as smallint), + cast('1.6' as smallint), + cast('1' as bigint), + cast('1.4' as bigint), + cast('1.6' as bigint), + cast (cast('1' as float) as int), + cast(cast ('1.4' as float) as int), + cast(cast ('1.6' as float) as int), + cast('+1e5' as int), + cast('2147483647' as int), + cast('-2147483648' as int), + cast('32767' as smallint), + cast('-32768' as smallint), + cast('-128' as tinyint), + cast('127' as tinyint), + cast('1.0a' as int), + cast('-1.-1' as int) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1.0 1.4 1.6 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 NULL 2147483647 -2147483648 32767 -32768 -128 127 NULL NULL diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java index 243f2b7..ad82ebf 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java @@ -136,11 +136,18 @@ public static int parseInt(byte[] bytes, int start, int length, int radix) { */ private static int parse(byte[] bytes, int start, int length, int offset, int radix, boolean negative) { + byte separator = '.'; int max = Integer.MIN_VALUE / radix; int result = 0, end = start + length; while (offset < end) { int digit = LazyUtils.digit(bytes[offset++], radix); if (digit == -1) { + if (bytes[offset-1] == separator) { + // We allow decimals and will return a truncated integer in that case. + // Therefore we won't throw an exception here (checking the fractional + // part happens below.) + break; + } throw new NumberFormatException(LazyUtils.convertToString(bytes, start, length)); } @@ -155,6 +162,18 @@ private static int parse(byte[] bytes, int start, int length, int offset, } result = next; } + + // This is the case when we've encountered a decimal separator. The fractional + // part will not change the number, but we will verify that the fractional part + // is well formed. + while (offset < end) { + int digit = LazyUtils.digit(bytes[offset++], radix); + if (digit == -1) { + throw new NumberFormatException(LazyUtils.convertToString(bytes, start, + length)); + } + } + if (!negative) { result = -result; if (result < 0) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java index 15ea4f2..a9779a0 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java @@ -138,11 +138,18 @@ public static long parseLong(byte[] bytes, int start, int length, int radix) { */ private static long parse(byte[] bytes, int start, int length, int offset, int radix, boolean negative) { + byte separator = '.'; long max = Long.MIN_VALUE / radix; long result = 0, end = start + length; while (offset < end) { int digit = LazyUtils.digit(bytes[offset++], radix); if (digit == -1 || max > result) { + if (bytes[offset-1] == separator) { + // We allow decimals and will return a truncated integer in that case. + // Therefore we won't throw an exception here (checking the fractional + // part happens below.) + break; + } throw new NumberFormatException(LazyUtils.convertToString(bytes, start, length)); } @@ -153,6 +160,18 @@ private static long parse(byte[] bytes, int start, int length, int offset, } result = next; } + + // This is the case when we've encountered a decimal separator. The fractional + // part will not change the number, but we will verify that the fractional part + // is well formed. + while (offset < end) { + int digit = LazyUtils.digit(bytes[offset++], radix); + if (digit == -1) { + throw new NumberFormatException(LazyUtils.convertToString(bytes, start, + length)); + } + } + if (!negative) { result = -result; if (result < 0) { diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java index 5e39d73..28eb868 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java @@ -62,12 +62,12 @@ public void testLazySimpleSerDe() throws Throwable { // Data Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\tNULL\t"); t.append(new byte[]{(byte)Integer.parseInt("10111111", 2)}, 0, 1); - StringBuffer sb = new StringBuffer("123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\tNULL\t"); + StringBuffer sb = new StringBuffer("123\t456\t789\t1000\t5.3\thive and hadoop\t1\tNULL\t"); String s = sb.append(new String(Base64.encodeBase64(new byte[]{(byte)Integer.parseInt("10111111", 2)}))).toString(); Object[] expectedFieldsData = {new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), - new Text("hive and hadoop"), null, null, new BytesWritable(new byte[]{(byte)Integer.parseInt("10111111", 2)})}; + new Text("hive and hadoop"), new IntWritable(1), null, new BytesWritable(new byte[]{(byte)Integer.parseInt("10111111", 2)})}; // Test deserializeAndSerialize(serDe, t, s, expectedFieldsData); @@ -128,11 +128,11 @@ public void testLazySimpleSerDeLastColumnTakesRest() throws Throwable { // Data Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t"); - String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta\tb\t"; + String s = "123\t456\t789\t1000\t5.3\thive and hadoop\t1\ta\tb\t"; Object[] expectedFieldsData = {new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), - new Text("hive and hadoop"), null, new Text("a\tb\t")}; + new Text("hive and hadoop"), new IntWritable(1), new Text("a\tb\t")}; // Test deserializeAndSerialize(serDe, t, s, expectedFieldsData); @@ -156,11 +156,11 @@ public void testLazySimpleSerDeExtraColumns() throws Throwable { // Data Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t"); - String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta"; + String s = "123\t456\t789\t1000\t5.3\thive and hadoop\t1\ta"; Object[] expectedFieldsData = {new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), - new Text("hive and hadoop"), null, new Text("a")}; + new Text("hive and hadoop"), new IntWritable(1), new Text("a")}; // Test deserializeAndSerialize(serDe, t, s, expectedFieldsData);