diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/LazySimpleSerDeBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/LazySimpleSerDeBench.java index 826bf53..a99374c 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/LazySimpleSerDeBench.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/LazySimpleSerDeBench.java @@ -15,6 +15,7 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Date; import java.util.Random; import java.util.concurrent.TimeUnit; @@ -28,6 +29,7 @@ import org.apache.hadoop.hive.serde2.lazy.LazyLong; import org.apache.hadoop.hive.serde2.lazy.LazyShort; import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp; +import org.apache.hadoop.hive.serde2.lazy.fast.StringToDouble; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -62,7 +64,7 @@ @BenchmarkMode(Mode.AverageTime) @Fork(1) @State(Scope.Thread) - @OutputTimeUnit(TimeUnit.NANOSECONDS) + @OutputTimeUnit(TimeUnit.MILLISECONDS) public static abstract class AbstractDeserializer { public int[] offsets = new int[DEFAULT_DATA_SIZE]; @@ -449,8 +451,32 @@ public void bench() { @BenchmarkMode(Mode.AverageTime) @Fork(1) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + @Warmup(iterations = 4, time = 2, timeUnit = TimeUnit.MILLISECONDS) + @Measurement(iterations = 4, time = 2, timeUnit = TimeUnit.MILLISECONDS) @State(Scope.Thread) - @OutputTimeUnit(TimeUnit.NANOSECONDS) + public static class ParseDouble { + byte[] bytes = "1234567890.1234567890".getBytes(StandardCharsets.UTF_8); + + @Benchmark + public void floatingDecimalBench() { + for (int i = 0; i < DEFAULT_ITER_TIME; i++) { + StringToDouble.strtod(new String(bytes, 0, bytes.length, StandardCharsets.UTF_8)); + } + } + + @Benchmark + public void 
doubleBench() { + for (int i = 0; i < DEFAULT_ITER_TIME; i++) { + Double.parseDouble(new String(bytes, 0, bytes.length, StandardCharsets.UTF_8)); + } + } + } + + @BenchmarkMode(Mode.AverageTime) + @Fork(1) + @State(Scope.Thread) + @OutputTimeUnit(TimeUnit.MILLISECONDS) public static class GoodLazyDate { final LazyDate obj = new LazyDate( @@ -533,7 +559,7 @@ public void bench() { @BenchmarkMode(Mode.AverageTime) @Fork(1) @State(Scope.Thread) - @OutputTimeUnit(TimeUnit.NANOSECONDS) + @OutputTimeUnit(TimeUnit.MILLISECONDS) public static class GoodLazyTimestamp { final LazyTimestamp obj = new LazyTimestamp( @@ -614,7 +640,7 @@ public void bench() { public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder().include( - ".*" + LazySimpleSerDeBench.class.getSimpleName() + ".*").build(); + ".*" + LazySimpleSerDeBench.class.getSimpleName() + ".ParseDouble").build(); new Runner(opt).run(); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java index 264335c..997b15a 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java @@ -447,8 +447,7 @@ public boolean readField(int fieldIndex) throws IOException { return false; } currentDouble = - Double.parseDouble( - new String(bytes, fieldStart, fieldLength, StandardCharsets.UTF_8)); + StringToDouble.strtod(new String(bytes, fieldStart, fieldLength, StandardCharsets.UTF_8)); return true; case STRING: case CHAR: diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/StringToDouble.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/StringToDouble.java new file mode 100644 index 0000000..c3d6798 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/StringToDouble.java @@ -0,0 +1,215 @@ +/* + * Source 
/*
 * Source code for the "strtod" library procedure.
 *
 * Copyright 1988-1992 Regents of the University of California
 * Permission to use, copy, modify, and distribute this
 * software and its documentation for any purpose and without
 * fee is hereby granted, provided that the above copyright
 * notice appear in all copies.  The University of California
 * makes no representations about the suitability of this
 * software for any purpose.  It is provided "as is" without
 * express or implied warranty.
 */

/**
 * Lenient decimal-string to {@code double} conversion modelled on the C
 * {@code strtod} procedure.
 *
 * <p>The parser reads an optional sign, a run of decimal digits with at most
 * one decimal point, and an optional {@code E}/{@code e} exponent. Anything
 * after the longest such prefix is ignored, and input without any mantissa
 * digit yields a signed zero; no exception is thrown for malformed text.
 *
 * <p>The value is produced with ordinary {@code double} arithmetic from a
 * table of powers of ten, so it is not guaranteed to be the correctly rounded
 * value that {@link Double#parseDouble(String)} returns; the two can differ
 * in the last few bits, mainly for long mantissas or large exponents.
 */
public class StringToDouble {
  /**
   * Largest possible base 10 exponent. Any exponent larger than this will
   * already produce underflow or overflow, so there's no need to worry about
   * additional digits.
   */
  static final int maxExponent = 511;

  /**
   * Table giving binary powers of 10. Entry i is 10^(2^i). Used to convert
   * decimal exponents into floating-point numbers.
   */
  static final double[] powersOf10 = {
    10.,
    100.,
    1.0e4,
    1.0e8,
    1.0e16,
    1.0e32,
    1.0e64,
    1.0e128,
    1.0e256
  };

  /** Significant digits kept in the mantissa; 18 digits always fit in a long. */
  private static final int MAX_MANTISSA_DIGITS = 18;

  /** Largest n for which 10^n is still a finite double. */
  private static final int MAX_FINITE_POWER = 308;

  /** Once the exponent field reaches this value further digits are ignored. */
  private static final int EXPONENT_SATURATION = 100000000;

  /**
   * Parses the longest leading decimal number in {@code string}.
   *
   * <p>Leading and trailing characters removed by {@link String#trim()} are
   * skipped. The literals {@code NaN} and {@code Infinity}, optionally signed,
   * are recognised when they make up the whole trimmed input.
   *
   * @param string the text to parse
   * @return the parsed value; a signed zero when no mantissa digit is present
   * @throws NullPointerException if {@code string} is {@code null}
   */
  public static double strtod(String string) {
    final char[] chars = string.trim().toCharArray();
    final int length = chars.length;
    int p = 0;

    /*
     * Check for a sign. Every read of chars[p] below is guarded by
     * p < length, so empty, blank and sign-only input cannot index past the
     * end of the array.
     */
    boolean negative = false;
    if (p < length && (chars[p] == '-' || chars[p] == '+')) {
      negative = chars[p] == '-';
      p += 1;
    }

    /* Spellings that Double.toString produces for non-finite values. */
    if (remainderEquals(chars, p, "NaN")) {
      return Double.NaN;
    }
    if (remainderEquals(chars, p, "Infinity")) {
      return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
    }

    /*
     * Collect the mantissa in one pass. Leading zeros carry no value and do
     * not count against the digit budget. Once the budget is used up, extra
     * integer digits only raise the decimal exponent and extra fractional
     * digits are dropped.
     */
    long mantissa = 0L;
    int significantDigits = 0;
    int fracExp = 0;            /* decimal exponent implied by the mantissa */
    boolean seenPoint = false;
    for (; p < length; p += 1) {
      final char c = chars[p];
      if (c == '.') {
        if (seenPoint) {
          break;                /* a second point ends the number */
        }
        seenPoint = true;
      } else if (!isdigit(c)) {
        break;
      } else if (significantDigits == 0 && c == '0') {
        if (seenPoint) {
          fracExp -= 1;
        }
      } else if (significantDigits < MAX_MANTISSA_DIGITS) {
        mantissa = 10 * mantissa + (c - '0');
        significantDigits += 1;
        if (seenPoint) {
          fracExp -= 1;
        }
      } else if (!seenPoint) {
        fracExp += 1;
      }
    }

    /*
     * A zero mantissa is zero whatever the exponent says. Returning here
     * also avoids computing 0 * Infinity (NaN) for input such as "0e400".
     */
    if (mantissa == 0L) {
      return negative ? -0.0d : 0.0d;
    }

    /*
     * Skim off the exponent. The accumulator saturates instead of
     * overflowing int on absurdly long exponent fields.
     */
    int exp = 0;
    boolean expNegative = false;
    if (p < length && (chars[p] == 'E' || chars[p] == 'e')) {
      p += 1;
      if (p < length && (chars[p] == '-' || chars[p] == '+')) {
        expNegative = chars[p] == '-';
        p += 1;
      }
      for (; p < length && isdigit(chars[p]); p += 1) {
        if (exp < EXPONENT_SATURATION) {
          exp = 10 * exp + (chars[p] - '0');
        }
      }
    }

    /* Combine both exponent sources in a long so the sum cannot wrap. */
    final long totalExp = (long) fracExp + (expNegative ? -exp : exp);
    final boolean scaleDown = totalExp < 0;
    final int magnitude = (int) Math.min(Math.abs(totalExp), (long) maxExponent);

    /*
     * Combine the exponent with the mantissa. When scaling down by more than
     * 10^308 the divisor itself would overflow to Infinity and flush the
     * result to zero, so the division is split in two steps. The smaller
     * step comes first so the intermediate value keeps full precision.
     */
    double value = mantissa;
    if (!scaleDown) {
      value *= pow10(magnitude);
    } else if (magnitude <= MAX_FINITE_POWER) {
      value /= pow10(magnitude);
    } else {
      value = (value / pow10(magnitude - MAX_FINITE_POWER)) / pow10(MAX_FINITE_POWER);
    }

    return negative ? -value : value;
  }

  /**
   * Computes 10^exponent by multiplying the table entries selected by the
   * set bits of {@code exponent}.
   *
   * @param exponent a value in the range 0..maxExponent
   * @return the power of ten, or positive infinity when it overflows
   */
  private static double pow10(int exponent) {
    double result = 1.0;
    for (int bit = 0; exponent != 0; exponent >>= 1, bit += 1) {
      if ((exponent & 1) == 1) {
        result *= powersOf10[bit];
      }
    }
    return result;
  }

  /** Returns true when chars[from..] is exactly {@code literal}. */
  private static boolean remainderEquals(char[] chars, int from, String literal) {
    if (chars.length - from != literal.length()) {
      return false;
    }
    for (int i = 0; i < literal.length(); i++) {
      if (chars[from + i] != literal.charAt(i)) {
        return false;
      }
    }
    return true;
  }

  /** Returns true for the ASCII digits '0' through '9'. */
  private static boolean isdigit(int c) {
    return '0' <= c && c <= '9';
  }
}
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.serde2.lazy.fast;

import static org.junit.Assert.assertEquals;

import java.util.Random;

import org.junit.Test;

/**
 * Randomized round-trip tests for {@link StringToDouble#strtod(String)}.
 *
 * <p>Each sample is printed with {@link Double#toString(double)} and parsed
 * back. The random generators use a fixed seed so a failing sample can be
 * reproduced.
 */
public class TestStringToDouble {
  /** Fixed seed so every run checks the same samples. */
  private static final long SEED = 20160101L;

  /** Number of random samples per test. */
  private static final int SAMPLES = 100;

  /**
   * Relative error allowed for {@code strtod}. It scales the mantissa with
   * floating-point multiplications and divisions, so the result can differ
   * from the exact value in the last few bits.
   */
  private static final double RELATIVE_TOLERANCE = 1e-14;

  /** Samples arbitrary bit patterns: the full exponent range and both signs. */
  @Test
  public void testRandomBig() {
    Random random = new Random(SEED);
    for (int i = 0; i < SAMPLES; i++) {
      assertRoundTrip(Double.longBitsToDouble(random.nextLong()));
    }
  }

  /** Samples values from {@link Random#nextDouble()}. */
  @Test
  public void testRandomSmall() {
    Random random = new Random(SEED);
    for (int i = 0; i < SAMPLES; i++) {
      assertRoundTrip(random.nextDouble());
    }
  }

  /**
   * Checks that {@code d} survives printing and parsing. The JDK parser must
   * reproduce it exactly; {@code strtod} must be within {@link #tolerance}.
   */
  private static void assertRoundTrip(double d) {
    String s = Double.toString(d);
    assertEquals(s, d, Double.parseDouble(s), 0.0d);
    assertEquals(s, d, StringToDouble.strtod(s), tolerance(d));
  }

  /**
   * Returns the absolute error allowed around {@code expected}. NaN and the
   * infinities get a zero tolerance because a scaled tolerance would itself
   * be NaN or infinite and stop the comparison from being meaningful.
   */
  private static double tolerance(double expected) {
    if (Double.isNaN(expected) || Double.isInfinite(expected)) {
      return 0.0d;
    }
    return Math.abs(expected) * RELATIVE_TOLERANCE + 2 * Math.ulp(expected);
  }
}