From 0e482291ee881ea3a9c6ef6d8cebfe14f2b78cb7 Mon Sep 17 00:00:00 2001 From: Ivan Suller Date: Wed, 30 Jan 2019 15:05:10 +0100 Subject: [PATCH] HIVE-20295: Remove !isNumber check after failed constant interpretation --- .../hive/ql/parse/TypeCheckProcFactory.java | 31 ++-- .../ql/parse/TestTypeCheckProcFactory.java | 146 ++++++++++++++++++ .../clientpositive/infer_const_type.q.out | 13 +- 3 files changed, 170 insertions(+), 20 deletions(-) create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/parse/TestTypeCheckProcFactory.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index b49bb3624b..a2dd554b6e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -106,6 +106,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; @@ -1345,7 +1346,8 @@ private static ExprNodeDesc interpretNodeAsStruct(ExprNodeDesc columnDesc, ExprN return valueDesc; } - private static ExprNodeDesc interpretNodeAs(PrimitiveTypeInfo colTypeInfo, ExprNodeDesc constChild) { + @VisibleForTesting + protected static ExprNodeDesc interpretNodeAs(PrimitiveTypeInfo colTypeInfo, ExprNodeDesc constChild) { if (constChild instanceof ExprNodeConstantDesc) { // Try to narrow type of constant Object constVal = ((ExprNodeConstantDesc) constChild).getValue(); @@ -1373,32 +1375,36 @@ private static TypeInfo adjustType(PrimitiveTypeInfo colTypeInfo, Object newCons return colTypeInfo; } + private static BigDecimal toBigDecimal(String val) { + if (!NumberUtils.isNumber(val)) { + throw new NumberFormatException("The given string is not a valid number: " + val); + } + return new BigDecimal(val.replaceAll("[dDfFlL]$", "")); + } + private static Object interpretConstantAsPrimitive(PrimitiveTypeInfo colTypeInfo, Object constVal, TypeInfo constTypeInfo) { - String constTypeInfoName = constTypeInfo.getTypeName(); if (constVal instanceof Number || constVal instanceof String) { try { PrimitiveTypeEntry primitiveTypeEntry = colTypeInfo.getPrimitiveTypeEntry(); if (PrimitiveObjectInspectorUtils.intTypeEntry.equals(primitiveTypeEntry)) { - return (new Integer(constVal.toString())); + return toBigDecimal(constVal.toString()).intValueExact(); } else if (PrimitiveObjectInspectorUtils.longTypeEntry.equals(primitiveTypeEntry)) { - return (new Long(constVal.toString())); + return toBigDecimal(constVal.toString()).longValueExact(); } else if (PrimitiveObjectInspectorUtils.doubleTypeEntry.equals(primitiveTypeEntry)) { - return (new Double(constVal.toString())); + return Double.valueOf(constVal.toString()); } else if (PrimitiveObjectInspectorUtils.floatTypeEntry.equals(primitiveTypeEntry)) { - return (new Float(constVal.toString())); + return Float.valueOf(constVal.toString()); } else if (PrimitiveObjectInspectorUtils.byteTypeEntry.equals(primitiveTypeEntry)) { - return (new Byte(constVal.toString())); + return toBigDecimal(constVal.toString()).byteValueExact(); } else if (PrimitiveObjectInspectorUtils.shortTypeEntry.equals(primitiveTypeEntry)) { - return (new Short(constVal.toString())); + return toBigDecimal(constVal.toString()).shortValueExact(); } else if (PrimitiveObjectInspectorUtils.decimalTypeEntry.equals(primitiveTypeEntry)) { return HiveDecimal.create(constVal.toString()); } - } catch (NumberFormatException nfe) { + } catch (NumberFormatException | ArithmeticException nfe) { LOG.trace("Failed to narrow type of constant", nfe); - if (!NumberUtils.isNumber(constVal.toString())) { - return null; - } + return null; } } @@ -1419,6 +1425,7 @@ private static Object interpretConstantAsPrimitive(PrimitiveTypeInfo colTypeInfo // if column type is char and constant type is string, then convert the constant to char // type with padded spaces. + String constTypeInfoName = constTypeInfo.getTypeName(); if (constTypeInfoName.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) && colTypeInfo instanceof CharTypeInfo) { final String constValue = constVal.toString(); final int length = TypeInfoUtils.getCharacterLengthForType(colTypeInfo); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestTypeCheckProcFactory.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestTypeCheckProcFactory.java new file mode 100644 index 0000000000..66d024a162 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestTypeCheckProcFactory.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.mockito.Mockito.when; + +import java.math.BigDecimal; +import java.util.Arrays; +import java.util.Collection; + +import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.DefaultExprProcessor; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +/** + * Parametrized test for the TypeCheckProcFactory. + * + */ +@RunWith(Parameterized.class) +public class TestTypeCheckProcFactory { + @Mock + private PrimitiveTypeInfo typeInfo; + @Mock + private ExprNodeConstantDesc nodeDesc; + + private DefaultExprProcessor testSubject; + + @Parameters(name = "{1}") + public static Collection data() { + return Arrays.asList(new Object[][] {{"127", PrimitiveObjectInspectorUtils.byteTypeEntry, (byte) 127, true}, + {"32767", PrimitiveObjectInspectorUtils.shortTypeEntry, (short) 32767, true}, + {"2147483647", PrimitiveObjectInspectorUtils.intTypeEntry, 2147483647, true}, + {"9223372036854775807", PrimitiveObjectInspectorUtils.longTypeEntry, 9223372036854775807L, true}, + {"111.1", PrimitiveObjectInspectorUtils.floatTypeEntry, 111.1f, false}, + {"111.1", PrimitiveObjectInspectorUtils.doubleTypeEntry, 111.1d, false}}); + } + + private final BigDecimal maxValue; + private final PrimitiveTypeEntry constType; + private final Object expectedValue; + private final boolean intType; + + public TestTypeCheckProcFactory(String maxValue, PrimitiveTypeEntry constType, Object expectedValue, + boolean intType) { + this.maxValue = new BigDecimal(maxValue); + this.constType = constType; + this.expectedValue = expectedValue; + this.intType = intType; + } + + @Before + public void init() { + MockitoAnnotations.initMocks(this); + testSubject = new DefaultExprProcessor(); + } + + public void testOneCase(Object constValue) { + when(nodeDesc.getValue()).thenReturn(constValue); + when(typeInfo.getPrimitiveTypeEntry()).thenReturn(constType); + + ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAs(typeInfo, nodeDesc); + + assertNotNull(result); + assertEquals(expectedValue, result.getValue()); + } + + public void testNullCase(Object constValue) { + when(nodeDesc.getValue()).thenReturn(constValue); + when(typeInfo.getPrimitiveTypeEntry()).thenReturn(constType); + + ExprNodeConstantDesc result = (ExprNodeConstantDesc) testSubject.interpretNodeAs(typeInfo, nodeDesc); + + assertNull(result); + } + + @Test + public void testWithSring() { + testOneCase(maxValue.toString()); + } + + @Test + public void testWithLSuffix() { + if (intType) { + testOneCase(maxValue.toString() + "L"); + } + } + + @Test + public void testWithZeroFraction() { + if (intType) { + testOneCase(maxValue.toString() + ".0"); + } + } + + @Test + public void testWithFSuffix() { + testOneCase(maxValue.toString() + "f"); + } + + @Test + public void testWithDSuffix() { + testOneCase(maxValue.toString() + "D"); + } + + @Test + public void testOverflow() { + if (intType) { + testNullCase(maxValue.add(BigDecimal.valueOf(1L)).toString()); + } + } + + @Test + public void testWithNonZeroFraction() { + if (intType) { + testNullCase("100.1"); + } + } + +} diff --git a/ql/src/test/results/clientpositive/infer_const_type.q.out b/ql/src/test/results/clientpositive/infer_const_type.q.out index b736f4b159..bbdb5be8ca 100644 --- a/ql/src/test/results/clientpositive/infer_const_type.q.out +++ b/ql/src/test/results/clientpositive/infer_const_type.q.out @@ -108,7 +108,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@infertypes #### A masked pattern was here #### 127 32767 12345 -12345 906.0 -307.0 1234 -WARNING: Comparing a bigint and a string may result in a loss of precision. PREHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE ti = '128' OR si = 32768 OR @@ -139,10 +138,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: infertypes - filterExpr: ((UDFToDouble(ti) = 128.0D) or (UDFToInteger(si) = 32768) or (UDFToDouble(i) = 2.147483648E9D) or (UDFToDouble(bi) = 9.223372036854776E18D) or null) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(bi) = 9.223372036854776E18D) or (UDFToDouble(i) = 2.147483648E9D) or (UDFToDouble(ti) = 128.0D) or (UDFToInteger(si) = 32768) or null) (type: boolean) + predicate: false (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string) @@ -163,7 +161,6 @@ STAGE PLANS: Processor Tree: ListSink -WARNING: Comparing a bigint and a string may result in a loss of precision. PREHOOK: query: SELECT * FROM infertypes WHERE ti = '128' OR si = 32768 OR @@ -208,10 +205,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: infertypes - filterExpr: ((UDFToDouble(ti) = 127.0D) or (CAST( si AS decimal(5,0)) = 327) or (UDFToDouble(i) = -100.0D)) (type: boolean) + filterExpr: ((ti = 127Y) or (CAST( si AS decimal(5,0)) = 327) or (i = -100)) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((CAST( si AS decimal(5,0)) = 327) or (UDFToDouble(i) = -100.0D) or (UDFToDouble(ti) = 127.0D)) (type: boolean) + predicate: ((CAST( si AS decimal(5,0)) = 327) or (i = -100) or (ti = 127Y)) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string) @@ -271,10 +268,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: infertypes - filterExpr: ((UDFToDouble(ti) < 127.0D) and (UDFToDouble(i) > 100.0D) and (UDFToDouble(str) = 1.57D)) (type: boolean) + filterExpr: ((ti < 127Y) and (i > 100) and (UDFToDouble(str) = 1.57D)) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(i) > 100.0D) and (UDFToDouble(str) = 1.57D) and (UDFToDouble(ti) < 127.0D)) (type: boolean) + predicate: ((UDFToDouble(str) = 1.57D) and (i > 100) and (ti < 127Y)) (type: boolean) Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string) -- 2.18.0