From 75f10583360e51da0f210c142366b5fc2c41cd3d Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Mon, 28 Mar 2016 18:36:13 -0700 Subject: [PATCH] HIVE-13373 : Use most specific type for numerical constants --- .../hadoop/hive/ql/parse/TypeCheckProcFactory.java | 66 +----------- ql/src/test/queries/clientpositive/type_widening.q | 6 ++ .../results/clientpositive/type_widening.q.out | 112 +++++++++++++++++++++ 3 files changed, 120 insertions(+), 64 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index 45dfd27..3e5e732 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -28,7 +28,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.Stack; import org.apache.commons.lang.StringUtils; @@ -62,7 +61,6 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; @@ -315,6 +313,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, v = Double.valueOf(expr.getText()); v = Long.valueOf(expr.getText()); v = Integer.valueOf(expr.getText()); + v = Short.valueOf(expr.getText()); + v = Byte.valueOf(expr.getText()); } } catch (NumberFormatException e) { // do nothing here, we will throw an exception in the following block @@ -991,71 +991,9 @@ protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, int constIdx = children.get(0) instanceof ExprNodeConstantDesc ? 0 : 1; - Set inferTypes = new HashSet(Arrays.asList( - serdeConstants.TINYINT_TYPE_NAME.toLowerCase(), - serdeConstants.SMALLINT_TYPE_NAME.toLowerCase(), - serdeConstants.INT_TYPE_NAME.toLowerCase(), - serdeConstants.BIGINT_TYPE_NAME.toLowerCase(), - serdeConstants.FLOAT_TYPE_NAME.toLowerCase(), - serdeConstants.DOUBLE_TYPE_NAME.toLowerCase(), - serdeConstants.STRING_TYPE_NAME.toLowerCase() - )); - String constType = children.get(constIdx).getTypeString().toLowerCase(); String columnType = children.get(1 - constIdx).getTypeString().toLowerCase(); - if (inferTypes.contains(constType) && inferTypes.contains(columnType) - && !columnType.equalsIgnoreCase(constType)) { - Object originalValue = ((ExprNodeConstantDesc) children.get(constIdx)).getValue(); - String constValue = originalValue.toString(); - boolean triedDouble = false; - Number value = null; - try { - if (columnType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)) { - value = new Byte(constValue); - } else if (columnType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)) { - value = new Short(constValue); - } else if (columnType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) { - value = new Integer(constValue); - } else if (columnType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { - value = new Long(constValue); - } else if (columnType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { - value = new Float(constValue); - } else if (columnType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { - triedDouble = true; - value = new Double(constValue); - } else if (columnType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { - // Don't scramble the const type information if comparing to a string column, - // It's not useful to do so; as of now, there is also a hack in - // SemanticAnalyzer#genTablePlan that causes every column to look like a string - // a string down here, so number type information is always lost otherwise. - boolean isNumber = (originalValue instanceof Number); - triedDouble = !isNumber; - value = isNumber ? (Number)originalValue : new Double(constValue); - } - } catch (NumberFormatException nfe) { - // this exception suggests the precise type inference did not succeed - // we'll try again to convert it to double - // however, if we already tried this, or the column is NUMBER type and - // the operator is EQUAL, return false due to the type mismatch - if (triedDouble && - (genericUDF instanceof GenericUDFOPEqual - && !columnType.equals(serdeConstants.STRING_TYPE_NAME))) { - return new ExprNodeConstantDesc(false); - } - - try { - value = new Double(constValue); - } catch (NumberFormatException ex) { - return new ExprNodeConstantDesc(false); - } - } - - if (value != null) { - children.set(constIdx, new ExprNodeConstantDesc(value)); - } - } - // if column type is char and constant type is string, then convert the constant to char // type with padded spaces. final PrimitiveTypeInfo colTypeInfo = TypeInfoFactory diff --git a/ql/src/test/queries/clientpositive/type_widening.q b/ql/src/test/queries/clientpositive/type_widening.q index b504cf9..222fefb 100644 --- a/ql/src/test/queries/clientpositive/type_widening.q +++ b/ql/src/test/queries/clientpositive/type_widening.q @@ -6,3 +6,9 @@ SELECT COALESCE(0, 9223372036854775807) FROM src LIMIT 1; EXPLAIN SELECT * FROM (SELECT 0 AS numcol FROM src UNION ALL SELECT 9223372036854775807 AS numcol FROM src) a ORDER BY numcol; SELECT * FROM (SELECT 0 AS numcol FROM src UNION ALL SELECT 9223372036854775807 AS numcol FROM src) a ORDER BY numcol; +create table t1(a tinyint, b smallint); +explain select * from t1 where a = 2; +explain select * from t1 where b = 2; +explain select * from t1 where a = 200; +explain select * from t1 where b = 40000; +drop table t1; diff --git a/ql/src/test/results/clientpositive/type_widening.q.out b/ql/src/test/results/clientpositive/type_widening.q.out index 84e53f8..dbcc8a0 100644 --- a/ql/src/test/results/clientpositive/type_widening.q.out +++ b/ql/src/test/results/clientpositive/type_widening.q.out @@ -1098,3 +1098,115 @@ POSTHOOK: Input: default@src 9223372036854775807 9223372036854775807 9223372036854775807 +PREHOOK: query: create table t1(a tinyint, b smallint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1(a tinyint, b smallint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: explain select * from t1 where a = 2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 where a = 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (a = 2) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 2 (type: tinyint), b (type: smallint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink + +PREHOOK: query: explain select * from t1 where b = 2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 where b = 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (b = 2) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: a (type: tinyint), 2 (type: smallint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink + +PREHOOK: query: explain select * from t1 where a = 200 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 where a = 200 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (UDFToShort(a) = 200) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: -56 (type: tinyint), b (type: smallint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink + +PREHOOK: query: explain select * from t1 where b = 40000 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 where b = 40000 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (UDFToInteger(b) = 40000) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: a (type: tinyint), -25536 (type: smallint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink + +PREHOOK: query: drop table t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: drop table t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 -- 1.7.12.4 (Apple Git-37)