From cc54bb628f8f34c730261d4d49480b418dea983c Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Thu, 16 Feb 2017 17:05:08 -0800 Subject: [PATCH] HIVE-15957 : Follow Hive's rules for type inference instead of Calcite --- .../calcite/translator/SqlFunctionConverter.java | 33 ++++++++++++-------- .../queries/clientpositive/interval_arithmetic.q | 3 ++ .../test/results/clientpositive/interval_alt.q.out | 6 ++-- .../clientpositive/interval_arithmetic.q.out | 35 ++++++++++++++++++++++ .../llap/metadata_only_queries.q.out | 4 +-- .../clientpositive/metadata_only_queries.q.out | 4 +-- .../spark/metadata_only_queries.q.out | 4 +-- 7 files changed, 68 insertions(+), 21 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 5c85dce..85450c9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -329,6 +329,7 @@ private static String getName(GenericUDF hiveUDF) { StaticBlockBuilder() { registerFunction("+", SqlStdOperatorTable.PLUS, hToken(HiveParser.PLUS, "+")); + registerFunction("-", SqlStdOperatorTable.MINUS, hToken(HiveParser.MINUS, "-")); registerFunction("*", SqlStdOperatorTable.MULTIPLY, hToken(HiveParser.STAR, "*")); registerFunction("/", SqlStdOperatorTable.DIVIDE, hToken(HiveParser.DIVIDE, "/")); registerFunction("%", SqlStdOperatorTable.MOD, hToken(HiveParser.Identifier, "%")); @@ -482,21 +483,29 @@ public static SqlOperator getCalciteFn(String hiveUdfName, // this.So, bail out for now. 
throw new CalciteSemanticException("<=> is not yet supported for cbo.", UnsupportedFeature.Less_than_equal_greater_than); } - SqlOperator calciteOp = hiveToCalcite.get(hiveUdfName); - if (calciteOp == null) { - CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType); - if ("-".equals(hiveUdfName)) { - // Calcite native - has broken inference for return type, so we override it with explicit return type - // e.g. timestamp - timestamp is inferred as timestamp, where it really should be interval. + SqlOperator calciteOp; + CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType); + switch (hiveUdfName) { + // Follow hive's rules for type inference as opposed to Calcite's + // for return type. + //TODO: Perhaps we should do this for all functions, not just +,- + case "-": calciteOp = new SqlMonotonicBinaryOperator("-", SqlKind.MINUS, 40, true, uInf.returnTypeInference, uInf.operandTypeInference, OperandTypes.MINUS_OPERATOR); - } else { - calciteOp = new CalciteSqlFn(uInf.udfName, SqlKind.OTHER_FUNCTION, uInf.returnTypeInference, - uInf.operandTypeInference, uInf.operandTypeChecker, - SqlFunctionCategory.USER_DEFINED_FUNCTION, deterministic); - } + break; + case "+": + calciteOp = new SqlMonotonicBinaryOperator("+", SqlKind.PLUS, 40, true, + uInf.returnTypeInference, uInf.operandTypeInference, OperandTypes.PLUS_OPERATOR); + break; + default: + calciteOp = hiveToCalcite.get(hiveUdfName); + if (null == calciteOp) { + calciteOp = new CalciteSqlFn(uInf.udfName, SqlKind.OTHER_FUNCTION, uInf.returnTypeInference, + uInf.operandTypeInference, uInf.operandTypeChecker, + SqlFunctionCategory.USER_DEFINED_FUNCTION, deterministic); + } + break; } - return calciteOp; } diff --git a/ql/src/test/queries/clientpositive/interval_arithmetic.q b/ql/src/test/queries/clientpositive/interval_arithmetic.q index 06acbd7..445cdfe 100644 --- a/ql/src/test/queries/clientpositive/interval_arithmetic.q +++ 
b/ql/src/test/queries/clientpositive/interval_arithmetic.q @@ -159,4 +159,7 @@ select from interval_arithmetic_1 limit 2; +explain +select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1; +select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1; drop table interval_arithmetic_1; diff --git a/ql/src/test/results/clientpositive/interval_alt.q.out b/ql/src/test/results/clientpositive/interval_alt.q.out index 9884ec3..eba8420 100644 --- a/ql/src/test/results/clientpositive/interval_alt.q.out +++ b/ql/src/test/results/clientpositive/interval_alt.q.out @@ -137,7 +137,7 @@ STAGE PLANS: alias: t Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (2012-01-01 + IntervalDayLiteralProcessor(((- dt) * dt))) (type: timestamp), (2012-01-01 - IntervalDayLiteralProcessor(((- dt) * dt))) (type: timestamp), 2012-01-04 (type: date), (2012-01-01 + IntervalYearMonthLiteralProcessor(concat(dt, '-1'))) (type: date) + expressions: (2012-01-01 + IntervalDayLiteralProcessor(((- dt) * dt))) (type: timestamp), (2012-01-01 - IntervalDayLiteralProcessor(((- dt) * dt))) (type: timestamp), 2012-01-04 00:00:00.0 (type: timestamp), (2012-01-01 + IntervalYearMonthLiteralProcessor(concat(dt, '-1'))) (type: date) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -172,5 +172,5 @@ POSTHOOK: query: select POSTHOOK: type: QUERY POSTHOOK: Input: default@t #### A masked pattern was here #### -2011-12-31 00:00:00 2012-01-02 00:00:00 2012-01-04 2013-02-01 -2011-12-28 00:00:00 2012-01-05 00:00:00 2012-01-04 2014-02-01 +2011-12-31 00:00:00 2012-01-02 00:00:00 2012-01-04 00:00:00 2013-02-01 +2011-12-28 00:00:00 2012-01-05 
00:00:00 2012-01-04 00:00:00 2014-02-01 diff --git a/ql/src/test/results/clientpositive/interval_arithmetic.q.out b/ql/src/test/results/clientpositive/interval_arithmetic.q.out index 64882f8..c1fc738 100644 --- a/ql/src/test/results/clientpositive/interval_arithmetic.q.out +++ b/ql/src/test/results/clientpositive/interval_arithmetic.q.out @@ -606,6 +606,41 @@ POSTHOOK: Input: default@interval_arithmetic_1 #### A masked pattern was here #### 109 20:30:40.246913578 89 02:14:26.000000000 109 20:30:40.246913578 89 02:14:26.000000000 +PREHOOK: query: explain +select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + TableScan + alias: interval_arithmetic_1 + Statistics: Num rows: 12288 Data size: 326837 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 2017-02-19 03:04:00.0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 491520 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@interval_arithmetic_1 +#### A masked pattern was here #### +POSTHOOK: query: select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' 
minute + interval '60' second from interval_arithmetic_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@interval_arithmetic_1 +#### A masked pattern was here #### +2017-02-19 03:04:00 PREHOOK: query: drop table interval_arithmetic_1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@interval_arithmetic_1 diff --git a/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out index 25be543..c8190bd 100644 --- a/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out @@ -343,7 +343,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(11,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -407,7 +407,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 
(type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(11,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out index 2e3331e..57b59dd 100644 --- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out @@ -313,7 +313,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(11,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column 
stats: NONE File Output Operator @@ -367,7 +367,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(11,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out index dc96a0d..543d0ef 100644 --- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out @@ -331,7 +331,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(11,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: 
decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -391,7 +391,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(11,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE File Output Operator -- 1.7.12.4 (Apple Git-37)