From 5b816f559c3cd7fab95ef4bbd82c3a9a6029f61c Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Tue, 29 Mar 2016 19:01:24 -0700 Subject: [PATCH] HIVE-13381 : Timestamp & date should have precedence in type hierarchy than string group --- .../hadoop/hive/ql/exec/FunctionRegistry.java | 9 +- .../hive/ql/exec/vector/VectorizationContext.java | 12 +- .../hadoop/hive/ql/exec/TestFunctionRegistry.java | 18 ++- .../ql/exec/vector/TestVectorizationContext.java | 17 +-- .../test/queries/clientpositive/cast_on_constant.q | 7 + .../results/clientpositive/cast_on_constant.q.out | 160 +++++++++++++++++++++ 6 files changed, 198 insertions(+), 25 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/cast_on_constant.q create mode 100644 ql/src/test/results/clientpositive/cast_on_constant.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 56b96b4..1343b39 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -737,7 +737,14 @@ public static TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) { return getTypeInfoForPrimitiveCategory( (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING); } - + // timestamp/date is higher precedence than String_GROUP + if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.DATE_GROUP) { + return b; + } + // date/timestamp is higher precedence than String_GROUP + if (pgB == PrimitiveGrouping.STRING_GROUP && pgA == PrimitiveGrouping.DATE_GROUP) { + return a; + } // Another special case, because timestamp is not implicitly convertible to numeric types. if ((pgA == PrimitiveGrouping.NUMERIC_GROUP || pgB == PrimitiveGrouping.NUMERIC_GROUP) && (pcA == PrimitiveCategory.TIMESTAMP || pcB == PrimitiveCategory.TIMESTAMP)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 1eb960d..30a0f5a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -155,7 +155,7 @@ VectorExpressionDescriptor vMap; - private List initialColumnNames; + private final List initialColumnNames; private List projectedColumns; private List projectionColumnNames; @@ -712,7 +712,7 @@ private GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException genericUdf = new GenericUDFToDate(); break; case TIMESTAMP: - genericUdf = new GenericUDFToUnixTimeStamp(); + genericUdf = new GenericUDFTimestamp(); break; case INTERVAL_YEAR_MONTH: genericUdf = new GenericUDFToIntervalYearMonth(); @@ -1329,7 +1329,7 @@ public static InConstantType getInConstantTypeFromPrimitiveCategory(PrimitiveCat case INT: case LONG: return InConstantType.INT_FAMILY; - + case DATE: return InConstantType.TIMESTAMP; @@ -1339,16 +1339,16 @@ public static InConstantType getInConstantTypeFromPrimitiveCategory(PrimitiveCat case FLOAT: case DOUBLE: return InConstantType.FLOAT_FAMILY; - + case STRING: case CHAR: case VARCHAR: case BINARY: return InConstantType.STRING_FAMILY; - + case DECIMAL: return InConstantType.DECIMAL; - + case INTERVAL_YEAR_MONTH: case INTERVAL_DAY_TIME: diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java index 6a83c32..8488c21 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java @@ -253,9 +253,13 @@ public void testCommonClassComparison() { TypeInfoFactory.doubleTypeInfo); comparison(TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo, - TypeInfoFactory.stringTypeInfo); + TypeInfoFactory.dateTypeInfo); comparison(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.dateTypeInfo, - TypeInfoFactory.stringTypeInfo); + TypeInfoFactory.dateTypeInfo); + comparison(TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.timestampTypeInfo); + comparison(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo, + TypeInfoFactory.timestampTypeInfo); comparison(TypeInfoFactory.intTypeInfo, TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.doubleTypeInfo); @@ -364,15 +368,15 @@ public void testGetTypeInfoForPrimitiveCategory() { // non-qualified types should simply return the TypeInfo associated with that type assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory( - (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, + (PrimitiveTypeInfo) varchar10, TypeInfoFactory.stringTypeInfo, PrimitiveCategory.STRING)); assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory( - (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, - (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, PrimitiveCategory.STRING)); assertEquals(TypeInfoFactory.doubleTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory( - (PrimitiveTypeInfo) TypeInfoFactory.doubleTypeInfo, - (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.doubleTypeInfo, + TypeInfoFactory.stringTypeInfo, PrimitiveCategory.DOUBLE)); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index e4c7529..bb37a04 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -23,14 +23,9 @@ import java.sql.Timestamp; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; - -import junit.framework.Assert; import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoubleToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; @@ -73,11 +68,12 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampTimestamp; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnNotBetween; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn; @@ -144,13 +140,12 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFPower; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFRound; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.junit.Assert; import org.junit.Test; public class TestVectorizationContext { @@ -1215,12 +1210,12 @@ public void testBetweenFilters() throws HiveException { children1.set(2, new ExprNodeConstantDesc("2013-11-05 00:00:00.000")); children1.set(3, new ExprNodeConstantDesc("2013-11-06 00:00:00.000")); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); - assertEquals(FilterStringColumnBetween.class, ve.getClass()); + assertEquals(FilterTimestampColumnBetween.class, ve.getClass()); // timestamp NOT BETWEEN children1.set(0, new ExprNodeConstantDesc(new Boolean(true))); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); - assertEquals(FilterStringColumnNotBetween.class, ve.getClass()); + assertEquals(FilterTimestampColumnNotBetween.class, ve.getClass()); } // Test translation of both IN filters and boolean-valued IN expressions (non-filters). @@ -1468,7 +1463,7 @@ public void testIfConditionalExprs() throws HiveException { children1.set(2, col3Expr); ve = vc.getVectorExpression(exprDesc); assertTrue(ve instanceof IfExprCharScalarStringGroupColumn); - + // test for VARCHAR type VarcharTypeInfo varcharTypeInfo = new VarcharTypeInfo(10); constDesc2 = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Alpha", 10)); diff --git a/ql/src/test/queries/clientpositive/cast_on_constant.q b/ql/src/test/queries/clientpositive/cast_on_constant.q new file mode 100644 index 0000000..aabb9c6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cast_on_constant.q @@ -0,0 +1,7 @@ +create table t1(ts_field timestamp, date_field date); +explain select * from t1 where ts_field = "2016-01-23 00:00:00"; +explain select * from t1 where date_field = "2016-01-23"; +explain select * from t1 where ts_field = timestamp '2016-01-23 00:00:00'; +explain select * from t1 where date_field = date '2016-01-23'; + +drop table t1; diff --git a/ql/src/test/results/clientpositive/cast_on_constant.q.out b/ql/src/test/results/clientpositive/cast_on_constant.q.out new file mode 100644 index 0000000..f8d6a0d --- /dev/null +++ b/ql/src/test/results/clientpositive/cast_on_constant.q.out @@ -0,0 +1,160 @@ +PREHOOK: query: create table t1(ts_field timestamp, date_field date) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1(ts_field timestamp, date_field date) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: explain select * from t1 where ts_field = "2016-01-23 00:00:00" +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 where ts_field = "2016-01-23 00:00:00" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ts_field = 2016-01-23 00:00:00.0) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 2016-01-23 00:00:00.0 (type: timestamp), date_field (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from t1 where date_field = "2016-01-23" +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 where date_field = "2016-01-23" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (date_field = 2016-01-23) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ts_field (type: timestamp), 2016-01-23 (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from t1 where ts_field = timestamp '2016-01-23 00:00:00' +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 where ts_field = timestamp '2016-01-23 00:00:00' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ts_field = 2016-01-23 00:00:00.0) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 2016-01-23 00:00:00.0 (type: timestamp), date_field (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from t1 where date_field = date '2016-01-23' +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 where date_field = date '2016-01-23' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (date_field = 2016-01-23) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: ts_field (type: timestamp), 2016-01-23 (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: drop table t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 -- 1.7.12.4 (Apple Git-37)