diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index c4f47e1..8fc2c76 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2281,27 +2281,40 @@ private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveExceptio } static String getNormalizedName(String hiveTypeName) throws HiveException { - VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName); - switch (argType) { - case INT_FAMILY: + if (hiveTypeName.equalsIgnoreCase("long")) { return "Long"; - case FLOAT_FAMILY: + } + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(hiveTypeName); + Category category = typeInfo.getCategory(); + if (category != Category.PRIMITIVE) { + throw new HiveException("Expecting primitive hive type name " + hiveTypeName); + } + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + switch (primitiveCategory) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + return "Long"; + case FLOAT: + case DOUBLE: return "Double"; - case DECIMAL: - //Return the decimal type as is, it includes scale and precision. - return hiveTypeName; case STRING: return "String"; - case CHAR: - //Return the CHAR type as is, it includes maximum length - return hiveTypeName; case VARCHAR: - //Return the VARCHAR type as is, it includes maximum length. + case CHAR: + //Return the VARCHAR/CHAR type as is, it includes maximum length return hiveTypeName; case DATE: return "Date"; case TIMESTAMP: return "Timestamp"; + case BINARY: + return "Binary"; + case DECIMAL: + //Return the decimal type as is, it includes scale and precision. + return hiveTypeName; case INTERVAL_YEAR_MONTH: case INTERVAL_DAY_TIME: return hiveTypeName; @@ -2311,24 +2324,39 @@ static String getNormalizedName(String hiveTypeName) throws HiveException { } static String getUndecoratedName(String hiveTypeName) throws HiveException { - VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName); - switch (argType) { - case INT_FAMILY: + if (hiveTypeName.equalsIgnoreCase("long")) { return "Long"; - case FLOAT_FAMILY: + } + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(hiveTypeName); + Category category = typeInfo.getCategory(); + if (category != Category.PRIMITIVE) { + throw new HiveException("Expecting primitive hive type name " + hiveTypeName); + } + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + switch (primitiveCategory) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + return "Long"; + case FLOAT: + case DOUBLE: return "Double"; - case DECIMAL: - return "Decimal"; case STRING: return "String"; - case CHAR: - return "Char"; case VARCHAR: return "VarChar"; + case CHAR: + return "Char"; case DATE: return "Date"; case TIMESTAMP: return "Timestamp"; + case BINARY: + return "Binary"; + case DECIMAL: + return "Decimal"; case INTERVAL_YEAR_MONTH: case INTERVAL_DAY_TIME: return hiveTypeName; diff --git ql/src/test/queries/clientpositive/vector_binary_join_groupby.q ql/src/test/queries/clientpositive/vector_binary_join_groupby.q index 1a9d280..1d99e34 100644 --- ql/src/test/queries/clientpositive/vector_binary_join_groupby.q +++ ql/src/test/queries/clientpositive/vector_binary_join_groupby.q @@ -45,7 +45,7 @@ SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin; SELECT sum(hash(*)) -FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin; +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin; EXPLAIN SELECT count(*), bin @@ -55,3 +55,9 @@ GROUP BY bin; SELECT count(*), bin FROM hundredorc GROUP BY bin; + +-- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). + +EXPLAIN +SELECT t1.i, t1.bin, t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out index 8cbb4b1..6fbbf91 100644 --- ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out @@ -194,18 +194,17 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: SELECT sum(hash(*)) -FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin PREHOOK: type: QUERY PREHOOK: Input: default@hundredorc #### A masked pattern was here #### POSTHOOK: query: SELECT sum(hash(*)) -FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin POSTHOOK: type: QUERY POSTHOOK: Input: default@hundredorc #### A masked pattern was here #### --107801098240 +-27832781952 PREHOOK: query: EXPLAIN SELECT count(*), bin FROM hundredorc @@ -315,3 +314,88 @@ POSTHOOK: Input: default@hundredorc 3 xylophone band 2 yard duty 3 zync studies +PREHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). + +EXPLAIN +SELECT t1.i, t1.bin, t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i +PREHOOK: type: QUERY +POSTHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). + +EXPLAIN +SELECT t1.i, t1.bin, t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i is not null (type: boolean) + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), bin (type: binary) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i is not null (type: boolean) + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), bin (type: binary) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: binary) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out index d9c027a..dc1fcd7 100644 --- ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out @@ -190,18 +190,17 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(*)) -FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin PREHOOK: type: QUERY PREHOOK: Input: default@hundredorc #### A masked pattern was here #### POSTHOOK: query: SELECT sum(hash(*)) -FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin POSTHOOK: type: QUERY POSTHOOK: Input: default@hundredorc #### A masked pattern was here #### --107801098240 +-27832781952 PREHOOK: query: EXPLAIN SELECT count(*), bin FROM hundredorc @@ -303,3 +302,86 @@ POSTHOOK: Input: default@hundredorc 3 xylophone band 2 yard duty 3 zync studies +PREHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). + +EXPLAIN +SELECT t1.i, t1.bin, t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i +PREHOOK: type: QUERY +POSTHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). + +EXPLAIN +SELECT t1.i, t1.bin, t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:t1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:t1 + TableScan + alias: t1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i is not null (type: boolean) + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), bin (type: binary) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i is not null (type: boolean) + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), bin (type: binary) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +