diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 608ec04fe7..7a1e1625f3 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -373,6 +373,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vectorized_casts.q,\ vectorized_context.q,\ vectorized_date_funcs.q,\ + vectorized_decimal64_boundary.q,\ vectorized_dynamic_partition_pruning.q,\ vectorized_insert_into_bucketed_table.q,\ vectorized_mapjoin.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 627165d5be..4147195228 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -3212,7 +3212,7 @@ private VectorExpression getCastToDecimal(List childExpr, TypeInfo try { Object constantValue = ((ExprNodeConstantDesc) child).getValue(); if (tryDecimal64Cast) { - if (((DecimalTypeInfo)returnType).precision() + ((DecimalTypeInfo)returnType).scale() <= 18) { + if (((DecimalTypeInfo)returnType).precision() <= 18) { Long longValue = castConstantToLong(constantValue, child.getTypeInfo(), PrimitiveCategory.LONG); return getConstantVectorExpression(longValue, TypeInfoFactory.longTypeInfo, VectorExpressionDescriptor.Mode.PROJECTION); @@ -3229,7 +3229,7 @@ private VectorExpression getCastToDecimal(List childExpr, TypeInfo } if (isIntFamily(inputType)) { if (tryDecimal64Cast) { - if (((DecimalTypeInfo)returnType).precision() + ((DecimalTypeInfo)returnType).scale() <= 18) { + if (((DecimalTypeInfo)returnType).precision() <= 18) { return createVectorExpression(CastLongToDecimal64.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.DECIMAL_64); } diff --git a/ql/src/test/queries/clientpositive/vectorized_decimal64_boundary.q b/ql/src/test/queries/clientpositive/vectorized_decimal64_boundary.q new file mode 100644 index 0000000000..72ef33da0c --- /dev/null +++ b/ql/src/test/queries/clientpositive/vectorized_decimal64_boundary.q @@ -0,0 +1,32 @@ +set hive.mapred.mode=nonstrict; + +drop table if exists mini_store; +create table mini_store +( + s_store_sk int, + s_store_id string +) +row format delimited fields terminated by '\t' +STORED AS ORC; + +drop table if exists mini_sales; +create table mini_sales +( + ss_store_sk int, + ss_quantity int, + ss_sales_price decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC; + +insert into mini_store values (1, 'store'); +insert into mini_sales values (1, 2, 1.2); + +explain vectorization detail +select s_store_id, coalesce(ss_sales_price*ss_quantity,0) sumsales + from mini_sales, mini_store + where ss_store_sk = s_store_sk; + +select s_store_id, coalesce(ss_sales_price*ss_quantity,0) sumsales +from mini_sales, mini_store +where ss_store_sk = s_store_sk; diff --git a/ql/src/test/results/clientpositive/llap/vectorized_decimal64_boundary.q.out b/ql/src/test/results/clientpositive/llap/vectorized_decimal64_boundary.q.out new file mode 100644 index 0000000000..88a66ebe2b --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vectorized_decimal64_boundary.q.out @@ -0,0 +1,259 @@ +PREHOOK: query: drop table if exists mini_store +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists mini_store +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table mini_store +( + s_store_sk int, + s_store_id string +) +row format delimited fields terminated by '\t' +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@mini_store +POSTHOOK: query: create table mini_store +( + s_store_sk int, + s_store_id string +) +row format delimited fields terminated by '\t' +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mini_store +PREHOOK: query: drop table if exists mini_sales +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists mini_sales +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table mini_sales +( + ss_store_sk int, + ss_quantity int, + ss_sales_price decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@mini_sales +POSTHOOK: query: create table mini_sales +( + ss_store_sk int, + ss_quantity int, + ss_sales_price decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mini_sales +PREHOOK: query: insert into mini_store values (1, 'store') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@mini_store +POSTHOOK: query: insert into mini_store values (1, 'store') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@mini_store +POSTHOOK: Lineage: mini_store.s_store_id SCRIPT [] +POSTHOOK: Lineage: mini_store.s_store_sk SCRIPT [] +PREHOOK: query: insert into mini_sales values (1, 2, 1.2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@mini_sales +POSTHOOK: query: insert into mini_sales values (1, 2, 1.2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@mini_sales +POSTHOOK: Lineage: mini_sales.ss_quantity SCRIPT [] +POSTHOOK: Lineage: mini_sales.ss_sales_price SCRIPT [] +POSTHOOK: Lineage: mini_sales.ss_store_sk SCRIPT [] +PREHOOK: query: explain vectorization detail +select s_store_id, coalesce(ss_sales_price*ss_quantity,0) sumsales + from mini_sales, mini_store + where ss_store_sk = s_store_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@mini_sales +PREHOOK: Input: default@mini_store +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select s_store_id, coalesce(ss_sales_price*ss_quantity,0) sumsales + from mini_sales, mini_store + where ss_store_sk = s_store_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mini_sales +POSTHOOK: Input: default@mini_store +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: mini_sales + filterExpr: ss_store_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ss_store_sk:int, 1:ss_quantity:int, 2:ss_sales_price:decimal(7,2)/DECIMAL_64, 3:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: ss_store_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_store_sk (type: int), CASE WHEN ((ss_sales_price is not null and CAST( ss_quantity AS decimal(10,0)) is not null)) THEN ((ss_sales_price * CAST( ss_quantity AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 18] + selectExpressions: IfExprDecimal64ColumnDecimal64Column(col 14:boolean, col 16:decimal(18,2)/DECIMAL_64, col 17:decimal(18,2)/DECIMAL_64)(children: ColAndCol(col 11:boolean, col 13:boolean)(children: IsNotNull(col 2:decimal(7,2)/DECIMAL_64) -> 11:boolean, IsNotNull(col 19:decimal(10,0))(children: ConvertDecimal64ToDecimal(col 12:decimal(10,0)/DECIMAL_64)(children: CastLongToDecimal64(col 1:int) -> 12:decimal(10,0)/DECIMAL_64) -> 19:decimal(10,0)) -> 13:boolean) -> 14:boolean, Decimal64ColMultiplyDecimal64Column(col 2:decimal(7,2)/DECIMAL_64, col 15:decimal(10,0)/DECIMAL_64)(children: CastLongToDecimal64(col 1:int) -> 15:decimal(10,0)/DECIMAL_64) -> 16:decimal(18,2)/DECIMAL_64, ConstantVectorExpression(val 0) -> 17:bigint) -> 18:decimal(18,2)/DECIMAL_64 + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 18:decimal(18,2) + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(18,2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: ss_store_sk:int, ss_quantity:int, ss_sales_price:decimal(7,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(10,0)/DECIMAL_64, bigint, bigint, decimal(10,0)/DECIMAL_64, decimal(18,2)/DECIMAL_64, decimal(18,2), bigint, decimal(10,0)/DECIMAL_64, bigint, bigint, decimal(10,0)/DECIMAL_64, decimal(18,2)/DECIMAL_64, bigint, decimal(18,2)/DECIMAL_64, decimal(10,0)] + Map 3 + Map Operator Tree: + TableScan + alias: mini_store + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:s_store_sk:int, 1:s_store_id:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int), s_store_id (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: s_store_sk:int, s_store_id:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col1 (type: decimal(18,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s_store_id, coalesce(ss_sales_price*ss_quantity,0) sumsales +from mini_sales, mini_store +where ss_store_sk = s_store_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@mini_sales +PREHOOK: Input: default@mini_store +#### A masked pattern was here #### +POSTHOOK: query: select s_store_id, coalesce(ss_sales_price*ss_quantity,0) sumsales +from mini_sales, mini_store +where ss_store_sk = s_store_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mini_sales +POSTHOOK: Input: default@mini_store +#### A masked pattern was here #### +store 2.40