diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 511f5d65e2..803cf45f8a 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -842,6 +842,7 @@ minillaplocal.query.files=\ vector_create_struct_table.q,\ vector_decimal_2.q,\ vector_decimal_udf.q,\ + vector_decimal64_case_when.q,\ vector_full_outer_join.q,\ vector_fullouter_mapjoin_1_fast.q,\ vector_fullouter_mapjoin_1_optimized.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 627165d5be..dbd5d322bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1300,8 +1300,16 @@ private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc chil TypeInfo inputTypeInfo = child.getTypeInfo(); String inputTypeString = inputTypeInfo.getTypeName(); String castTypeString = castType.getTypeName(); + DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE; + if (child instanceof ExprNodeColumnDesc) { + String columnName = ((ExprNodeColumnDesc)child).getColumn(); + int columnIndex = initialColumnNames.indexOf(columnName); + if (columnIndex != -1) { + dataTypePhysicalVariation = getDataTypePhysicalVariation(columnIndex); + } + } - if (inputTypeString.equals(castTypeString)) { + if (inputTypeString.equals(castTypeString) && dataTypePhysicalVariation != DataTypePhysicalVariation.DECIMAL_64) { // Nothing to be done return null; } @@ -1693,9 +1701,6 @@ private boolean checkExprNodeDescForDecimal64(ExprNodeDesc exprNodeDesc) throws GenericUDF udf = ((ExprNodeGenericFuncDesc) exprNodeDesc).getGenericUDF(); Class udfClass = udf.getClass(); - if (udf instanceof GenericUDFToDecimal) { - return true; - } // We have a class-level annotation that says whether the UDF's vectorization expressions // support Decimal64. VectorizedExpressionsSupportDecimal64 annotation = @@ -2473,14 +2478,18 @@ private VectorExpression getCoalesceExpression(List childExpr, final int size = vectorChildren.length; TypeInfo[] inputTypeInfos = new TypeInfo[size]; DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[size]; + DataTypePhysicalVariation outputDataTypePhysicalVariation = DataTypePhysicalVariation.DECIMAL_64; int i = 0; for (VectorExpression ve : vectorChildren) { inputColumns[i] = ve.getOutputColumnNum(); inputTypeInfos[i] = ve.getOutputTypeInfo(); inputDataTypePhysicalVariations[i++] = ve.getOutputDataTypePhysicalVariation(); + if (inputDataTypePhysicalVariations[i - 1] == DataTypePhysicalVariation.NONE) { + outputDataTypePhysicalVariation = DataTypePhysicalVariation.NONE; + } } - final int outputColumnNum = ocm.allocateOutputColumn(returnType); + final int outputColumnNum = ocm.allocateOutputColumn(returnType, outputDataTypePhysicalVariation); VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outputColumnNum); vectorCoalesce.setChildExpressions(vectorChildren); @@ -2489,7 +2498,7 @@ private VectorExpression getCoalesceExpression(List childExpr, vectorCoalesce.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations); vectorCoalesce.setOutputTypeInfo(returnType); - vectorCoalesce.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + vectorCoalesce.setOutputDataTypePhysicalVariation(outputDataTypePhysicalVariation); freeNonColumns(vectorChildren); @@ -3877,8 +3886,9 @@ private VectorExpression doGetIfExpression(GenericUDFIf genericUDFIf, List] + Select Operator + expressions: ss_ext_list_price (type: decimal(7,2)), ss_ext_discount_amt (type: int) + outputColumnNames: ss_ext_list_price, ss_ext_discount_amt + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2] + Statistics: Num rows: 1000 Data size: 116000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(CASE WHEN ((ss_ext_discount_amt = 101)) THEN (ss_ext_list_price) ELSE (null) END) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(IfExprCondExprNull(col 5:boolean, col 6:decimal(7,2), null)(children: LongColEqualLongScalar(col 2:int, val 101) -> 5:boolean, ConvertDecimal64ToDecimal(col 0:decimal(7,2)/DECIMAL_64) -> 6:decimal(7,2)) -> 7:decimal(7,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:decimal(17,2) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 2] + dataColumns: ss_ext_list_price:decimal(7,2)/DECIMAL_64, ss_ext_wholesale_cost:decimal(19,1), ss_ext_discount_amt:int, ss_ext_sales_price:decimal(7,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(7,2), decimal(7,2)] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(case when (ss_ext_discount_amt=101) then ss_ext_list_price else null end) from vector_decimal64_case_when_tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_decimal64_case_when_tmp +#### A masked pattern was here #### +POSTHOOK: query: select sum(case when (ss_ext_discount_amt=101) then ss_ext_list_price else null end) from vector_decimal64_case_when_tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_decimal64_case_when_tmp +#### A masked pattern was here #### +1001.00 +PREHOOK: query: explain vectorization detail select sum(NVL(ss_ext_list_price, 1)) from vector_decimal64_case_when_tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_decimal64_case_when_tmp +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail select sum(NVL(ss_ext_list_price, 1)) from vector_decimal64_case_when_tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_decimal64_case_when_tmp +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_decimal64_case_when_tmp + Statistics: Num rows: 1000 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ss_ext_list_price:decimal(7,2)/DECIMAL_64, 1:ss_ext_wholesale_cost:decimal(19,1), 2:ss_ext_discount_amt:int, 3:ss_ext_sales_price:decimal(7,2)/DECIMAL_64, 4:ROW__ID:struct] + Select Operator + expressions: ss_ext_list_price (type: decimal(7,2)) + outputColumnNames: ss_ext_list_price + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1000 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(COALESCE(ss_ext_list_price,1)) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(VectorCoalesce(columns [5, 6])(children: ConvertDecimal64ToDecimal(col 0:decimal(12,2)/DECIMAL_64) -> 5:decimal(12,2), ConstantVectorExpression(val 1) -> 6:decimal(10,0)) -> 7:decimal(12,2)) -> decimal(22,2) + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:decimal(22,2) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(22,2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0] + dataColumns: ss_ext_list_price:decimal(7,2)/DECIMAL_64, ss_ext_wholesale_cost:decimal(19,1), ss_ext_discount_amt:int, ss_ext_sales_price:decimal(7,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(12,2), decimal(10,0), decimal(12,2)] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:decimal(22,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(22,2)) -> decimal(22,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(NVL(ss_ext_list_price, 1)) from vector_decimal64_case_when_tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_decimal64_case_when_tmp +#### A masked pattern was here #### +POSTHOOK: query: select sum(NVL(ss_ext_list_price, 1)) from vector_decimal64_case_when_tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_decimal64_case_when_tmp +#### A masked pattern was here #### +1500500.00 +PREHOOK: query: explain vectorization detail select sum(NVL(ss_ext_list_price, 1.0)) from vector_decimal64_case_when_tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_decimal64_case_when_tmp +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail select sum(NVL(ss_ext_list_price, 1.0)) from vector_decimal64_case_when_tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_decimal64_case_when_tmp +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_decimal64_case_when_tmp + Statistics: Num rows: 1000 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ss_ext_list_price:decimal(7,2)/DECIMAL_64, 1:ss_ext_wholesale_cost:decimal(19,1), 2:ss_ext_discount_amt:int, 3:ss_ext_sales_price:decimal(7,2)/DECIMAL_64, 4:ROW__ID:struct] + Select Operator + expressions: ss_ext_list_price (type: decimal(7,2)) + outputColumnNames: ss_ext_list_price + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1000 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(COALESCE(ss_ext_list_price,1)) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(VectorCoalesce(columns [5, 6])(children: ConvertDecimal64ToDecimal(col 0:decimal(7,2)/DECIMAL_64) -> 5:decimal(7,2), ConstantVectorExpression(val 1) -> 6:decimal(7,2)) -> 7:decimal(7,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:decimal(17,2) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0] + dataColumns: ss_ext_list_price:decimal(7,2)/DECIMAL_64, ss_ext_wholesale_cost:decimal(19,1), ss_ext_discount_amt:int, ss_ext_sales_price:decimal(7,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(7,2), decimal(7,2), decimal(7,2)] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:decimal(17,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(NVL(ss_ext_list_price, 1.0)) from vector_decimal64_case_when_tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_decimal64_case_when_tmp +#### A masked pattern was here #### +POSTHOOK: query: select sum(NVL(ss_ext_list_price, 1.0)) from vector_decimal64_case_when_tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_decimal64_case_when_tmp +#### A masked pattern was here #### +1500500.00