diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 9d27b8d5c0..abf76a95e7 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -877,6 +877,7 @@ minillaplocal.query.files=\
   vector_decimal_udf.q,\
   vector_decimal64_case_when_nvl.q,\
   vector_decimal64_case_when_nvl_cbo.q,\
+  vector_decimal64_multi_vertex.q,\
   vector_full_outer_join.q,\
   vector_fullouter_mapjoin_1_fast.q,\
   vector_fullouter_mapjoin_1_optimized.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 043fdd63e7..dd5daa3d19 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -497,6 +497,10 @@ public void setInitialDataTypePhysicalVariations(
     return initialTypeInfos.toArray(new TypeInfo[0]);
   }
 
+  public List getInitialDataTypePhysicalVariations() {
+    return initialDataTypePhysicalVariations;
+  }
+
   public TypeInfo getTypeInfo(int columnNum) throws HiveException {
     if (initialTypeInfos == null) {
       throw new HiveException("initialTypeInfos array is null in contextName " + contextName);
@@ -834,6 +838,11 @@ public int allocateScratchColumn(TypeInfo typeInfo) throws HiveException {
     return ocm.allocateOutputColumn(typeInfo);
   }
 
+  public int allocateScratchColumn(TypeInfo typeInfo, DataTypePhysicalVariation dataTypePhysicalVariation)
+      throws HiveException {
+    return ocm.allocateOutputColumn(typeInfo, dataTypePhysicalVariation);
+  }
+
   public int[] currentScratchColumns() {
     return ocm.currentScratchColumns();
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 988291664e..500eca67eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -3832,7 +3832,14 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi
         // Make a new big table scratch column for the small table value.
         TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
-        int scratchColumn = vContext.allocateScratchColumn(typeInfo);
+        DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE;
+        if (typeInfo instanceof DecimalTypeInfo) {
+          DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
+          if (decimalTypeInfo.getPrecision() <= 18) {
+            dataTypePhysicalVariation = DataTypePhysicalVariation.DECIMAL_64;
+          }
+        }
+        int scratchColumn = vContext.allocateScratchColumn(typeInfo, dataTypePhysicalVariation);
 
         projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
@@ -4117,11 +4124,9 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc,
       VectorExpression ve = allValueExpressions[i];
       reduceSinkValueColumnMap[i] = ve.getOutputColumnNum();
       reduceSinkValueTypeInfos[i] = valueDescs.get(i).getTypeInfo();
-      reduceSinkValueColumnVectorTypes[i] =
-          VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkValueTypeInfos[i]);
-      if (!IdentityExpression.isColumnOnly(ve)) {
-        reduceSinkValueExpressionsList.add(ve);
-      }
+      reduceSinkValueColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(
+          reduceSinkValueTypeInfos[i], ve.getOutputDataTypePhysicalVariation());
+      reduceSinkValueExpressionsList.add(ve);
     }
     if (reduceSinkValueExpressionsList.size() == 0) {
       reduceSinkValueExpressions = null;
@@ -4676,7 +4681,8 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) {
       ExprNodeDesc expr = colList.get(i);
       VectorExpression ve = vContext.getVectorExpression(expr);
       projectedOutputColumns[i] = ve.getOutputColumnNum();
-      if (ve instanceof IdentityExpression) {
+      if (ve instanceof IdentityExpression
+          && ve.getOutputDataTypePhysicalVariation() != DataTypePhysicalVariation.DECIMAL_64) {
         // Suppress useless evaluation.
         continue;
       }
diff --git a/ql/src/test/queries/clientpositive/vector_decimal64_multi_vertex.q b/ql/src/test/queries/clientpositive/vector_decimal64_multi_vertex.q
new file mode 100644
index 0000000000..85444e38af
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_decimal64_multi_vertex.q
@@ -0,0 +1,46 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=10000;
+-- start query 1 in stream 0 using template query19.tpl and seed 1930872976
+create table store
+(
+    s_store_sk int,
+    s_store_id string,
+    s_rec_start_date string,
+    s_rec_end_date string,
+    s_closed_date_sk int
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC;
+create table store_sales
+(
+    ss_item_sk int,
+    ss_ext_sales_price decimal(7,2)
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC;
+
+insert into store values(1,'ramesh','ramesh','ramesh',1);
+insert into store_sales values(1,1.1);
+
+explain vectorization detail
+select s_store_id brand_id, s_rec_start_date brand, s_rec_end_date, s_closed_date_sk,
+    sum(ss_ext_sales_price) ext_price
+ from store_sales, store
+ where ss_item_sk = s_store_sk
+ group by s_store_id,
+   s_rec_start_date,
+   s_rec_end_date,
+   s_closed_date_sk;
+
+select s_store_id brand_id, s_rec_start_date brand, s_rec_end_date, s_closed_date_sk,
+    sum(ss_ext_sales_price) ext_price
+ from store_sales, store
+ where ss_item_sk = s_store_sk
+ group by s_store_id,
+   s_rec_start_date,
+   s_rec_end_date,
+   s_closed_date_sk;
+-- end query 1 in stream 0 using template query19.tpl
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal64_multi_vertex.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal64_multi_vertex.q.out
new file mode 100644
index 0000000000..1e2fafcb53
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal64_multi_vertex.q.out
@@ -0,0 +1,331 @@
+PREHOOK: query: create table store
+(
+    s_store_sk int,
+    s_store_id string,
+    s_rec_start_date string,
+    s_rec_end_date string,
+    s_closed_date_sk int
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@store
+POSTHOOK: query: create table store
+(
+    s_store_sk int,
+    s_store_id string,
+    s_rec_start_date string,
+    s_rec_end_date string,
+    s_closed_date_sk int
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@store
+PREHOOK: query: create table store_sales
+(
+    ss_item_sk int,
+    ss_ext_sales_price decimal(7,2)
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: create table store_sales
+(
+    ss_item_sk int,
+    ss_ext_sales_price decimal(7,2)
+)
+row format delimited fields terminated by '\t'
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@store_sales
+PREHOOK: query: insert into store values(1,'ramesh','ramesh','ramesh',1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@store
+POSTHOOK: query: insert into store values(1,'ramesh','ramesh','ramesh',1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@store
+POSTHOOK: Lineage: store.s_closed_date_sk SCRIPT []
+POSTHOOK: Lineage: store.s_rec_end_date SCRIPT []
+POSTHOOK: Lineage: store.s_rec_start_date SCRIPT []
+POSTHOOK: Lineage: store.s_store_id SCRIPT []
+POSTHOOK: Lineage: store.s_store_sk SCRIPT []
+PREHOOK: query: insert into store_sales values(1,1.1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: insert into store_sales values(1,1.1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@store_sales
+POSTHOOK: Lineage: store_sales.ss_ext_sales_price SCRIPT []
+POSTHOOK: Lineage: store_sales.ss_item_sk SCRIPT []
+PREHOOK: query: explain vectorization detail
+select s_store_id brand_id, s_rec_start_date brand, s_rec_end_date, s_closed_date_sk,
+    sum(ss_ext_sales_price) ext_price
+ from store_sales, store
+ where ss_item_sk = s_store_sk
+ group by s_store_id,
+   s_rec_start_date,
+   s_rec_end_date,
+   s_closed_date_sk
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: explain vectorization detail
+select s_store_id brand_id, s_rec_start_date brand, s_rec_end_date, s_closed_date_sk,
+    sum(ss_ext_sales_price) ext_price
+ from store_sales, store
+ where ss_item_sk = s_store_sk
+ group by s_store_id,
+   s_rec_start_date,
+   s_rec_end_date,
+   s_closed_date_sk
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: store_sales
+                  filterExpr: ss_item_sk is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ss_item_sk:int, 1:ss_ext_sales_price:decimal(7,2)/DECIMAL_64, 2:ROW__ID:struct]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:int)
+                    predicate: ss_item_sk is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                          selectExpressions: col 1:decimal(7,2)/DECIMAL_64
+                      Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        Map Join Vectorization:
+                            bigTableKeyColumns: 0:int
+                            bigTableRetainColumnNums: [1]
+                            bigTableValueColumns: 1:decimal(7,2)
+                            className: VectorMapJoinInnerLongOperator
+                            native: true
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                            nonOuterSmallTableKeyMapping: []
+                            projectedOutput: 1:decimal(7,2), 3:string, 4:string, 5:string, 6:int
+                            smallTableValueMapping: 3:string, 4:string, 5:string, 6:int
+                            hashTableImplementationType: OPTIMIZED
+                        outputColumnNames: _col1, _col3, _col4, _col5, _col6
+                        input vertices:
+                          1 Map 3
+                        Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: sum(_col1)
+                          Group By Vectorization:
+                              aggregators: VectorUDAFSumDecimal64(col 1:decimal(7,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64
+                              className: VectorGroupByOperator
+                              groupByMode: HASH
+                              keyExpressions: col 3:string, col 4:string, col 5:string, col 6:int
+                              native: false
+                              vectorProcessingMode: HASH
+                              projectedOutputColumnNums: [0]
+                          keys: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: int)
+                          minReductionHashAggr: 0.0
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                          Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int)
+                            null sort order: zzzz
+                            sort order: ++++
+                            Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int)
+                            Reduce Sink Vectorization:
+                                className: VectorReduceSinkMultiKeyOperator
+                                keyColumns: 0:string, 1:string, 2:string, 3:int
+                                native: true
+                                nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                                valueColumns: 4:decimal(17,2)
+                                valueExpressions: col 4:decimal(17,2)/DECIMAL_64
+                            Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col4 (type: decimal(17,2))
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: ss_item_sk:int, ss_ext_sales_price:decimal(7,2)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string, string, string, bigint]
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: store
+                  filterExpr: s_store_sk is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:s_store_sk:int, 1:s_store_id:string, 2:s_rec_start_date:string, 3:s_rec_end_date:string, 4:s_closed_date_sk:int, 5:ROW__ID:struct]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:int)
+                    predicate: s_store_sk is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: s_store_sk (type: int), s_store_id (type: string), s_rec_start_date (type: string), s_rec_end_date (type: string), s_closed_date_sk (type: int)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1, 2, 3, 4]
+                      Statistics: Num rows: 1 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyColumns: 0:int
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: 1:string, 2:string, 3:string, 4:int
+                            valueExpressions: col 1:string, col 2:string, col 3:string, col 4:int
+                        Statistics: Num rows: 1 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    includeColumns: [0, 1, 2, 3, 4]
+                    dataColumns: s_store_sk:int, s_store_id:string, s_rec_start_date:string, s_rec_end_date:string, s_closed_date_sk:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: zzzz
+                reduceColumnSortOrder: ++++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:string, KEY._col3:int, VALUE._col0:decimal(17,2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2)
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:string, col 1:string, col 2:string, col 3:int
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0]
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select s_store_id brand_id, s_rec_start_date brand, s_rec_end_date, s_closed_date_sk,
+    sum(ss_ext_sales_price) ext_price
+ from store_sales, store
+ where ss_item_sk = s_store_sk
+ group by s_store_id,
+   s_rec_start_date,
+   s_rec_end_date,
+   s_closed_date_sk
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select s_store_id brand_id, s_rec_start_date brand, s_rec_end_date, s_closed_date_sk,
+    sum(ss_ext_sales_price) ext_price
+ from store_sales, store
+ where ss_item_sk = s_store_sk
+ group by s_store_id,
+   s_rec_start_date,
+   s_rec_end_date,
+   s_closed_date_sk
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+ramesh	ramesh	ramesh	1	1.10