diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 3510016c07..40d4b21923 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -880,6 +880,7 @@ minillaplocal.query.files=\
   vector_decimal_udf.q,\
   vector_decimal64_case_when_nvl.q,\
   vector_decimal64_case_when_nvl_cbo.q,\
+  vector_decimal64_multi_vertex.q,\
   vector_full_outer_join.q,\
   vector_fullouter_mapjoin_1_fast.q,\
   vector_fullouter_mapjoin_1_optimized.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
index 07cb5cb936..86f3aaafcd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
@@ -24,6 +24,7 @@
 import java.util.Iterator;
 import java.util.List;
+import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.MapredContext;
@@ -32,11 +33,6 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
@@ -96,6 +92,7 @@ private VectorDeserializeRow valueLazyBinaryDeserializeToRow;
   private VectorizedRowBatch batch;
+  private VectorizedRowBatchCtx batchContext;
   private long batchBytes = 0;
   private boolean handleGroupKey = true; // For now.
@@ -131,6 +128,7 @@ public void init(JobConf job, OutputCollector output, Reporter reporter) throws
     reducer = gWork.getReducer();
     vectorized = gWork.getVectorMode();
     reducer.setParentOperators(null); // clear out any parents as reducer is the
+    batchContext = gWork.getVectorizedRowBatchCtx();
                                       // root
     isTagged = gWork.getNeedsTagging();
     try {
@@ -180,24 +178,32 @@ public void init(JobConf job, OutputCollector output, Reporter reporter) throws
       BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
       keyBinarySortableDeserializeToRow =
-          new VectorDeserializeRow(
-              new BinarySortableDeserializeRead(
-                  VectorizedBatchUtil.typeInfosFromStructObjectInspector(
-                      keyStructInspector),
-                  /* useExternalBuffer */ true,
-                  binarySortableSerDe.getSortOrders(),
-                  binarySortableSerDe.getNullMarkers(),
-                  binarySortableSerDe.getNotNullMarkers()));
+          new VectorDeserializeRow(
+              new BinarySortableDeserializeRead(
+                  VectorizedBatchUtil.typeInfosFromStructObjectInspector(
+                      keyStructInspector),
+                  (batchContext.getRowdataTypePhysicalVariations().length > firstValueColumnOffset)
+                      ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(), 0,
+                          firstValueColumnOffset)
+                      : batchContext.getRowdataTypePhysicalVariations(),
+                  /* useExternalBuffer */ true,
+                  binarySortableSerDe.getSortOrders(),
+                  binarySortableSerDe.getNullMarkers(),
+                  binarySortableSerDe.getNotNullMarkers()));
       keyBinarySortableDeserializeToRow.init(0);
       final int valuesSize = valueStructInspector.getAllStructFieldRefs().size();
       if (valuesSize > 0) {
         valueLazyBinaryDeserializeToRow =
-            new VectorDeserializeRow(
-                new LazyBinaryDeserializeRead(
-                    VectorizedBatchUtil.typeInfosFromStructObjectInspector(
-                        valueStructInspector),
-                    /* useExternalBuffer */ true));
+            new VectorDeserializeRow(
+                new LazyBinaryDeserializeRead(
+                    VectorizedBatchUtil.typeInfosFromStructObjectInspector(
+                        valueStructInspector),
+                    (batchContext.getRowdataTypePhysicalVariations().length >= totalColumns)
+                        ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(),
+                            firstValueColumnOffset, totalColumns)
+                        : null,
+                    /* useExternalBuffer */ true));
         valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
         // Create data buffers for value bytes column vectors.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
index 25573ce026..2dfa61be3e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
@@ -189,24 +189,32 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyT
       BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
       keyBinarySortableDeserializeToRow =
-          new VectorDeserializeRow(
-              new BinarySortableDeserializeRead(
-                  VectorizedBatchUtil.typeInfosFromStructObjectInspector(
-                      keyStructInspector),
-                  /* useExternalBuffer */ true,
-                  binarySortableSerDe.getSortOrders(),
-                  binarySortableSerDe.getNullMarkers(),
-                  binarySortableSerDe.getNotNullMarkers()));
+          new VectorDeserializeRow(
+              new BinarySortableDeserializeRead(
+                  VectorizedBatchUtil.typeInfosFromStructObjectInspector(
+                      keyStructInspector),
+                  (batchContext.getRowdataTypePhysicalVariations().length > firstValueColumnOffset)
+                      ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(), 0,
+                          firstValueColumnOffset)
+                      : batchContext.getRowdataTypePhysicalVariations(),
+                  /* useExternalBuffer */ true,
+                  binarySortableSerDe.getSortOrders(),
+                  binarySortableSerDe.getNullMarkers(),
+                  binarySortableSerDe.getNotNullMarkers()));
       keyBinarySortableDeserializeToRow.init(0);
       final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
       if (valuesSize > 0) {
         valueLazyBinaryDeserializeToRow =
-            new VectorDeserializeRow(
-                new LazyBinaryDeserializeRead(
-                    VectorizedBatchUtil.typeInfosFromStructObjectInspector(
-                        valueStructInspectors),
-                    /* useExternalBuffer */ true));
+            new VectorDeserializeRow(
+                new LazyBinaryDeserializeRead(
+                    VectorizedBatchUtil.typeInfosFromStructObjectInspector(
+                        valueStructInspectors),
+                    (batchContext.getRowdataTypePhysicalVariations().length >= totalColumns)
+                        ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(),
+                            firstValueColumnOffset, totalColumns)
+                        : null,
+                    /* useExternalBuffer */ true));
         valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
         // Create data buffers for value bytes column vectors.
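Both reduce-side record handlers above follow the same pattern: the vectorized row batch context now carries a per-column DataTypePhysicalVariation array for the reduce row, and the key and value deserializers each receive only their slice of it, so DECIMAL_64 columns keep their long-backed representation when crossing a vertex boundary. The snippet below is an illustrative sketch of that split only (it is not part of the patch); the class name is hypothetical, while the variable names mirror the hunks above.

    import java.util.Arrays;
    import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;

    // Hypothetical helper, for illustration of the slicing done inline above.
    class ReduceVariationSplit {

      // Key deserializer gets the variations for columns [0, firstValueColumnOffset);
      // if the batch context does not cover that many columns, pass the array through.
      static DataTypePhysicalVariation[] keyVariations(
          DataTypePhysicalVariation[] rowVariations, int firstValueColumnOffset) {
        return rowVariations.length > firstValueColumnOffset
            ? Arrays.copyOfRange(rowVariations, 0, firstValueColumnOffset)
            : rowVariations;
      }

      // Value deserializer gets the variations for columns
      // [firstValueColumnOffset, totalColumns); returning null falls back to the
      // pre-patch behaviour (no DECIMAL_64 treatment for the value columns).
      static DataTypePhysicalVariation[] valueVariations(
          DataTypePhysicalVariation[] rowVariations, int firstValueColumnOffset,
          int totalColumns) {
        return rowVariations.length >= totalColumns
            ? Arrays.copyOfRange(rowVariations, firstValueColumnOffset, totalColumns)
            : null;
      }
    }

The asymmetric fallbacks match the hunks above: the key slice degrades to the whole array, while the value slice degrades to null, which the LazyBinary read path treats as "no physical variations".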
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 988291664e..29c172f21d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -2315,6 +2315,7 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, ArrayList reduceColumnNames = new ArrayList(); ArrayList reduceTypeInfos = new ArrayList(); + ArrayList reduceDataTypePhysicalVariations = new ArrayList(); if (reduceWork.getNeedsTagging()) { setNodeIssue("Tagging not supported"); @@ -2350,6 +2351,7 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, for (StructField field: keyFields) { reduceColumnNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName()); reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName())); + reduceDataTypePhysicalVariations.add(DataTypePhysicalVariation.NONE); } columnSortOrder = keyTableProperties.getProperty(serdeConstants.SERIALIZATION_SORT_ORDER); @@ -2370,7 +2372,16 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, for (StructField field: valueFields) { reduceColumnNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName()); - reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName())); + TypeInfo reduceTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString( + field.getFieldObjectInspector().getTypeName()); + reduceTypeInfos.add(reduceTypeInfo); + if (reduceTypeInfo instanceof DecimalTypeInfo && + HiveDecimalWritable.isPrecisionDecimal64(((DecimalTypeInfo)reduceTypeInfo).getPrecision())) { + reduceDataTypePhysicalVariations.add(DataTypePhysicalVariation.DECIMAL_64); + } + else { + reduceDataTypePhysicalVariations.add(DataTypePhysicalVariation.NONE); + } } } } catch (Exception e) { @@ -2379,6 +2390,7 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork, vectorTaskColumnInfo.setAllColumnNames(reduceColumnNames); vectorTaskColumnInfo.setAllTypeInfos(reduceTypeInfos); + vectorTaskColumnInfo.setAlldataTypePhysicalVariations(reduceDataTypePhysicalVariations); vectorTaskColumnInfo.setReduceColumnSortOrder(columnSortOrder); vectorTaskColumnInfo.setReduceColumnNullOrder(columnNullOrder); @@ -5071,6 +5083,10 @@ private static VectorPTFInfo createVectorPTFInfo(Operator decimal(17,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFSumDecimal64(col 1:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64, VectorUDAFSumDecimal64(col 2:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: PARTIAL2 keyExpressions: col 0:int @@ -1110,7 +1110,7 @@ STAGE PLANS: Group By Operator aggregations: count(_col0), sum(_col1), sum(_col2) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:int) -> bigint, VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFCount(col 0:int) -> bigint, VectorUDAFSumDecimal64(col 1:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64, VectorUDAFSumDecimal64(col 2:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: PARTIAL2 native: false @@ -1141,14 +1141,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 3 - dataColumns: 
VALUE._col0:bigint, VALUE._col1:decimal(17,2), VALUE._col2:decimal(17,2) + dataColumns: VALUE._col0:bigint, VALUE._col1:decimal(17,2)/DECIMAL_64, VALUE._col2:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint, VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint, VectorUDAFSumDecimal64(col 1:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64, VectorUDAFSumDecimal64(col 2:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_decimal_vectorized.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_decimal_vectorized.q.out index 3d71672a42..b90e4f15a4 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_decimal_vectorized.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_decimal_vectorized.q.out @@ -233,7 +233,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:decimal(4,2), VALUE._col0:decimal(4,0) + dataColumns: KEY.reducesinkkey0:decimal(4,2), VALUE._col0:decimal(4,0)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: @@ -659,7 +659,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:decimal(4,2), VALUE._col0:decimal(4,0) + dataColumns: KEY.reducesinkkey0:decimal(4,2), VALUE._col0:decimal(4,0)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: @@ -843,7 +843,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:decimal(4,2), VALUE._col0:decimal(4,0) + dataColumns: KEY.reducesinkkey0:decimal(4,2), VALUE._col0:decimal(4,0)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: @@ -1264,7 +1264,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:decimal(4,2), VALUE._col0:decimal(4,0) + dataColumns: KEY.reducesinkkey0:decimal(4,2), VALUE._col0:decimal(4,0)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out index cc72f4546a..9b912781ae 100644 --- a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -1036,7 +1036,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(11,1) + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(11,1)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: @@ -1181,7 +1181,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(2,1) + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(2,1)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: @@ -1326,7 +1326,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(11,1) + 
dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(11,1)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: @@ -1471,7 +1471,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(11,1) + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(11,1)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index bedae4d20b..98be32ed13 100644 --- a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -205,7 +205,7 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDecimal(col 5:decimal(12,0)) -> decimal(12,0), VectorUDAFCountMerge(col 6:bigint) -> bigint + aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDecimal64(col 5:decimal(12,0)/DECIMAL_64) -> decimal(12,0)/DECIMAL_64, VectorUDAFCountMerge(col 6:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:int @@ -222,8 +222,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 8, 9, 12] - selectExpressions: DoubleColDivideLongColumn(col 7:double, col 2:bigint)(children: CastLongToDouble(col 1:bigint) -> 7:double) -> 8:double, DoubleColDivideLongColumn(col 3:double, col 4:bigint) -> 9:double, CastDecimalToDecimal(col 11:decimal(32,20))(children: DecimalColDivideDecimalColumn(col 5:decimal(12,0), col 10:decimal(19,0))(children: CastLongToDecimal(col 6:bigint) -> 10:decimal(19,0)) -> 11:decimal(32,20)) -> 12:decimal(6,4) + projectedOutputColumnNums: [0, 8, 9, 13] + selectExpressions: DoubleColDivideLongColumn(col 7:double, col 2:bigint)(children: CastLongToDouble(col 1:bigint) -> 7:double) -> 8:double, DoubleColDivideLongColumn(col 3:double, col 4:bigint) -> 9:double, CastDecimalToDecimal(col 12:decimal(32,20))(children: DecimalColDivideDecimalColumn(col 10:decimal(12,0), col 11:decimal(19,0))(children: ConvertDecimal64ToDecimal(col 5:decimal(12,0)/DECIMAL_64) -> 10:decimal(12,0), CastLongToDecimal(col 6:bigint) -> 11:decimal(19,0)) -> 12:decimal(32,20)) -> 13:decimal(6,4) Statistics: Num rows: 257 Data size: 33924 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal64_case_when_nvl.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal64_case_when_nvl.q.out index 2b480586f5..aadd13d9d8 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal64_case_when_nvl.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal64_case_when_nvl.q.out @@ -126,14 +126,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: VALUE._col0:decimal(17,2) + dataColumns: VALUE._col0:decimal(17,2)/DECIMAL_64 
partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 0:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFSumDecimal64(col 0:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false @@ -260,14 +260,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: VALUE._col0:decimal(17,2) + dataColumns: VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 0:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFSumDecimal64(col 0:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false @@ -394,14 +394,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: VALUE._col0:decimal(17,2) + dataColumns: VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 0:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFSumDecimal64(col 0:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal64_case_when_nvl_cbo.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal64_case_when_nvl_cbo.q.out index d3e6eec3fa..0a23094e6d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal64_case_when_nvl_cbo.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal64_case_when_nvl_cbo.q.out @@ -262,14 +262,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: VALUE._col0:decimal(17,2) + dataColumns: VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 0:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFSumDecimal64(col 0:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false @@ -397,14 +397,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: VALUE._col0:decimal(17,2) + dataColumns: VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 0:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFSumDecimal64(col 0:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal64_multi_vertex.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal64_multi_vertex.q.out new file mode 100644 index 0000000000..6acc8a9b64 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_decimal64_multi_vertex.q.out @@ -0,0 +1,328 @@ +PREHOOK: query: create table store +( + s_store_sk int, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk int +) 
+row format delimited fields terminated by '\t' +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store +POSTHOOK: query: create table store +( + s_store_sk int, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk int +) +row format delimited fields terminated by '\t' +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store +PREHOOK: query: create table store_sales +( + ss_item_sk int, + ss_ext_sales_price decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: create table store_sales +( + ss_item_sk int, + ss_ext_sales_price decimal(7,2) +) +row format delimited fields terminated by '\t' +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: insert into store values(1,'ramesh','ramesh','ramesh',1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@store +POSTHOOK: query: insert into store values(1,'ramesh','ramesh','ramesh',1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@store +POSTHOOK: Lineage: store.s_closed_date_sk SCRIPT [] +POSTHOOK: Lineage: store.s_rec_end_date SCRIPT [] +POSTHOOK: Lineage: store.s_rec_start_date SCRIPT [] +POSTHOOK: Lineage: store.s_store_id SCRIPT [] +POSTHOOK: Lineage: store.s_store_sk SCRIPT [] +PREHOOK: query: insert into store_sales values(1,1.1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@store_sales +POSTHOOK: query: insert into store_sales values(1,1.1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@store_sales +POSTHOOK: Lineage: store_sales.ss_ext_sales_price SCRIPT [] +POSTHOOK: Lineage: store_sales.ss_item_sk SCRIPT [] +PREHOOK: query: explain vectorization detail +select s_store_id brand_id, s_rec_start_date brand, s_rec_end_date, s_closed_date_sk, + sum(ss_ext_sales_price) ext_price + from store_sales, store + where ss_item_sk = s_store_sk + group by s_store_id, + s_rec_start_date, + s_rec_end_date, + s_closed_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select s_store_id brand_id, s_rec_start_date brand, s_rec_end_date, s_closed_date_sk, + sum(ss_ext_sales_price) ext_price + from store_sales, store + where ss_item_sk = s_store_sk + group by s_store_id, + s_rec_start_date, + s_rec_end_date, + s_closed_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: ss_item_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: 
true + vectorizationSchemaColumns: [0:ss_item_sk:int, 1:ss_ext_sales_price:decimal(7,2)/DECIMAL_64, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: ss_item_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:decimal(7,2) + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 1:decimal(7,2), 3:string, 4:string, 5:string, 6:int + smallTableValueMapping: 3:string, 4:string, 5:string, 6:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col1, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal64(col 1:decimal(7,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 3:string, col 4:string, col 5:string, col 6:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: int) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + null sort order: zzzz + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:string, 1:string, 2:string, 3:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 4:decimal(17,2) + Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col4 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: ss_item_sk:int, ss_ext_sales_price:decimal(7,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [string, string, string, bigint] + Map 3 + Map Operator Tree: + TableScan + alias: store + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:s_store_sk:int, 1:s_store_id:string, 2:s_rec_start_date:string, 3:s_rec_end_date:string, 4:s_closed_date_sk:int, 5:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int), s_store_id (type: string), s_rec_start_date (type: string), s_rec_end_date (type: string), s_closed_date_sk (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4] + Statistics: Num rows: 1 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:string, 3:string, 4:int + Statistics: Num rows: 1 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: s_store_sk:int, s_store_id:string, s_rec_start_date:string, s_rec_end_date:string, s_closed_date_sk:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: zzzz + reduceColumnSortOrder: ++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:string, KEY._col3:int, VALUE._col0:decimal(17,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal64(col 4:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 + className: 
VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:string, col 3:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s_store_id brand_id, s_rec_start_date brand, s_rec_end_date, s_closed_date_sk, + sum(ss_ext_sales_price) ext_price + from store_sales, store + where ss_item_sk = s_store_sk + group by s_store_id, + s_rec_start_date, + s_rec_end_date, + s_closed_date_sk +PREHOOK: type: QUERY +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: select s_store_id brand_id, s_rec_start_date brand, s_rec_end_date, s_closed_date_sk, + sum(ss_ext_sales_price) ext_price + from store_sales, store + where ss_item_sk = s_store_sk + group by s_store_id, + s_rec_start_date, + s_rec_end_date, + s_closed_date_sk +POSTHOOK: type: QUERY +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +ramesh ramesh ramesh 1 1.10 diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out index c55f540266..a5a07fa95e 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out @@ -547,14 +547,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 10 - dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5), VALUE._col2:decimal(11,5), VALUE._col3:decimal(21,5), VALUE._col4:bigint, VALUE._col5:decimal(16,0), VALUE._col6:decimal(16,0), VALUE._col7:decimal(26,0), VALUE._col8:bigint + dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5)/DECIMAL_64, VALUE._col2:decimal(11,5)/DECIMAL_64, VALUE._col3:decimal(21,5), VALUE._col4:bigint, VALUE._col5:decimal(16,0)/DECIMAL_64, VALUE._col6:decimal(16,0)/DECIMAL_64, VALUE._col7:decimal(26,0), VALUE._col8:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8) Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 3:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFMaxDecimal(col 6:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 7:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 8:decimal(26,0)) -> 
decimal(26,0), VectorUDAFCountMerge(col 9:bigint) -> bigint + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal64(col 2:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFMinDecimal64(col 3:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFMaxDecimal64(col 6:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFMinDecimal64(col 7:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFSumDecimal(col 8:decimal(26,0)) -> decimal(26,0), VectorUDAFCountMerge(col 9:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:int @@ -752,14 +752,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 14 - dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5), VALUE._col2:decimal(11,5), VALUE._col3:decimal(21,5), VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint, VALUE._col7:decimal(16,0), VALUE._col8:decimal(16,0), VALUE._col9:decimal(26,0), VALUE._col10:double, VALUE._col11:double, VALUE._col12:bigint + dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5)/DECIMAL_64, VALUE._col2:decimal(11,5)/DECIMAL_64, VALUE._col3:decimal(21,5), VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint, VALUE._col7:decimal(16,0)/DECIMAL_64, VALUE._col8:decimal(16,0)/DECIMAL_64, VALUE._col9:decimal(26,0), VALUE._col10:double, VALUE._col11:double, VALUE._col12:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), count(VALUE._col12) Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 3:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDecimal(col 8:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 9:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 10:decimal(26,0)) -> decimal(26,0), VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFCountMerge(col 13:bigint) -> bigint + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal64(col 2:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFMinDecimal64(col 3:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDecimal64(col 8:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFMinDecimal64(col 9:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFSumDecimal(col 10:decimal(26,0)) -> decimal(26,0), VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFCountMerge(col 13:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:int diff --git 
a/ql/src/test/results/clientpositive/llap/vector_decimal_join.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_join.q.out index a2aeb20f0e..8f1283ae12 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_join.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_join.q.out @@ -189,14 +189,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: VALUE._col0:decimal(17,2) + dataColumns: VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 0:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFSumDecimal64(col 0:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out index 8dd8d6196c..bfce1be686 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out @@ -114,7 +114,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:decimal(10,0), VALUE._col0:decimal(11,0) + dataColumns: KEY.reducesinkkey0:decimal(10,0), VALUE._col0:decimal(11,0)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: @@ -236,7 +236,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:decimal(11,0), VALUE._col0:decimal(10,0) + dataColumns: KEY.reducesinkkey0:decimal(11,0), VALUE._col0:decimal(10,0)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: @@ -386,7 +386,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:decimal(10,0), VALUE._col0:decimal(11,0) + dataColumns: KEY.reducesinkkey0:decimal(10,0), VALUE._col0:decimal(11,0)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: @@ -508,7 +508,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:decimal(11,0), VALUE._col0:decimal(10,0) + dataColumns: KEY.reducesinkkey0:decimal(11,0), VALUE._col0:decimal(10,0)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: @@ -658,7 +658,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:decimal(10,0), VALUE._col0:decimal(11,0) + dataColumns: KEY.reducesinkkey0:decimal(10,0), VALUE._col0:decimal(11,0)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: @@ -780,7 +780,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:decimal(11,0), VALUE._col0:decimal(10,0) + dataColumns: KEY.reducesinkkey0:decimal(11,0), VALUE._col0:decimal(10,0)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out index 0ec8c80d4d..5902a0d4e7 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out @@ -148,7 +148,7 @@ STAGE PLANS: vectorized: true rowBatchContext: 
dataColumnCount: 3 - dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(10,4), VALUE._col1:decimal(15,8) + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(10,4)/DECIMAL_64, VALUE._col1:decimal(15,8)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index f54064c235..fd04e6bbc3 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -7962,14 +7962,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: VALUE._col0:decimal(15,3) + dataColumns: VALUE._col0:decimal(15,3)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) Group By Vectorization: - aggregators: VectorUDAFMinDecimal(col 0:decimal(15,3)) -> decimal(15,3) + aggregators: VectorUDAFMinDecimal64(col 0:decimal(15,3)/DECIMAL_64) -> decimal(15,3)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false @@ -8098,14 +8098,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: VALUE._col0:decimal(15,3) + dataColumns: VALUE._col0:decimal(15,3)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) Group By Vectorization: - aggregators: VectorUDAFMaxDecimal(col 0:decimal(15,3)) -> decimal(15,3) + aggregators: VectorUDAFMaxDecimal64(col 0:decimal(15,3)/DECIMAL_64) -> decimal(15,3)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index 94fa3cfb3a..5a178ddb08 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -1686,14 +1686,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: VALUE._col0:decimal(17,2) + dataColumns: VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 0:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFSumDecimal64(col 0:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false @@ -1728,9 +1728,9 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(17,2) + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(27,2), bigint] + scratchColumnTypeNames: [decimal(27,2), bigint, decimal(17,2)] Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(17,2)) @@ -1762,7 +1762,7 @@ STAGE PLANS: PTF Vectorization: className: VectorPTFOperator evaluatorClasses: [VectorPTFEvaluatorDecimalSum] - functionInputExpressions: [col 1:decimal(17,2)] + functionInputExpressions: [ConvertDecimal64ToDecimal(col 1:decimal(17,2)/DECIMAL_64) -> 4:decimal(17,2)] functionNames: [sum] keyInputColumns: [] native: true @@ -1914,14 +1914,14 @@ STAGE PLANS: 
vectorized: true rowBatchContext: dataColumnCount: 3 - dataColumns: KEY._col0:decimal(7,2), KEY._col1:decimal(7,2), VALUE._col0:decimal(17,2) + dataColumns: KEY._col0:decimal(7,2), KEY._col1:decimal(7,2), VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFSumDecimal64(col 2:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) @@ -1958,9 +1958,9 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 3 - dataColumns: KEY.reducesinkkey0:decimal(7,2), KEY.reducesinkkey1:decimal(7,2), VALUE._col0:decimal(17,2) + dataColumns: KEY.reducesinkkey0:decimal(7,2), KEY.reducesinkkey1:decimal(7,2), VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(27,2)] + scratchColumnTypeNames: [decimal(27,2), decimal(17,2)] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2)) @@ -1992,7 +1992,7 @@ STAGE PLANS: PTF Vectorization: className: VectorPTFOperator evaluatorClasses: [VectorPTFEvaluatorDecimalSum] - functionInputExpressions: [col 2:decimal(17,2)] + functionInputExpressions: [ConvertDecimal64ToDecimal(col 2:decimal(17,2)/DECIMAL_64) -> 4:decimal(17,2)] functionNames: [sum] keyInputColumns: [1, 0] native: true @@ -2232,14 +2232,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 3 - dataColumns: KEY._col0:decimal(7,2), KEY._col1:decimal(7,2), VALUE._col0:decimal(17,2) + dataColumns: KEY._col0:decimal(7,2), KEY._col1:decimal(7,2), VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFSumDecimal64(col 2:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) @@ -2276,9 +2276,9 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 3 - dataColumns: KEY.reducesinkkey0:decimal(7,2), KEY.reducesinkkey1:decimal(7,2), VALUE._col0:decimal(17,2) + dataColumns: KEY.reducesinkkey0:decimal(7,2), KEY.reducesinkkey1:decimal(7,2), VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(27,2)] + scratchColumnTypeNames: [decimal(27,2), decimal(17,2)] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2)) @@ -2310,7 +2310,7 @@ STAGE PLANS: PTF Vectorization: className: VectorPTFOperator evaluatorClasses: [VectorPTFEvaluatorDecimalSum] - functionInputExpressions: [col 2:decimal(17,2)] + functionInputExpressions: [ConvertDecimal64ToDecimal(col 2:decimal(17,2)/DECIMAL_64) -> 4:decimal(17,2)] functionNames: [sum] keyInputColumns: [1, 0] native: true @@ -2554,14 +2554,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 3 - dataColumns: KEY._col0:decimal(7,2), KEY._col1:decimal(7,2), VALUE._col0:decimal(17,2) + dataColumns: KEY._col0:decimal(7,2), KEY._col1:decimal(7,2), 
VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + aggregators: VectorUDAFSumDecimal64(col 2:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) @@ -2598,9 +2598,9 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 3 - dataColumns: KEY.reducesinkkey0:decimal(7,2), KEY.reducesinkkey1:decimal(7,2), VALUE._col0:decimal(17,2) + dataColumns: KEY.reducesinkkey0:decimal(7,2), KEY.reducesinkkey1:decimal(7,2), VALUE._col0:decimal(17,2)/DECIMAL_64 partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(27,2)] + scratchColumnTypeNames: [decimal(27,2), decimal(17,2)] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: decimal(7,2)), KEY.reducesinkkey0 (type: decimal(7,2)), VALUE._col0 (type: decimal(17,2)) @@ -2632,7 +2632,7 @@ STAGE PLANS: PTF Vectorization: className: VectorPTFOperator evaluatorClasses: [VectorPTFEvaluatorDecimalSum] - functionInputExpressions: [col 2:decimal(17,2)] + functionInputExpressions: [ConvertDecimal64ToDecimal(col 2:decimal(17,2)/DECIMAL_64) -> 4:decimal(17,2)] functionNames: [sum] keyInputColumns: [1, 0] native: true diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out index 5410237e9d..c34fa14f2f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out @@ -1523,7 +1523,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:timestamp, VALUE._col1:decimal(4,2) + dataColumns: KEY.reducesinkkey0:timestamp, VALUE._col1:decimal(4,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [bigint, decimal(4,2)] Reduce Operator Tree: @@ -1572,7 +1572,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterDecimalColEqualDecimalScalar(col 1:decimal(4,2), val 89.5) + predicateExpression: FilterDecimal64ColEqualDecimal64Scalar(col 1:decimal(4,2)/DECIMAL_64, val 8950) predicate: (_col2 = 89.5) (type: boolean) Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/llap/vectorization_17.q.out b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out index 9081ecf8c4..26c0be1db3 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out @@ -132,7 +132,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 14 - dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:float, VALUE._col0:string, VALUE._col1:int, VALUE._col2:timestamp, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:double, VALUE._col9:double, VALUE._col10:decimal(11,4), VALUE._col11:double + dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:float, VALUE._col0:string, VALUE._col1:int, VALUE._col2:timestamp, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:double, VALUE._col9:double, VALUE._col10:decimal(11,4)/DECIMAL_64, 
VALUE._col11:double partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out index 49709488e7..cbbb9d3067 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out @@ -477,14 +477,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 3 - dataColumns: VALUE._col0:decimal(10,1), VALUE._col1:decimal(10,1), VALUE._col2:binary + dataColumns: VALUE._col0:decimal(10,1)/DECIMAL_64, VALUE._col1:decimal(10,1)/DECIMAL_64, VALUE._col2:binary partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) Group By Vectorization: - aggregators: VectorUDAFMinDecimal(col 0:decimal(10,1)) -> decimal(10,1), VectorUDAFMaxDecimal(col 1:decimal(10,1)) -> decimal(10,1), VectorUDAFBloomFilterMerge(col 2:binary) -> binary + aggregators: VectorUDAFMinDecimal64(col 0:decimal(10,1)/DECIMAL_64) -> decimal(10,1)/DECIMAL_64, VectorUDAFMaxDecimal64(col 1:decimal(10,1)/DECIMAL_64) -> decimal(10,1)/DECIMAL_64, VectorUDAFBloomFilterMerge(col 2:binary) -> binary className: VectorGroupByOperator groupByMode: FINAL native: false diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index fc6e1556fb..73346ed057 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -351,7 +351,7 @@ STAGE PLANS: Group By Operator aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 2:smallint) -> smallint, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFMaxDecimal(col 9:decimal(4,2)) -> decimal(4,2) + aggregators: VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 2:smallint) -> smallint, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFMaxDecimal64(col 9:decimal(4,2)/DECIMAL_64) -> decimal(4,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:tinyint diff --git a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index 1b5a0e5537..2ab234aa23 100644 --- a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -193,7 +193,7 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), 
count(VALUE._col5) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDecimal(col 5:decimal(12,0)) -> decimal(12,0), VectorUDAFCountMerge(col 6:bigint) -> bigint + aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDecimal64(col 5:decimal(12,0)/DECIMAL_64) -> decimal(12,0)/DECIMAL_64, VectorUDAFCountMerge(col 6:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:int @@ -210,8 +210,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 8, 9, 12] - selectExpressions: DoubleColDivideLongColumn(col 7:double, col 2:bigint)(children: CastLongToDouble(col 1:bigint) -> 7:double) -> 8:double, DoubleColDivideLongColumn(col 3:double, col 4:bigint) -> 9:double, CastDecimalToDecimal(col 11:decimal(32,20))(children: DecimalColDivideDecimalColumn(col 5:decimal(12,0), col 10:decimal(19,0))(children: CastLongToDecimal(col 6:bigint) -> 10:decimal(19,0)) -> 11:decimal(32,20)) -> 12:decimal(6,4) + projectedOutputColumnNums: [0, 8, 9, 13] + selectExpressions: DoubleColDivideLongColumn(col 7:double, col 2:bigint)(children: CastLongToDouble(col 1:bigint) -> 7:double) -> 8:double, DoubleColDivideLongColumn(col 3:double, col 4:bigint) -> 9:double, CastDecimalToDecimal(col 12:decimal(32,20))(children: DecimalColDivideDecimalColumn(col 10:decimal(12,0), col 11:decimal(19,0))(children: ConvertDecimal64ToDecimal(col 5:decimal(12,0)/DECIMAL_64) -> 10:decimal(12,0), CastLongToDecimal(col 6:bigint) -> 11:decimal(19,0)) -> 12:decimal(32,20)) -> 13:decimal(6,4) Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index fb07d9ddbb..d6fd7c1177 100644 --- a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -541,14 +541,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 10 - dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5), VALUE._col2:decimal(11,5), VALUE._col3:decimal(21,5), VALUE._col4:bigint, VALUE._col5:decimal(16,0), VALUE._col6:decimal(16,0), VALUE._col7:decimal(26,0), VALUE._col8:bigint + dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5)/DECIMAL_64, VALUE._col2:decimal(11,5)/DECIMAL_64, VALUE._col3:decimal(21,5), VALUE._col4:bigint, VALUE._col5:decimal(16,0)/DECIMAL_64, VALUE._col6:decimal(16,0)/DECIMAL_64, VALUE._col7:decimal(26,0), VALUE._col8:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8) Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 3:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 4:decimal(21,5)) -> 
diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
index fb07d9ddbb..d6fd7c1177 100644
--- a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
@@ -541,14 +541,14 @@ STAGE PLANS:
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 10
-                    dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5), VALUE._col2:decimal(11,5), VALUE._col3:decimal(21,5), VALUE._col4:bigint, VALUE._col5:decimal(16,0), VALUE._col6:decimal(16,0), VALUE._col7:decimal(26,0), VALUE._col8:bigint
+                    dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5)/DECIMAL_64, VALUE._col2:decimal(11,5)/DECIMAL_64, VALUE._col3:decimal(21,5), VALUE._col4:bigint, VALUE._col5:decimal(16,0)/DECIMAL_64, VALUE._col6:decimal(16,0)/DECIMAL_64, VALUE._col7:decimal(26,0), VALUE._col8:bigint
                    partitionColumnCount: 0
                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8)
                Group By Vectorization:
-                    aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 3:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFMaxDecimal(col 6:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 7:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 8:decimal(26,0)) -> decimal(26,0), VectorUDAFCountMerge(col 9:bigint) -> bigint
+                    aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal64(col 2:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFMinDecimal64(col 3:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFMaxDecimal64(col 6:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFMinDecimal64(col 7:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFSumDecimal(col 8:decimal(26,0)) -> decimal(26,0), VectorUDAFCountMerge(col 9:bigint) -> bigint
                    className: VectorGroupByOperator
                    groupByMode: MERGEPARTIAL
                    keyExpressions: col 0:int
@@ -744,14 +744,14 @@ STAGE PLANS:
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 14
-                    dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5), VALUE._col2:decimal(11,5), VALUE._col3:decimal(21,5), VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint, VALUE._col7:decimal(16,0), VALUE._col8:decimal(16,0), VALUE._col9:decimal(26,0), VALUE._col10:double, VALUE._col11:double, VALUE._col12:bigint
+                    dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5)/DECIMAL_64, VALUE._col2:decimal(11,5)/DECIMAL_64, VALUE._col3:decimal(21,5), VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint, VALUE._col7:decimal(16,0)/DECIMAL_64, VALUE._col8:decimal(16,0)/DECIMAL_64, VALUE._col9:decimal(26,0), VALUE._col10:double, VALUE._col11:double, VALUE._col12:bigint
                    partitionColumnCount: 0
                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), count(VALUE._col12)
                Group By Vectorization:
-                    aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 3:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDecimal(col 8:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 9:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 10:decimal(26,0)) -> decimal(26,0), VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFCountMerge(col 13:bigint) -> bigint
+                    aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal64(col 2:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFMinDecimal64(col 3:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDecimal64(col 8:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFMinDecimal64(col 9:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFSumDecimal(col 10:decimal(26,0)) -> decimal(26,0), VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFCountMerge(col 13:bigint) -> bigint
                    className: VectorGroupByOperator
                    groupByMode: MERGEPARTIAL
                    keyExpressions: col 0:int
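
Note that in the vector_decimal_aggregate plans only max/min move to the Decimal64 aggregators, while sum over the widened decimal(21,5) and decimal(26,0) accumulators stays on VectorUDAFSumDecimal: those precisions exceed the 18-digit decimal64 limit. A small sketch using the same helper the serde changes below rely on (illustrative only, not taken from the patch):

import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

public class Decimal64PrecisionSketch {
  public static void main(String[] args) {
    // The value columns decimal(11,5) and decimal(16,0) stay within the 18-digit
    // decimal64 limit, so their min/max aggregators can use the Decimal64 variants.
    System.out.println(HiveDecimalWritable.isPrecisionDecimal64(11)); // true
    System.out.println(HiveDecimalWritable.isPrecisionDecimal64(16)); // true
    // The sum accumulators are widened to decimal(21,5) and decimal(26,0), which no
    // longer fit in 64 bits, so those columns keep VectorUDAFSumDecimal in the plans.
    System.out.println(HiveDecimalWritable.isPrecisionDecimal64(21)); // false
    System.out.println(HiveDecimalWritable.isPrecisionDecimal64(26)); // false
  }
}
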
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
index 599dbc13a4..286e529641 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
@@ -25,6 +25,7 @@
 import java.util.List;
 import java.util.Properties;

+import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
 import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -130,8 +131,14 @@ public static BinarySortableDeserializeRead ascendingNullsFirst(TypeInfo[] typeI
   }

   public BinarySortableDeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer,
-      boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker) {
-    super(typeInfos, useExternalBuffer);
+      boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker) {
+    this(typeInfos, null, useExternalBuffer, columnSortOrderIsDesc, columnNullMarker,
+        columnNotNullMarker);
+  }
+
+  public BinarySortableDeserializeRead(TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations,
+      boolean useExternalBuffer, boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker) {
+    super(typeInfos, dataTypePhysicalVariations, useExternalBuffer);
     final int count = typeInfos.length;
     root = new Field();
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
index 7b8aae4a92..1ad8ce179b 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
@@ -25,6 +25,7 @@
 import java.util.List;

 import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
@@ -853,7 +854,7 @@ private boolean doReadField(Field field) {
           decimalIsNull = !currentHiveDecimalWritable.mutateEnforcePrecisionScale(precision, scale);
           if (!decimalIsNull) {
-            if (field.dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
+            if (HiveDecimalWritable.isPrecisionDecimal64(precision)) {
               currentDecimal64 = currentHiveDecimalWritable.serialize64(scale);
             }
             return true;
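
BinarySortableDeserializeRead (and, in the diff that follows, LazyBinaryDeserializeRead) gains an overload that accepts a DataTypePhysicalVariation[] alongside the TypeInfo[]. A hypothetical caller of the new BinarySortableDeserializeRead overload could look like the sketch below; the class name, sort-order and marker values are illustrative only and not taken from the patch.

import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class KeyDeserializeReadSketch {
  public static void main(String[] args) {
    // One decimal(10,1) key column tagged as DECIMAL_64.
    TypeInfo[] keyTypeInfos = { TypeInfoFactory.getDecimalTypeInfo(10, 1) };
    DataTypePhysicalVariation[] keyVariations = { DataTypePhysicalVariation.DECIMAL_64 };
    BinarySortableDeserializeRead keyRead =
        new BinarySortableDeserializeRead(
            keyTypeInfos,
            keyVariations,                // may be null, which falls back to the old behaviour
            /* useExternalBuffer */ true,
            new boolean[] { false },      // ascending sort order (illustrative)
            new byte[] { 0 },             // null marker (illustrative)
            new byte[] { 1 });            // not-null marker (illustrative)
    // The extra array only threads the per-column physical variation down to the
    // base DeserializeRead; whether a given decimal column is actually surfaced as
    // a scaled long is decided by the deserializer at read time.
  }
}

The LazyBinaryDeserializeRead hunk that follows adds the matching constructor overload and the same precision-based DECIMAL_64 fast path seen above for LazySimpleDeserializeRead.
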
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
index 000dfed491..53af41be65 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
@@ -25,7 +25,9 @@
 import java.util.Deque;
 import java.util.List;

+import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
 import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
@@ -80,6 +82,7 @@
     Category category;
     PrimitiveCategory primitiveCategory;
     TypeInfo typeInfo;
+    DataTypePhysicalVariation dataTypePhysicalVariation;

     int index;
     int count;
@@ -91,7 +94,12 @@
   }

   public LazyBinaryDeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer) {
-    super(typeInfos, useExternalBuffer);
+    this(typeInfos, null, useExternalBuffer);
+  }
+
+  public LazyBinaryDeserializeRead(TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations,
+      boolean useExternalBuffer) {
+    super(typeInfos, dataTypePhysicalVariations, useExternalBuffer);
     tempVInt = new VInt();
     tempVLong = new VLong();
     currentExternalBufferNeeded = false;
@@ -388,6 +396,12 @@ private boolean readPrimitive(Field field) throws IOException {
           final int scale = decimalTypeInfo.getScale();
           decimalIsNull = !currentHiveDecimalWritable.mutateEnforcePrecisionScale(precision, scale);
+          if (!decimalIsNull) {
+            if (HiveDecimalWritable.isPrecisionDecimal64(precision)) {
+              currentDecimal64 = currentHiveDecimalWritable.serialize64(scale);
+            }
+            return true;
+          }
         }
         if (decimalIsNull) {
           return false;