diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index d6bfa7a..b7feb1c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2074,6 +2074,8 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
 
       // Elt is a special case because it can take variable number of arguments.
       ve = getEltExpression(childExpr, returnType);
+    } else if (udf instanceof GenericUDFGrouping) {
+      ve = getGroupingExpression((GenericUDFGrouping) udf, childExpr, returnType);
     } else if (udf instanceof GenericUDFBridge) {
       ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode,
           returnType);
@@ -2195,6 +2197,80 @@ private VectorExpression getEltExpression(List<ExprNodeDesc> childExpr, TypeInfo
     return vectorElt;
   }
 
+  /**
+   * Builds the vectorized GROUPING expression for a call whose first child is a
+   * column reference and whose remaining children are Integer or Long constants.
+   *
+   * <p>One constant yields a {@link GroupingColumn}; several yield a
+   * {@link GroupingColumns}. Both use each constant as a left-shift distance on a
+   * long, so only values in [0, 63] are accepted here.
+   *
+   * @param udf the GROUPING UDF being vectorized; not read by this method
+   * @param childExprs the column reference followed by the constant bit indices
+   * @param returnType type used to allocate and describe the output column
+   * @return the new expression, or null when the arguments do not have that shape
+   *         (NOTE(review): presumably the caller then tries another vectorization
+   *         path; confirm in getGenericUdfVectorExpression)
+   * @throws HiveException if one of the helper calls made here raises it
+   */
+  private VectorExpression getGroupingExpression(GenericUDFGrouping udf,
+      List<ExprNodeDesc> childExprs, TypeInfo returnType)
+      throws HiveException {
+
+    // The column reference plus at least one constant index are required.
+    if (childExprs.size() < 2) {
+      return null;
+    }
+
+    ExprNodeDesc childExpr0 = childExprs.get(0);
+    if (!(childExpr0 instanceof ExprNodeColumnDesc)) {
+      return null;
+    }
+    ExprNodeColumnDesc groupingIdColDesc = (ExprNodeColumnDesc) childExpr0;
+    int groupingIdColNum = getInputColumnIndex(groupingIdColDesc.getColumn());
+
+    final int indexCount = childExprs.size() - 1;
+    int[] indices = new int[indexCount];
+    for (int i = 0; i < indexCount; i++) {
+      ExprNodeDesc indexChildExpr = childExprs.get(i + 1);
+      if (!(indexChildExpr instanceof ExprNodeConstantDesc)) {
+        return null;
+      }
+      Object scalarObject = ((ExprNodeConstantDesc) indexChildExpr).getValue();
+      final long index;
+      if (scalarObject instanceof Integer) {
+        index = ((Integer) scalarObject).longValue();
+      } else if (scalarObject instanceof Long) {
+        index = ((Long) scalarObject).longValue();
+      } else {
+        return null;
+      }
+
+      // Reject rather than narrow: an int cast would silently truncate a large
+      // Long, and a shift distance outside [0, 63] would wrap onto another bit.
+      if (index < 0 || index >= Long.SIZE) {
+        return null;
+      }
+      indices[i] = (int) index;
+    }
+
+    final int outputColumnNum = ocm.allocateOutputColumn(returnType);
+    final VectorExpression ve;
+    if (indices.length == 1) {
+      ve = new GroupingColumn(groupingIdColNum, indices[0], outputColumnNum);
+    } else {
+      ve = new GroupingColumns(groupingIdColNum, indices, outputColumnNum);
+    }
+
+    ve.setInputTypeInfos(groupingIdColDesc.getTypeInfo());
+    ve.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
+
+    ve.setOutputTypeInfo(returnType);
+    ve.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
+
+    return ve;
+  }
+
   public enum InConstantType {
     INT_FAMILY,
     TIMESTAMP,
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumn.java
new file mode 100644
index 0000000..9bad386
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumn.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/** Single-bit test on a long: {@link #func(long)} gives 1 when the masked bit is set, else 0. */
+public class GroupingColumn extends MathFuncLongToLong {
+  private static final long serialVersionUID = 1L;
+
+  private final long mask; // 1L << index; 0 when built by the no-arg constructor
+
+  public GroupingColumn(int inputColumnNum, int index, int outputColumnNum) {
+    super(inputColumnNum, outputColumnNum);
+    mask = 1L << index;
+  }
+
+  public GroupingColumn() {
+    super();
+    mask = 0L; // placeholder only: a final field must be assigned in every constructor
+  }
+
+  @Override
+  protected long func(long v) {
+    final boolean bitSet = (v & mask) != 0L;
+    return bitSet ? 1L : 0L;
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return new StringBuilder("col ").append(colNum).append(", mask ").append(mask).toString();
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return null; // not applicable
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumns.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumns.java
new file mode 100644
index 0000000..b59204e
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumns.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/** Packs several tested bits of a long into one value; see {@link #func(long)}. */
+public class GroupingColumns extends MathFuncLongToLong {
+  private static final long serialVersionUID = 1L;
+
+  private final long[] masks; // masks[i] == 1L << indices[i]; null after the no-arg constructor
+
+  public GroupingColumns(int inputColumnNum, int[] indices, int outputColumnNum) {
+    super(inputColumnNum, outputColumnNum);
+    masks = new long[indices.length];
+    int next = 0;
+    for (int index : indices) {
+      masks[next++] = 1L << index;
+    }
+  }
+
+  public GroupingColumns() {
+    super();
+    masks = null; // placeholder only: a final field must be assigned in every constructor
+  }
+
+  /** Adds {@code 1L << (masks.length - 1 - i)} for every i where {@code v} has masks[i] set. */
+  @Override
+  protected long func(long v) {
+    long packed = 0;
+    int outBit = masks.length - 1;
+    for (long mask : masks) {
+      if ((v & mask) != 0) {
+        packed += 1L << outBit;
+      }
+      outBit--;
+    }
+    return packed;
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return new StringBuilder("col ").append(colNum).append(", masks ")
+        .append(Arrays.toString(masks)).toString();
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return null; // not applicable
+  }
+}
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out index 9501927..d75f2d8 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out @@ -118,7 +118,7 @@ STAGE PLANS: reduceColumnNullOrder: aaa reduceColumnSortOrder: +++ allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 @@ -145,7 +145,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4] - selectExpressions: VectorUDFAdaptor(grouping(_col2, 1)) -> 3:bigint, VectorUDFAdaptor(grouping(_col2, 0)) -> 4:bigint + selectExpressions: GroupingColumn(col 2, mask 2) -> 3:bigint, GroupingColumn(col 2, mask 1) -> 4:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -279,7 +279,7 @@ STAGE PLANS: reduceColumnNullOrder: aaa reduceColumnSortOrder: +++ allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 @@ -306,7 +306,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4] - selectExpressions: VectorUDFAdaptor(grouping(_col2, 1)) -> 3:bigint, VectorUDFAdaptor(grouping(_col2, 0)) -> 4:bigint + selectExpressions: GroupingColumn(col 2, mask 2) -> 3:bigint, GroupingColumn(col 2, mask 1) -> 4:bigint Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -447,7 +447,7 @@ STAGE PLANS: reduceColumnNullOrder: aaa reduceColumnSortOrder: +++ allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 @@ -471,7 +471,7 @@ STAGE 
PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColEqualLongScalar(col 3:bigint, val 1)(children: VectorUDFAdaptor(grouping(_col2, 1)) -> 3:bigint) + predicateExpression: FilterLongColEqualLongScalar(col 3:bigint, val 1)(children: GroupingColumn(col 2, mask 2) -> 3:bigint) predicate: (grouping(_col2, 1) = 1) (type: boolean) Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -616,7 +616,7 @@ STAGE PLANS: reduceColumnNullOrder: aaa reduceColumnSortOrder: +++ allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 @@ -640,7 +640,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 3:bigint, val 1)(children: VectorUDFAdaptor(grouping(_col2, 1)) -> 3:bigint), FilterLongColEqualLongScalar(col 3:bigint, val 1)(children: VectorUDFAdaptor(grouping(_col2, 0)) -> 3:bigint)) + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 3:bigint, val 1)(children: GroupingColumn(col 2, mask 2) -> 3:bigint), FilterLongColEqualLongScalar(col 3:bigint, val 1)(children: GroupingColumn(col 2, mask 1) -> 3:bigint)) predicate: ((grouping(_col2, 0) = 1) or (grouping(_col2, 1) = 1)) (type: boolean) Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -650,7 +650,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 5, 4] - selectExpressions: LongColAddLongColumn(col 3:bigint, col 4:bigint)(children: VectorUDFAdaptor(grouping(_col2, 1)) -> 3:bigint, VectorUDFAdaptor(grouping(_col2, 0)) -> 4:bigint) -> 5:bigint, IfExprColumnNull(col 3:boolean, col 0:int, null)(children: LongColEqualLongScalar(col 6:bigint, val 1)(children: LongColAddLongColumn(col 3:bigint, col 4:bigint)(children: 
VectorUDFAdaptor(grouping(_col2, 1)) -> 3:bigint, VectorUDFAdaptor(grouping(_col2, 0)) -> 4:bigint) -> 6:bigint) -> 3:boolean, col 0:int) -> 4:int + selectExpressions: LongColAddLongColumn(col 3:bigint, col 4:bigint)(children: GroupingColumn(col 2, mask 2) -> 3:bigint, GroupingColumn(col 2, mask 1) -> 4:bigint) -> 5:bigint, IfExprColumnNull(col 3:boolean, col 0:int, null)(children: LongColEqualLongScalar(col 6:bigint, val 1)(children: LongColAddLongColumn(col 3:bigint, col 4:bigint)(children: GroupingColumn(col 2, mask 2) -> 3:bigint, GroupingColumn(col 2, mask 1) -> 4:bigint) -> 6:bigint) -> 3:boolean, col 0:int) -> 4:int Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: bigint), _col3 (type: int) @@ -822,7 +822,7 @@ STAGE PLANS: reduceColumnNullOrder: aaa reduceColumnSortOrder: +++ allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 @@ -849,7 +849,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4] - selectExpressions: VectorUDFAdaptor(grouping(_col2, 1L)) -> 3:bigint, VectorUDFAdaptor(grouping(_col2, 0L)) -> 4:bigint + selectExpressions: GroupingColumn(col 2, mask 2) -> 3:bigint, GroupingColumn(col 2, mask 1) -> 4:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -983,7 +983,7 @@ STAGE PLANS: reduceColumnNullOrder: aaa reduceColumnSortOrder: +++ allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 @@ -1010,7 +1010,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4] - selectExpressions: VectorUDFAdaptor(grouping(_col2, 1L)) -> 3:bigint, VectorUDFAdaptor(grouping(_col2, 0L)) -> 4:bigint + selectExpressions: GroupingColumn(col 2, mask 2) -> 3:bigint, 
GroupingColumn(col 2, mask 1) -> 4:bigint Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1119,7 +1119,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColEqualLongScalar(col 3:bigint, val 1)(children: VectorUDFAdaptor(grouping(_col2, 1L)) -> 3:bigint) + predicateExpression: FilterLongColEqualLongScalar(col 3:bigint, val 1)(children: GroupingColumn(col 2, mask 2) -> 3:bigint) predicate: (grouping(_col2, 1L) = 1) (type: boolean) Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -1142,7 +1142,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 @@ -1281,7 +1281,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 3:bigint, val 1)(children: VectorUDFAdaptor(grouping(_col2, 1L)) -> 3:bigint), FilterLongColEqualLongScalar(col 3:bigint, val 1)(children: VectorUDFAdaptor(grouping(_col2, 0L)) -> 3:bigint)) + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 3:bigint, val 1)(children: GroupingColumn(col 2, mask 2) -> 3:bigint), FilterLongColEqualLongScalar(col 3:bigint, val 1)(children: GroupingColumn(col 2, mask 1) -> 3:bigint)) predicate: ((grouping(_col2, 0L) = 1) or (grouping(_col2, 1L) = 1)) (type: boolean) Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -1304,7 +1304,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true 
rowBatchContext: dataColumnCount: 2 @@ -1320,7 +1320,7 @@ STAGE PLANS: reduceColumnNullOrder: aaa reduceColumnSortOrder: +++ allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 @@ -1347,7 +1347,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 5] - selectExpressions: LongColAddLongColumn(col 3:bigint, col 4:bigint)(children: VectorUDFAdaptor(grouping(_col2, 1L)) -> 3:bigint, VectorUDFAdaptor(grouping(_col2, 0L)) -> 4:bigint) -> 5:bigint + selectExpressions: LongColAddLongColumn(col 3:bigint, col 4:bigint)(children: GroupingColumn(col 2, mask 2) -> 3:bigint, GroupingColumn(col 2, mask 1) -> 4:bigint) -> 5:bigint Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: bigint), CASE WHEN ((_col2 = 1L)) THEN (_col0) END (type: int) @@ -1983,7 +1983,7 @@ STAGE PLANS: reduceColumnNullOrder: aaa reduceColumnSortOrder: +++ allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 @@ -2010,7 +2010,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3] - selectExpressions: VectorUDFAdaptor(grouping(_col2, 1L, 0L)) -> 3:bigint + selectExpressions: GroupingColumns(col 2, masks [2, 1]) -> 3:bigint Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -2149,7 +2149,7 @@ STAGE PLANS: reduceColumnNullOrder: aaa reduceColumnSortOrder: +++ allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 @@ -2176,7 +2176,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3] - selectExpressions: VectorUDFAdaptor(grouping(_col2, 0L, 1L)) -> 3:bigint + selectExpressions: GroupingColumns(col 2, 
masks [1, 2]) -> 3:bigint Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -2315,7 +2315,7 @@ STAGE PLANS: reduceColumnNullOrder: aaa reduceColumnSortOrder: +++ allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 @@ -2342,7 +2342,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3] - selectExpressions: VectorUDFAdaptor(grouping(_col2, 1L, 0L)) -> 3:bigint + selectExpressions: GroupingColumns(col 2, masks [2, 1]) -> 3:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -2476,7 +2476,7 @@ STAGE PLANS: reduceColumnNullOrder: aaa reduceColumnSortOrder: +++ allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 3 @@ -2503,7 +2503,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3] - selectExpressions: VectorUDFAdaptor(grouping(_col2, 0L, 1L)) -> 3:bigint + selectExpressions: GroupingColumns(col 2, masks [1, 2]) -> 3:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false