diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 0c590c8..8616013 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -269,6 +269,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_acid3.q,\ vector_aggregate_9.q,\ vector_aggregate_without_gby.q,\ + vector_array.q,\ vector_auto_smb_mapjoin_14.q,\ vector_between_columns.q,\ vector_between_in.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 5b0c2bf..3d5f664 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -34,6 +34,7 @@ import java.util.regex.Pattern; import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.type.HiveChar; @@ -153,6 +154,7 @@ private List projectedColumns; private List projectionColumnNames; private Map projectionColumnMap; + private OperatorType operatorType; //columnName to column position map // private final Map columnMap; @@ -172,6 +174,10 @@ public static HiveVectorAdaptorUsageMode getHiveConfValue(HiveConf hiveConf) { private HiveVectorAdaptorUsageMode hiveVectorAdaptorUsageMode; + public void setOperatorType(OperatorType operatorType) { + this.operatorType = operatorType; + } + private void setHiveConfVars(HiveConf hiveConf) { hiveVectorAdaptorUsageMode = HiveVectorAdaptorUsageMode.getHiveConfValue(hiveConf); } @@ -1459,6 +1465,8 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, VectorExpression ve = null; if (udf instanceof GenericUDFBetween && mode == VectorExpressionDescriptor.Mode.FILTER) { ve = 
getBetweenFilterExpression(childExpr, mode, returnType); + } else if (udf instanceof GenericUDFIndex) { + ve = getIndexExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIn) { ve = getInExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFOPPositive) { @@ -1501,6 +1509,34 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, return ve; } + private VectorExpression getIndexExpression(List childExpr, + VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { + if (operatorType == OperatorType.FILTER || operatorType == OperatorType.SELECT) { + ExprNodeDesc first = childExpr.get(0); + ExprNodeDesc second = childExpr.get(1); + if (first instanceof ExprNodeColumnDesc) { + if (second instanceof ExprNodeConstantDesc) { + ListIndexColScalar listIndex = (ListIndexColScalar) createVectorExpression( + ListIndexColScalar.class, null, mode, returnType); + listIndex.setListColumn(getInputColumnIndex((ExprNodeColumnDesc) first)); + listIndex.setIndex((int) getIntFamilyScalarAsLong((ExprNodeConstantDesc) second)); + listIndex.setOutputColumn(ocm.allocateOutputColumn(returnType)); + listIndex.setChildExpressions(getVectorExpressions(childExpr, mode)); + return listIndex; + } else { + ListIndexColColumn listIndex = (ListIndexColColumn) createVectorExpression( + ListIndexColColumn.class, null, mode, returnType); + listIndex.setListColumn(getInputColumnIndex((ExprNodeColumnDesc) first)); + listIndex.setIndexColumn(getInputColumnIndex((ExprNodeColumnDesc) second)); + listIndex.setOutputColumn(ocm.allocateOutputColumn(returnType)); + listIndex.setChildExpressions(getVectorExpressions(childExpr, mode)); + return listIndex; + } + } + } + return null; + } + private VectorExpression getCastToTimestamp(GenericUDFTimestamp udf, List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { VectorExpression ve = getVectorExpressionForUdf(udf, udf.getClass(), 
childExpr, mode, returnType); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndex.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndex.java new file mode 100644 index 0000000..2abff21 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndex.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.hadoop.hive.ql.exec.vector.expressions;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Base class for the vectorized list (array) index expressions
 * {@code ListIndexColScalar} and {@code ListIndexColColumn}.
 *
 * <p>Holds the shared output column and derives the expression's output type
 * from the list child's type name (e.g. {@code array<string>} yields
 * {@code string}).</p>
 */
public abstract class ListIndex extends VectorExpression {

  private static final long serialVersionUID = 1L;

  // Captures the element type of a list type name: "array<string>" -> "string".
  protected static final Pattern PATTERN = Pattern.compile("array<(.*)>");

  protected int outputColumn;

  @Override
  public void init(Configuration conf) {
    super.init(conf);
    // BUG FIX: Matcher.group() must only be called after a successful match
    // (otherwise it throws IllegalStateException), and the no-arg group()
    // returns the ENTIRE match ("array<string>"), not the element type.
    // Use matches() and capture group 1 to extract the element type.
    String childType = childExpressions[0].getOutputType();
    Matcher matcher = PATTERN.matcher(childType);
    if (matcher.matches()) {
      setOutputType(matcher.group(1));
    } else {
      // Not an array type name; fall back to the child's type unchanged.
      setOutputType(childType);
    }
  }

  @Override
  public int getOutputColumn() {
    return outputColumn;
  }

  @Override
  public VectorExpressionDescriptor.Descriptor getDescriptor() {
    // Construction is done explicitly in VectorizationContext.getIndexExpression,
    // so no descriptor-based matching is needed.
    return null;
  }

  public void setOutputColumn(int outputColumn) {
    this.outputColumn = outputColumn;
  }
}
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +public class ListIndexColColumn extends ListIndex { + private static final long serialVersionUID = 1L; + + private int listColumn; + private int indexColumn; + + public ListIndexColColumn() { + super(); + } + + public void setListColumn(int listColumn) { + this.listColumn = listColumn; + } + + public void setIndexColumn(int indexColumn) { + this.indexColumn = indexColumn; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + ColumnVector outV = batch.cols[outputColumn]; + ListColumnVector listV = (ListColumnVector) batch.cols[listColumn]; + ColumnVector childV = listV.child; + LongColumnVector indexColumnVector = (LongColumnVector) batch.cols[indexColumn]; + indexColumnVector.flatten(batch.selectedInUse, batch.selected, batch.size); + long[] indexV = indexColumnVector.vector; + + outV.noNulls = true; + if (listV.isRepeating) { + if (listV.isNull[0]) { + outV.isNull[0] = true; + } else { + if (indexV[0] >= listV.lengths[0]) { + throw new IndexOutOfBoundsException(); + } + outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV); + } + } else { + if (batch.selectedInUse) { + for (int j = 0; j < batch.size; j++) { + int i = batch.selected[j]; + if (indexV[i] >= listV.lengths[i]) { + throw new IndexOutOfBoundsException(); + } + outV.setElement(i, (int) (listV.offsets[i] + indexV[i]), childV); + } + } else { + for (int i = 0; i < batch.size; i++) { + if (indexV[i] >= listV.lengths[i]) { + throw new IndexOutOfBoundsException(); + } + outV.setElement(i, (int) (listV.offsets[i] + indexV[i]), childV); + } + } + } + + indexColumnVector.unFlatten(); + } +} 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java new file mode 100644 index 0000000..4683428 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +public class ListIndexColScalar extends ListIndex { + private static final long serialVersionUID = 1L; + + private int listColumn; + private int index; + + public ListIndexColScalar() { + super(); + } + + public void setListColumn(int listColumn) { + this.listColumn = listColumn; + } + + public void setIndex(int index) { + this.index = index; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + ColumnVector outV = batch.cols[outputColumn]; + ListColumnVector listV = (ListColumnVector) batch.cols[listColumn]; + ColumnVector childV = listV.child; + + outV.noNulls = true; + if (listV.isRepeating) { + if (listV.isNull[0]) { + outV.isNull[0] = true; + } else { + if (index >= listV.lengths[0]) { + throw new IndexOutOfBoundsException(); + } + outV.setElement(0, (int) (listV.offsets[0] + index), childV); + } + } else { + if (batch.selectedInUse) { + for (int j = 0; j < batch.size; j++) { + int i = batch.selected[j]; + if (index >= listV.lengths[i]) { + throw new IndexOutOfBoundsException(); + } + outV.setElement(i, (int) (listV.offsets[i] + index), childV); + } + } else { + for (int i = 0; i < batch.size; i++) { + if (index >= listV.lengths[i]) { + throw new IndexOutOfBoundsException(); + } + outV.setElement(i, (int) (listV.offsets[i] + index), childV); + } + } + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 50eda15..1787f0d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.optimizer.physical; +import static org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.isIntFamily; import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM; import java.io.Serializable; @@ -2129,6 +2130,7 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressi return false; } boolean isInExpression = false; + boolean isIndexExpression = false; if (desc instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc; boolean r = validateGenericUdf(d); @@ -2138,15 +2140,30 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressi } GenericUDF genericUDF = d.getGenericUDF(); isInExpression = (genericUDF instanceof GenericUDFIn); + isIndexExpression = (genericUDF instanceof GenericUDFIndex); } if (desc.getChildren() != null) { - if (isInExpression - && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) { + ExprNodeDesc first = desc.getChildren().get(0); + if (isInExpression && first.getTypeInfo().getCategory() == Category.STRUCT) { // Don't restrict child expressions for projection. // Always use loose FILTER mode. if (!validateStructInExpression(desc, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) { return false; } + } else if (isIndexExpression && first.getTypeInfo().getCategory() == Category.LIST) { + ExprNodeDesc second = desc.getChildren().get(1); + if (second.getTypeInfo().getCategory() == Category.PRIMITIVE) { + switch (((PrimitiveTypeInfo) second.getTypeInfo()).getPrimitiveCategory()) { + case BYTE: + case SHORT: + case INT: + case LONG: + return true; + default: + return false; + } + } + return true; } else { for (ExprNodeDesc d : desc.getChildren()) { // Don't restrict child expressions for projection. 
@@ -2212,6 +2229,9 @@ boolean validateExprNodeDesc(ExprNodeDesc desc, String expressionTitle, } try { VectorizationContext vc = new ValidatorVectorizationContext(hiveConf); + if (currentOperator != null) { + vc.setOperatorType(currentOperator.getType()); + } if (vc.getVectorExpression(desc, mode) == null) { // TODO: this cannot happen - VectorizationContext throws in such cases. setExpressionIssue(expressionTitle, "getVectorExpression returned null"); @@ -3231,6 +3251,7 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { Operator vectorOp = null; boolean isNative; + vContext.setOperatorType(op.getType()); switch (op.getType()) { case TABLESCAN: vectorOp = vectorizeTableScanOperator(op, vContext); diff --git ql/src/test/queries/clientpositive/vector_array.q ql/src/test/queries/clientpositive/vector_array.q new file mode 100644 index 0000000..6696485 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_array.q @@ -0,0 +1,40 @@ +set hive.compute.query.using.stats=false; +set hive.strict.checks.cartesian.product=false; +set hive.cli.print.header=true; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.mapred.mode=nonstrict; + +SET hive.vectorized.execution.enabled=false; +set hive.llap.io.enabled=false; + + +CREATE TABLE orc_array +STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +AS +SELECT + ARRAY(1, 2) AS int_array, + ARRAY(1.2, 3.4) AS double_array, + ARRAY('A', 'B') AS string_array, + 0 AS number +UNION ALL +SELECT + ARRAY(3, 4) AS int_array, + ARRAY(5.6, 7.8) AS double_array, + ARRAY('C', 'D') AS string_array, + 1 AS number; + + +EXPLAIN VECTORIZATION EXPRESSION +SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b; + +SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b; + + +SET hive.vectorized.execution.enabled=true; +set hive.llap.io.enabled=true; + +EXPLAIN VECTORIZATION EXPRESSION 
+SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b; + +SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b; diff --git ql/src/test/results/clientpositive/llap/vector_array.q.out ql/src/test/results/clientpositive/llap/vector_array.q.out new file mode 100644 index 0000000..6087616 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_array.q.out @@ -0,0 +1,215 @@ +PREHOOK: query: CREATE TABLE orc_array +STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +AS +SELECT + ARRAY(1, 2) AS int_array, + ARRAY(1.2, 3.4) AS double_array, + ARRAY('A', 'B') AS string_array, + 0 AS number +UNION ALL +SELECT + ARRAY(3, 4) AS int_array, + ARRAY(5.6, 7.8) AS double_array, + ARRAY('C', 'D') AS string_array, + 1 AS number +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_array +POSTHOOK: query: CREATE TABLE orc_array +STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +AS +SELECT + ARRAY(1, 2) AS int_array, + ARRAY(1.2, 3.4) AS double_array, + ARRAY('A', 'B') AS string_array, + 0 AS number +UNION ALL +SELECT + ARRAY(3, 4) AS int_array, + ARRAY(5.6, 7.8) AS double_array, + ARRAY('C', 'D') AS string_array, + 1 AS number +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_array +POSTHOOK: Lineage: orc_array.double_array EXPRESSION [] +POSTHOOK: Lineage: orc_array.int_array EXPRESSION [] +POSTHOOK: Lineage: orc_array.number EXPRESSION [] +POSTHOOK: Lineage: orc_array.string_array EXPRESSION [] +_u1.int_array _u1.double_array _u1.string_array _u1.number +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b 
+PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_array + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: string_array[1] (type: string), string_array[number] (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_array +#### A masked pattern was 
here #### +POSTHOOK: query: SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_array +#### A masked pattern was here #### +a b +B A +D D +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_array + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Select Operator + expressions: string_array[1] (type: string), string_array[number] (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 6] + selectExpressions: ListIndexColScalar(children: col 2, ConstantVectorExpression(val 1) -> 5:long) -> 4:null, ListIndexColColumn(children: col 2, col 3) -> 6:null + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_array +#### A masked pattern was here #### +POSTHOOK: query: SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@orc_array +#### A masked pattern was here #### +a b +B A +D D diff --git ql/src/test/results/clientpositive/llap/vector_complex_join.q.out ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index 5ea4b0f..bd1b134 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -236,7 +236,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Predicate expression for FILTER operator: Data type array of Column[a] not supported + notVectorizedReason: Key expression for REDUCESINK operator: Unexpected hive type name array vectorized: false Stage: Stage-0 diff --git ql/src/test/results/clientpositive/vector_array.q.out ql/src/test/results/clientpositive/vector_array.q.out new file mode 100644 index 0000000..c5f81c5 --- /dev/null +++ ql/src/test/results/clientpositive/vector_array.q.out @@ -0,0 +1,188 @@ +PREHOOK: query: CREATE TABLE orc_array +STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +AS +SELECT + ARRAY(1, 2) AS int_array, + ARRAY(1.2, 3.4) AS double_array, + ARRAY('A', 'B') AS string_array, + 0 AS number +UNION ALL +SELECT + ARRAY(3, 4) AS int_array, + ARRAY(5.6, 7.8) AS double_array, + ARRAY('C', 'D') AS string_array, + 1 AS number +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_array +POSTHOOK: query: CREATE TABLE orc_array +STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +AS +SELECT + ARRAY(1, 2) AS int_array, + ARRAY(1.2, 3.4) AS double_array, + ARRAY('A', 'B') AS string_array, + 0 AS number +UNION ALL +SELECT + ARRAY(3, 4) AS int_array, + ARRAY(5.6, 7.8) 
AS double_array, + ARRAY('C', 'D') AS string_array, + 1 AS number +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_array +POSTHOOK: Lineage: orc_array.double_array EXPRESSION [] +POSTHOOK: Lineage: orc_array.int_array EXPRESSION [] +POSTHOOK: Lineage: orc_array.number EXPRESSION [] +POSTHOOK: Lineage: orc_array.string_array EXPRESSION [] +_u1.int_array _u1.double_array _u1.string_array _u1.number +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orc_array + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: string_array[1] (type: string), string_array[number] (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_array +#### A masked pattern was here #### +POSTHOOK: query: SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_array +#### A masked pattern was here #### +a b +B A +D D +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orc_array + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Select Operator + expressions: string_array[1] (type: string), string_array[number] (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 6] + selectExpressions: ListIndexColScalar(children: col 2, ConstantVectorExpression(val 1) -> 5:long) -> 4:null, ListIndexColColumn(children: col 2, col 3) -> 6:null + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
string), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Uniform Hash IS false + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_array +#### A masked pattern was here #### +POSTHOOK: query: SELECT string_array[1] AS a, string_array[number] AS b FROM orc_array ORDER BY a, b 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_array +#### A masked pattern was here #### +a b +B A +D D diff --git ql/src/test/results/clientpositive/vector_complex_join.q.out ql/src/test/results/clientpositive/vector_complex_join.q.out index 513c159..51fd09c 100644 --- ql/src/test/results/clientpositive/vector_complex_join.q.out +++ ql/src/test/results/clientpositive/vector_complex_join.q.out @@ -214,7 +214,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Predicate expression for FILTER operator: Data type array of Column[a] not supported + notVectorizedReason: Key expression for MAPJOIN operator: Unexpected hive type name array vectorized: false Local Work: Map Reduce Local Work