diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index e445d3b..42eef40 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -269,6 +269,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vector_acid3.q,\
   vector_aggregate_9.q,\
   vector_aggregate_without_gby.q,\
+  vector_array.q,\
   vector_auto_smb_mapjoin_14.q,\
   vector_between_columns.q,\
   vector_between_in.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
index f4499d7..5370581 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
@@ -76,6 +76,7 @@
     INTERVAL_YEAR_MONTH     (0x100),
     INTERVAL_DAY_TIME       (0x200),
     BINARY                  (0x400),
+    ARRAY                   (0x800),
     DATETIME_FAMILY         (DATE.value | TIMESTAMP.value),
     INTERVAL_FAMILY         (INTERVAL_YEAR_MONTH.value | INTERVAL_DAY_TIME.value),
     INT_INTERVAL_YEAR_MONTH     (INT_FAMILY.value | INTERVAL_YEAR_MONTH.value),
@@ -125,6 +126,8 @@ public static ArgumentType fromHiveTypeName(String hiveTypeName) {
       } else if (lower.equals("void")) {
         // The old code let void through...
         return INT_FAMILY;
+      } else if (VectorizationContext.arrayTypePattern.matcher(lower).matches()) {
+        return ARRAY;
       } else {
         return NONE;
       }
@@ -143,6 +146,8 @@ public static ArgumentType getType(String inType) {
         return CHAR;
       } else if (VectorizationContext.varcharTypePattern.matcher(inType).matches()) {
         return VARCHAR;
+      } else if (VectorizationContext.arrayTypePattern.matcher(inType).matches()) {
+        return ARRAY;
       }
       return valueOf(inType.toUpperCase());
     }
@@ -169,6 +174,8 @@ public static String getVectorColumnSimpleName(ArgumentType argType) {
         argType == VARCHAR ||
         argType == BINARY) {
       return "String";
+    } else if (argType == ARRAY) {
+      return "Array";
     } else {
       return "None";
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 5b0c2bf..2f1bd07 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -319,6 +319,9 @@ public void addProjectionColumn(String columnName, int vectorBatchColIndex) {
   public static final Pattern charVarcharTypePattern = Pattern.compile("char.*|varchar.*",
       Pattern.CASE_INSENSITIVE);
 
+  public static final Pattern arrayTypePattern = Pattern.compile("array.*",
+      Pattern.CASE_INSENSITIVE);
+
   //Map column number to type
   private OutputColumnManager ocm;
 
@@ -2461,6 +2464,10 @@ public static boolean isDecimalFamily(String colType) {
     return decimalTypePattern.matcher(colType).matches();
   }
 
+  public static boolean isArrayFamily(String colType) {
+    return arrayTypePattern.matcher(colType).matches();
+  }
+
   private Object getScalarValue(ExprNodeConstantDesc constDesc)
       throws HiveException {
     if (constDesc.getTypeString().equalsIgnoreCase("String")) {
@@ -2620,6 +2627,8 @@ static String getUndecoratedName(String hiveTypeName) throws HiveException {
     case INTERVAL_YEAR_MONTH:
     case INTERVAL_DAY_TIME:
       return hiveTypeName;
+    case ARRAY:
+      return "Array";
     default:
       throw new HiveException("Unexpected hive type name " + hiveTypeName);
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndex.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndex.java
new file mode 100644
index 0000000..fecb516
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndex.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.conf.Configuration;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+abstract class ListIndex extends VectorExpression {
+  protected static Pattern PATTERN = Pattern.compile("array<(.*)>");
+  protected int outputColumn;
+
+  @Override
+  public void init(Configuration conf) {
+    super.init(conf);
+    // The output type is the element type of the array<...> child, e.g. array<string> -> string.
+    Matcher matcher = PATTERN.matcher(childExpressions[0].getOutputType());
+    if (matcher.matches()) {
+      setOutputType(matcher.group(1));
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
new file mode 100644
index 0000000..0f1e2bb
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+public class ListIndexColColumn extends ListIndex {
+  private static final long serialVersionUID = 1L;
+
+  private int listColumn;
+  private int indexColumn;
+
+  public ListIndexColColumn() {
+    super();
+  }
+
+  public ListIndexColColumn(int listColumn, int indexColumn, int outputColumn) {
+    this.listColumn = listColumn;
+    this.indexColumn = indexColumn;
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    ColumnVector outV = batch.cols[outputColumn];
+    ListColumnVector listV = (ListColumnVector) batch.cols[listColumn];
+    ColumnVector childV = listV.child;
+    LongColumnVector indexColumnVector = (LongColumnVector) batch.cols[indexColumn];
+    indexColumnVector.flatten(batch.selectedInUse, batch.selected, batch.size);
+    long[] indexV = indexColumnVector.vector;
+
+    outV.noNulls = true;
+    if (listV.isRepeating) {
+      if (listV.isNull[0]) {
+        outV.isNull[0] = true;
+      } else {
+        if (indexV[0] >= listV.lengths[0]) {
+          throw new IndexOutOfBoundsException();
+        }
+        outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV);
+      }
+    } else {
+      if (batch.selectedInUse) {
+        for (int j = 0; j < batch.size; j++) {
+          int i = batch.selected[j];
+          if (indexV[i] >= listV.lengths[i]) {
+            throw new IndexOutOfBoundsException();
+          }
+          outV.setElement(i, (int) (listV.offsets[i] + indexV[i]), childV);
+        }
+      } else {
+        for (int i = 0; i < batch.size; i++) {
+          if (indexV[i] >= listV.lengths[i]) {
+            throw new IndexOutOfBoundsException();
+          }
+          outV.setElement(i, (int) (listV.offsets[i] + indexV[i]), childV);
+        }
+      }
+    }
+
+    indexColumnVector.unFlatten();
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.ARRAY,
+            VectorExpressionDescriptor.ArgumentType.INT_FAMILY)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
new file mode 100644
index 0000000..d8c2dfc
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+public class ListIndexColScalar extends ListIndex {
+  private static final long serialVersionUID = 1L;
+
+  private int listColumn;
+  private int index;
+
+  public ListIndexColScalar() {
+    super();
+  }
+
+  public ListIndexColScalar(int listColumn, int index, int outputColumn) {
+    this.listColumn = listColumn;
+    this.index = index;
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    ColumnVector outV = batch.cols[outputColumn];
+    ListColumnVector listV = (ListColumnVector) batch.cols[listColumn];
+    ColumnVector childV = listV.child;
+
+    outV.noNulls = true;
+    if (listV.isRepeating) {
+      if (listV.isNull[0]) {
+        outV.isNull[0] = true;
+      } else {
+        if (index >= listV.lengths[0]) {
+          throw new IndexOutOfBoundsException();
+        }
+        outV.setElement(0, (int) (listV.offsets[0] + index), childV);
+      }
+    } else {
+      if (batch.selectedInUse) {
+        for (int j = 0; j < batch.size; j++) {
+          int i = batch.selected[j];
+          if (index >= listV.lengths[i]) {
+            throw new IndexOutOfBoundsException();
+          }
+          outV.setElement(i, (int) (listV.offsets[i] + index), childV);
+        }
+      } else {
+        for (int i = 0; i < batch.size; i++) {
+          if (index >= listV.lengths[i]) {
+            throw new IndexOutOfBoundsException();
+          }
+          outV.setElement(i, (int) (listV.offsets[i] + index), childV);
+        }
+      }
+    }
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.ARRAY,
+            VectorExpressionDescriptor.ArgumentType.INT_FAMILY)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
index 8e23094..2f5a48f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
@@ -34,7 +34,7 @@ public abstract class VectorExpression implements Serializable {
 
   public enum Type {
     STRING, CHAR, VARCHAR, TIMESTAMP, DATE, LONG, DOUBLE, DECIMAL,
-    INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME, BINARY, OTHER;
+    INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME, BINARY, ARRAY, OTHER;
     private static Map<String, Type> types = ImmutableMap.<String, Type>builder()
         .put("string", STRING)
         .put("char", CHAR)
@@ -47,6 +47,7 @@
         .put("interval_year_month", INTERVAL_YEAR_MONTH)
         .put("interval_day_time", INTERVAL_DAY_TIME)
         .put("binary", BINARY)
+        .put("array", ARRAY)
         .build();
 
     public static Type getValue(String name) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 50eda15..9aaa9a6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -261,6 +261,9 @@
     patternBuilder.append("|char.*");
     patternBuilder.append("|varchar.*");
 
+    // Array
+    patternBuilder.append("|array.*");
+
     supportedDataTypesPattern = Pattern.compile(patternBuilder.toString());
   }
 
@@ -397,6 +400,7 @@ public Vectorizer() {
     supportedGenericUDFs.add(GenericUDFAbs.class);
     supportedGenericUDFs.add(GenericUDFBetween.class);
     supportedGenericUDFs.add(GenericUDFIn.class);
+    supportedGenericUDFs.add(GenericUDFIndex.class);
     supportedGenericUDFs.add(GenericUDFCase.class);
     supportedGenericUDFs.add(GenericUDFWhen.class);
     supportedGenericUDFs.add(GenericUDFCoalesce.class);
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
index bdb2361..7cd8c8e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIndex.java
@@ -22,6 +22,9 @@
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.ListIndexColColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.ListIndexColScalar;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
@@ -37,6 +40,7 @@
  *
  */
 @Description(name = "index", value = "_FUNC_(a, n) - Returns the n-th element of a ")
+@VectorizedExpressions({ListIndexColColumn.class, ListIndexColScalar.class})
 public class GenericUDFIndex extends GenericUDF {
 
   private transient MapObjectInspector mapOI;
diff --git ql/src/test/queries/clientpositive/vector_array.q ql/src/test/queries/clientpositive/vector_array.q
new file mode 100644
index 0000000..179f409
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_array.q
@@ -0,0 +1,40 @@
+set hive.compute.query.using.stats=false;
+set hive.strict.checks.cartesian.product=false;
+set hive.cli.print.header=true;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.mapred.mode=nonstrict;
+
+SET hive.vectorized.execution.enabled=false;
+set hive.llap.io.enabled=false;
+
+
+CREATE TABLE orc_array
+STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000")
+AS
+SELECT
+  ARRAY(1, 2) AS int_array,
+  ARRAY(1.2, 3.4) AS double_array,
+  ARRAY('A', 'B') AS string_array,
+  0 AS number
+UNION ALL
+SELECT
+  ARRAY(3, 4) AS int_array,
+  ARRAY(5.6, 7.8) AS double_array,
+  ARRAY('C', 'D') AS string_array,
+  1 AS number;
+
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT string_array[1], string_array[number] FROM orc_array;
+
+SELECT string_array[1], string_array[number] FROM orc_array;
+
+
+SET hive.vectorized.execution.enabled=true;
+set hive.llap.io.enabled=true;
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT string_array[1], string_array[number] FROM orc_array;
+
+SELECT string_array[1], string_array[number] FROM orc_array;
diff --git ql/src/test/results/clientpositive/llap/vector_array.q.out ql/src/test/results/clientpositive/llap/vector_array.q.out
new file mode 100644
index 0000000..2ba755b
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/vector_array.q.out
@@ -0,0 +1,171 @@
+PREHOOK: query: CREATE TABLE orc_array
+STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000")
+AS
+SELECT
+  ARRAY(1, 2) AS int_array,
+  ARRAY(1.2, 3.4) AS double_array,
+  ARRAY('A', 'B') AS string_array,
+  0 AS number
+UNION ALL
+SELECT
+  ARRAY(3, 4) AS int_array,
+  ARRAY(5.6, 7.8) AS double_array,
+  ARRAY('C', 'D') AS string_array,
+  1 AS number
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_array
+POSTHOOK: query: CREATE TABLE orc_array
+STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000")
+AS
+SELECT
+  ARRAY(1, 2) AS int_array,
+  ARRAY(1.2, 3.4) AS double_array,
+  ARRAY('A', 'B') AS string_array,
+  0 AS number
+UNION ALL
+SELECT
+  ARRAY(3, 4) AS int_array,
+  ARRAY(5.6, 7.8) AS double_array,
+  ARRAY('C', 'D') AS string_array,
+  1 AS number
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_array
+POSTHOOK: Lineage: orc_array.double_array EXPRESSION []
+POSTHOOK: Lineage: orc_array.int_array EXPRESSION []
+POSTHOOK: Lineage: orc_array.number EXPRESSION []
+POSTHOOK: Lineage: orc_array.string_array EXPRESSION []
+_u1.int_array _u1.double_array _u1.string_array _u1.number
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT string_array[1], string_array[number] FROM orc_array
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT string_array[1], string_array[number] FROM orc_array
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orc_array
+                  Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: string_array[1] (type: string), string_array[number] (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT string_array[1], string_array[number] FROM orc_array
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_array
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT string_array[1], string_array[number] FROM orc_array
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_array
+#### A masked pattern was here ####
+_c0 _c1
+D D
+B A
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT string_array[1], string_array[number] FROM orc_array
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT string_array[1], string_array[number] FROM orc_array
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orc_array
+                  Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3]
+                  Select Operator
+                    expressions: string_array[1] (type: string), string_array[number] (type: string)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4, 5]
+                        selectExpressions: ListIndexColScalar -> 4:string, ListIndexColColumn -> 5:string
+                    Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT string_array[1], string_array[number] FROM orc_array
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_array
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT string_array[1], string_array[number] FROM orc_array
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_array
+#### A masked pattern was here ####
+_c0 _c1
+D D
+B A
diff --git ql/src/test/results/clientpositive/vector_array.q.out ql/src/test/results/clientpositive/vector_array.q.out
new file mode 100644
index 0000000..f26287a
--- /dev/null
+++ ql/src/test/results/clientpositive/vector_array.q.out
@@ -0,0 +1,164 @@
+PREHOOK: query: CREATE TABLE orc_array
+STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000")
+AS
+SELECT
+  ARRAY(1, 2) AS int_array,
+  ARRAY(1.2, 3.4) AS double_array,
+  ARRAY('A', 'B') AS string_array,
+  0 AS number
+UNION ALL
+SELECT
+  ARRAY(3, 4) AS int_array,
+  ARRAY(5.6, 7.8) AS double_array,
+  ARRAY('C', 'D') AS string_array,
+  1 AS number
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_array
+POSTHOOK: query: CREATE TABLE orc_array
+STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000")
+AS
+SELECT
+  ARRAY(1, 2) AS int_array,
+  ARRAY(1.2, 3.4) AS double_array,
+  ARRAY('A', 'B') AS string_array,
+  0 AS number
+UNION ALL
+SELECT
+  ARRAY(3, 4) AS int_array,
+  ARRAY(5.6, 7.8) AS double_array,
+  ARRAY('C', 'D') AS string_array,
+  1 AS number
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_array
+POSTHOOK: Lineage: orc_array.double_array EXPRESSION []
+POSTHOOK: Lineage: orc_array.int_array EXPRESSION []
+POSTHOOK: Lineage: orc_array.number EXPRESSION []
+POSTHOOK: Lineage: orc_array.string_array EXPRESSION []
+_u1.int_array _u1.double_array _u1.string_array _u1.number
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT string_array[1], string_array[number] FROM orc_array
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT string_array[1], string_array[number] FROM orc_array
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orc_array
+            Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: string_array[1] (type: string), string_array[number] (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT string_array[1], string_array[number] FROM orc_array
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_array
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT string_array[1], string_array[number] FROM orc_array
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_array
+#### A masked pattern was here ####
+_c0 _c1
+B A
+D D
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT string_array[1], string_array[number] FROM orc_array
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT string_array[1], string_array[number] FROM orc_array
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orc_array
+            Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3]
+            Select Operator
+              expressions: string_array[1] (type: string), string_array[number] (type: string)
+              outputColumnNames: _col0, _col1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [4, 5]
+                  selectExpressions: ListIndexColScalar -> 4:string, ListIndexColColumn -> 5:string
+              Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
+                Statistics: Num rows: 2 Data size: 812 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+      LLAP IO: all inputs
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT string_array[1], string_array[number] FROM orc_array
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_array
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT string_array[1], string_array[number] FROM orc_array
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_array
+#### A masked pattern was here ####
+_c0 _c1
+B A
+D D