diff --git ql/src/test/queries/clientpositive/vectorization_multi_value.q ql/src/test/queries/clientpositive/vectorization_multi_value.q
new file mode 100644
index 0000000000..5f6aad98a9
--- /dev/null
+++ ql/src/test/queries/clientpositive/vectorization_multi_value.q
@@ -0,0 +1,15 @@
+set hive.vectorized.execution.enabled=true;
+DROP TABLE IF EXISTS cond_vector;
+CREATE TABLE cond_vector(a STRING) STORED AS ORC;
+INSERT OVERWRITE TABLE cond_vector VALUES("a/b");
+set hive.fetch.task.conversion=minimal;
+set hive.execution.mode=container;
+
+SELECT IF(1=1, MAP("a","b"), NULL) FROM cond_vector;
+EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("Mathematics","78"), NULL) FROM cond_vector;
+
+SELECT IF(1=1, ARRAY("c", "d"), NULL) FROM cond_vector;
+EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY("a", "b"), NULL) FROM cond_vector;
+
+
+
diff --git ql/src/test/results/clientpositive/vectorization_multi_value.q.out ql/src/test/results/clientpositive/vectorization_multi_value.q.out
new file mode 100644
index 0000000000..691c6486ca
--- /dev/null
+++ ql/src/test/results/clientpositive/vectorization_multi_value.q.out
@@ -0,0 +1,175 @@
+PREHOOK: query: DROP TABLE IF EXISTS cond_vector
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS cond_vector
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE cond_vector(a STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cond_vector
+POSTHOOK: query: CREATE TABLE cond_vector(a STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cond_vector
+PREHOOK: query: INSERT OVERWRITE TABLE cond_vector VALUES("a/b")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@cond_vector
+POSTHOOK: query: INSERT OVERWRITE TABLE cond_vector VALUES("a/b")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@cond_vector
+POSTHOOK: Lineage: cond_vector.a SCRIPT []
+PREHOOK: query: SELECT IF(1=1, MAP("a","b"), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT IF(1=1, MAP("a","b"), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+{"a":"b"}
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("Mathematics","78"), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("Mathematics","78"), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cond_vector
+            Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:a:string, 1:ROW__ID:struct]
+            Select Operator
+              expressions: if(true, map('Mathematics':'78'), null) (type: map)
+              outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [4]
+                  selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:map, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('Mathematics':'78')) -> 3:map) -> 4:map
+              Statistics: Num rows: 1 Data size: 337 Basic stats: COMPLETE Column stats: COMPLETE
+              File Output Operator
+                compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
+                Statistics: Num rows: 1 Data size: 337 Basic stats: COMPLETE Column stats: COMPLETE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFormatFeatureSupport: [DECIMAL_64]
+          featureSupportInUse: [DECIMAL_64]
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 1
+              includeColumns: []
+              dataColumns: a:string
+              partitionColumnCount: 0
+              scratchColumnTypeNames: [bigint, map, map]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT IF(1=1, ARRAY("c", "d"), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT IF(1=1, ARRAY("c", "d"), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+["c","d"]
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY("a", "b"), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY("a", "b"), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cond_vector
+            Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:a:string, 1:ROW__ID:struct]
+            Select Operator
+              expressions: if(true, array('a','b'), null) (type: array)
+              outputColumnNames: _col0
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [4]
+                  selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:array, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array('a','b')) -> 3:array) -> 4:array
+              Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
+              File Output Operator
+                compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
+                Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFormatFeatureSupport: [DECIMAL_64]
+          featureSupportInUse: [DECIMAL_64]
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: true
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 1
+              includeColumns: []
+              dataColumns: a:string
+              partitionColumnCount: 0
+              scratchColumnTypeNames: [bigint, array, array]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
index 8cbcc029a5..e11bd78ba0 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector;
 
+import java.util.Arrays;
+
 /**
  * The representation of a vectorized column of list objects.
  *
@@ -158,7 +160,68 @@ public void unFlatten() {
   @Override
   public void copySelected(boolean selectedInUse, int[] sel, int size,
       ColumnVector outputColVector) {
-    throw new RuntimeException("Not supported");
+    final ListColumnVector target = (ListColumnVector) outputColVector;
+    final boolean[] targetIsNull = target.isNull;
+
+    // Every output field that matters is written explicitly below, so the
+    // output column is not reset as a whole first.
+    target.isRepeating = false;
+
+    // Repeating input: only entry 0 is read, and the output is marked repeating.
+    if (isRepeating) {
+      if (!noNulls && isNull[0]) {
+        targetIsNull[0] = true;
+        target.noNulls = false;
+      } else {
+        targetIsNull[0] = false;
+        target.setElement(0, 0, this);
+      }
+      target.isRepeating = true;
+      return;
+    }
+
+    // Dense batch (no selection vector): shallow-copy the child vector into the
+    // output's child and bulk-copy the first `size` per-row entries.
+    if (!selectedInUse) {
+      final boolean inputHasNulls = !noNulls;
+      if (inputHasNulls) {
+        target.noNulls = false;
+      } else if (!target.noNulls) {
+        // Clear every null flag so that noNulls can safely be set again.
+        Arrays.fill(targetIsNull, false);
+        target.noNulls = true;
+      }
+      child.shallowCopyTo(target.child);
+      if (inputHasNulls) {
+        System.arraycopy(isNull, 0, target.isNull, 0, size);
+      }
+      System.arraycopy(offsets, 0, target.offsets, 0, size);
+      System.arraycopy(lengths, 0, target.lengths, 0, size);
+      target.childCount = childCount;
+      return;
+    }
+
+    // Selected rows, input without nulls: copy row by row. If the output may
+    // still carry null flags, clear each flag before setElement is called.
+    if (noNulls) {
+      final boolean clearFlags = !target.noNulls;
+      for (int j = 0; j != size; j++) {
+        final int row = sel[j];
+        if (clearFlags) {
+          targetIsNull[row] = false;
+        }
+        target.setElement(row, row, this);
+      }
+      return;
+    }
+
+    // Selected rows, input with nulls: mirror each null flag, then copy the row.
+    target.noNulls = false;
+    for (int j = 0; j < size; j++) {
+      final int row = sel[j];
+      target.isNull[row] = isNull[row];
+      target.setElement(row, row, this);
+    }
   }
 
 }
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
index 3143a44ec8..8ad3ee9c03 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector;
 
+import java.util.Arrays;
+
 /**
  * The representation of a vectorized column of map objects.
  *
@@ -173,6 +175,68 @@ public void unFlatten() {
   @Override
   public void copySelected(boolean selectedInUse, int[] sel, int size,
       ColumnVector outputColVector) {
-    throw new RuntimeException("Not supported");
+    final MapColumnVector target = (MapColumnVector) outputColVector;
+    final boolean[] targetIsNull = target.isNull;
+
+    // Every output field that matters is written explicitly below, so the
+    // output column is not reset as a whole first.
+    target.isRepeating = false;
+
+    // Repeating input: only entry 0 is read, and the output is marked repeating.
+    if (isRepeating) {
+      if (!noNulls && isNull[0]) {
+        targetIsNull[0] = true;
+        target.noNulls = false;
+      } else {
+        targetIsNull[0] = false;
+        target.setElement(0, 0, this);
+      }
+      target.isRepeating = true;
+      return;
+    }
+
+    // Dense batch (no selection vector): shallow-copy the key and value vectors
+    // into the output and bulk-copy the first `size` per-row entries.
+    if (!selectedInUse) {
+      final boolean inputHasNulls = !noNulls;
+      if (inputHasNulls) {
+        target.noNulls = false;
+      } else if (!target.noNulls) {
+        // Clear every null flag so that noNulls can safely be set again.
+        Arrays.fill(targetIsNull, false);
+        target.noNulls = true;
+      }
+      keys.shallowCopyTo(target.keys);
+      values.shallowCopyTo(target.values);
+      if (inputHasNulls) {
+        System.arraycopy(isNull, 0, target.isNull, 0, size);
+      }
+      System.arraycopy(offsets, 0, target.offsets, 0, size);
+      System.arraycopy(lengths, 0, target.lengths, 0, size);
+      target.childCount = childCount;
+      return;
+    }
+
+    // Selected rows, input without nulls: copy row by row. If the output may
+    // still carry null flags, clear each flag before setElement is called.
+    if (noNulls) {
+      final boolean clearFlags = !target.noNulls;
+      for (int j = 0; j != size; j++) {
+        final int row = sel[j];
+        if (clearFlags) {
+          targetIsNull[row] = false;
+        }
+        target.setElement(row, row, this);
+      }
+      return;
+    }
+
+    // Selected rows, input with nulls: mirror each null flag, then copy the row.
+    target.noNulls = false;
+    for (int j = 0; j < size; j++) {
+      final int row = sel[j];
+      target.isNull[row] = isNull[row];
+      target.setElement(row, row, this);
+    }
   }
 }