diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 409fc90..1cc0104 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -741,7 +741,8 @@ minillaplocal.query.files=acid_globallimit.q,\ smb_mapjoin_17.q,\ groupby_resolution.q,\ windowing_windowspec2.q,\ - vectorized_join46.q + vectorized_join46.q,\ + vectorized_multi_output_select.q encrypted.query.files=encryption_join_unencrypted_tbl.q,\ encryption_insert_partition_static.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java index 5c490ef..30ab503 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -134,13 +135,59 @@ public void process(Object row, int tag) throws HiveException { int originalProjectionSize = vrg.projectionSize; vrg.projectionSize = projectedOutputColumns.length; vrg.projectedColumns = this.projectedOutputColumns; - forward(vrg, outputObjInspector); - + // Checks if there are multiple children. + // If so, before 1st child we copy batch.size, batch.selectedInUse, and the contents + // of selected array for batch.size elements when batch.selectedInUse is true. + // Then, before 2nd and further children, we restore those values. 
+ if (getNumChild() > 1) { + int size = vrg.size; + boolean selectedInUse = vrg.selectedInUse; + int[] selected = null; + if (selectedInUse) { + selected = Arrays.copyOf(vrg.selected, size); + } + internalForward(vrg, size, selectedInUse, selected); + } else { + forward(vrg, outputObjInspector); + } // Revert the projected columns back, because vrg will be re-used. vrg.projectionSize = originalProjectionSize; vrg.projectedColumns = originalProjections; } + private void internalForward(VectorizedRowBatch vrg, int size, boolean selectedInUse, int[] selected) + throws HiveException { + runTimeNumRows++; + if (getDone()) { + return; + } + + int childrenDone = 0; + boolean reset = false; + for (int i = 0; i < childOperatorsArray.length; i++) { + Operator o = childOperatorsArray[i]; + if (o.getDone()) { + childrenDone++; + } else { + if (reset) { + // Restore original values + vrg.size = size; + vrg.selectedInUse = selectedInUse; + if (vrg.selectedInUse) { + vrg.selected = selected != null ? Arrays.copyOf(selected, size) : null; + } + } + o.process(vrg, childOperatorsTag[i]); + reset = true; + } + } + + // if all children are done, this operator is also done + if (childrenDone != 0 && childrenDone == childOperatorsArray.length) { + setDone(true); + } + } + public VectorExpression[] getvExpressions() { return vExpressions; } diff --git ql/src/test/queries/clientpositive/vectorized_multi_output_select.q ql/src/test/queries/clientpositive/vectorized_multi_output_select.q new file mode 100644 index 0000000..e768a5d --- /dev/null +++ ql/src/test/queries/clientpositive/vectorized_multi_output_select.q @@ -0,0 +1,28 @@ +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask.size=3000; +set hive.strict.checks.cartesian.product=false; +set hive.merge.nway.joins=false; +set hive.vectorized.execution.enabled=true; + +explain +select * from ( + select count(*) as h8_30_to_9 + from src + join src1 on src.key = src1.key + where src1.value = "val_278") s1 +join 
( + select count(*) as h9_to_9_30 + from src + join src1 on src.key = src1.key + where src1.value = "val_255") s2; + +select * from ( + select count(*) as h8_30_to_9 + from src + join src1 on src.key = src1.key + where src1.value = "val_278") s1 +join ( + select count(*) as h9_to_9_30 + from src + join src1 on src.key = src1.key + where src1.value = "val_255") s2; diff --git ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out new file mode 100644 index 0000000..f744eb6 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out @@ -0,0 +1,201 @@ +Warning: Map Join MAPJOIN[43][bigTable=?] in task 'Reducer 2' is a cross product +PREHOOK: query: explain +select * from ( + select count(*) as h8_30_to_9 + from src + join src1 on src.key = src1.key + where src1.value = "val_278") s1 +join ( + select count(*) as h9_to_9_30 + from src + join src1 on src.key = src1.key + where src1.value = "val_255") s2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from ( + select count(*) as h8_30_to_9 + from src + join src1 on src.key = src1.key + where src1.value = "val_278") s1 +join ( + select count(*) as h9_to_9_30 + from src + join src1 on src.key = src1.key + where src1.value = "val_255") s2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 3 (BROADCAST_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 
43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 5 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((value = 'val_278') and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 
(type: string) + Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((value = 'val_255') and key is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + 
Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[43][bigTable=?] in task 'Reducer 2' is a cross product +PREHOOK: query: select * from ( + select count(*) as h8_30_to_9 + from src + join src1 on src.key = src1.key + where src1.value = "val_278") s1 +join ( + select count(*) as h9_to_9_30 + from src + join src1 on src.key = src1.key + where src1.value = "val_255") s2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from ( + select count(*) as h8_30_to_9 + from src + join src1 on src.key = src1.key + where src1.value = "val_278") s1 +join ( + select count(*) as h9_to_9_30 + from src + join src1 on src.key = src1.key + where src1.value = "val_255") s2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +2 2