diff --git data/files/TJOIN1 data/files/TJOIN1 new file mode 100644 index 0000000..20e0ed5 --- /dev/null +++ data/files/TJOIN1 @@ -0,0 +1,3 @@ +0|10|15 +1|20|25 +2|\N|50 diff --git data/files/TJOIN2 data/files/TJOIN2 new file mode 100644 index 0000000..bbf3927 --- /dev/null +++ data/files/TJOIN2 @@ -0,0 +1,4 @@ +0|10|BB +1|15|DD +2|\N|EE +3|10|FF diff --git data/files/TJOIN3 data/files/TJOIN3 new file mode 100644 index 0000000..a7e688b --- /dev/null +++ data/files/TJOIN3 @@ -0,0 +1,2 @@ +0|10|XX +1|15|YY diff --git data/files/TJOIN4 data/files/TJOIN4 new file mode 100644 index 0000000..972c8aa --- /dev/null +++ data/files/TJOIN4 @@ -0,0 +1 @@ +0|20|ZZ diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 935fd28..93f5618 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -288,6 +288,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_outer_join3.q,\ vector_outer_join4.q,\ vector_outer_join5.q,\ + vector_outer_join6.q,\ vector_partition_diff_num_cols.q,\ vector_partitioned_date_time.q,\ vector_reduce_groupby_decimal.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java index 34b81e7..c56903e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java @@ -260,4 +260,28 @@ public void copyByReference(VectorizedRowBatch inBatch, int inBatchIndex, Vector copyRow.copy(inBatch, inBatchIndex, outBatch, outBatchIndex); } } + + /** + * Builds a description of this object: the "VectorCopyRow " label followed by one + * comma-separated entry per element of subRowToBatchCopiersByValue, giving the + * copier's class name and its inColumnIndex and outColumnIndex. + */ + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("VectorCopyRow "); + // Track the first entry explicitly: sb already holds the label above, so a + // sb.length() > 0 test is always true and would emit ", " before the first copier. + boolean isFirst = true; + for (CopyRow copyRow : subRowToBatchCopiersByValue) { + if (!isFirst) { + sb.append(", "); + } + isFirst = false; + sb.append(copyRow.getClass().getName()); + sb.append(" inColumnIndex " + 
copyRow.inColumnIndex); + sb.append(" outColumnIndex " + copyRow.outColumnIndex); + } + return sb.toString(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index 2d9da84..c1c137b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -166,6 +166,8 @@ protected int generateHashMapResultSingleValue(VectorizedRowBatch batch, int batchIndex = allMatchs[allMatchesIndex + i]; + // Outer key copying is only used when we are using the input BigTable batch as the output. + // if (bigTableVectorCopyOuterKeys != null) { // Copy within row. bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, batch, batchIndex); @@ -232,17 +234,12 @@ protected void generateHashMapResultMultiValue(VectorizedRowBatch batch, // Copy the BigTable values into the overflow batch. Since the overflow batch may // not get flushed here, we must copy by value. + // Note this includes any outer join keys that need to go into the small table "area". if (bigTableRetainedVectorCopy != null) { bigTableRetainedVectorCopy.copyByValue(batch, batchIndex, overflowBatch, overflowBatch.size); } - // Reference the keys we just copied above. 
- if (bigTableVectorCopyOuterKeys != null) { - bigTableVectorCopyOuterKeys.copyByReference(overflowBatch, overflowBatch.size, - overflowBatch, overflowBatch.size); - } - if (smallTableVectorDeserializeRow != null) { byte[] bytes = byteSegmentRef.getBytes(); @@ -333,12 +330,6 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, overflowBatch.cols[column].isRepeating = true; } } - if (bigTableVectorCopyOuterKeys != null) { - bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, overflowBatch, 0); - for (int column : bigTableOuterKeyOutputVectorColumns) { - overflowBatch.cols[column].isRepeating = true; - } - } // Crucial here that we don't reset the overflow batch, or we will loose the small table // values we put in above. @@ -349,13 +340,6 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, ColumnVector colVector = overflowBatch.cols[column]; colVector.reset(); } - - if (bigTableVectorCopyOuterKeys != null) { - for (int column : bigTableOuterKeyOutputVectorColumns) { - ColumnVector colVector = overflowBatch.cols[column]; - colVector.reset(); - } - } } if (hashMapResult.isEof()) { diff --git ql/src/test/queries/clientpositive/vector_outer_join6.q ql/src/test/queries/clientpositive/vector_outer_join6.q new file mode 100644 index 0000000..b430108 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_outer_join6.q @@ -0,0 +1,42 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +SET hive.auto.convert.join=true; + +-- SORT_QUERY_RESULTS + +create table TJOIN1_txt (RNUM int , C1 int, C2 int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'; + +create table TJOIN2_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'; + +create table if not exists TJOIN3_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES 
TERMINATED BY '\n'; + +create table TJOIN4_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'; + +load data local inpath '../../data/files/TJOIN1' into table TJOIN1_txt; +load data local inpath '../../data/files/TJOIN2' into table TJOIN2_txt; +load data local inpath '../../data/files/TJOIN3' into table TJOIN3_txt; +load data local inpath '../../data/files/TJOIN4' into table TJOIN4_txt; + +create table TJOIN1 stored as orc AS SELECT * FROM TJOIN1_txt; +create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt; +create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt; +create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt; + +explain +select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1; + +select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1; + +explain +select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1; + +select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1; diff --git ql/src/test/results/clientpositive/tez/vector_outer_join6.q.out ql/src/test/results/clientpositive/tez/vector_outer_join6.q.out new file mode 100644 index 0000000..e3b8cf5 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_outer_join6.q.out @@ -0,0 +1,357 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table TJOIN1_txt (RNUM int , C1 int, C2 
int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1_txt +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table TJOIN1_txt (RNUM int , C1 int, C2 int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1_txt +PREHOOK: query: create table TJOIN2_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2_txt +POSTHOOK: query: create table TJOIN2_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2_txt +PREHOOK: query: create table if not exists TJOIN3_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN3_txt +POSTHOOK: query: create table if not exists TJOIN3_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN3_txt +PREHOOK: query: create table TJOIN4_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN4_txt +POSTHOOK: query: create table TJOIN4_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN4_txt +PREHOOK: query: load data local inpath 
'../../data/files/TJOIN1' into table TJOIN1_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN1' into table TJOIN1_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN2' into table TJOIN2_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN2' into table TJOIN2_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN3' into table TJOIN3_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin3_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN3' into table TJOIN3_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin3_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN4' into table TJOIN4_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin4_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN4' into table TJOIN4_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin4_txt +PREHOOK: query: create table TJOIN1 stored as orc AS SELECT * FROM TJOIN1_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin1_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table TJOIN1 stored as orc AS SELECT * FROM TJOIN1_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin1_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt +PREHOOK: type: CREATETABLE_AS_SELECT 
+PREHOOK: Input: default@tjoin2_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin2_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin3_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN3 +POSTHOOK: query: create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin3_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN3 +PREHOOK: query: create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin4_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN4 +POSTHOOK: query: create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin4_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN4 +PREHOOK: query: explain +select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) +#### A masked pattern was 
here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 372 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan + alias: tjoin3 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +PREHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +POSTHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +POSTHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +0 0 0 +0 3 0 +1 NULL NULL +2 NULL NULL +PREHOOK: query: explain +select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 
on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan + alias: tjoin3 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +PREHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +POSTHOOK: query: select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: 
default@tjoin2 +POSTHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +0 0 +0 3 +1 NULL +2 NULL diff --git ql/src/test/results/clientpositive/vector_outer_join6.q.out ql/src/test/results/clientpositive/vector_outer_join6.q.out new file mode 100644 index 0000000..b7c15e8 --- /dev/null +++ ql/src/test/results/clientpositive/vector_outer_join6.q.out @@ -0,0 +1,348 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table TJOIN1_txt (RNUM int , C1 int, C2 int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1_txt +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table TJOIN1_txt (RNUM int , C1 int, C2 int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1_txt +PREHOOK: query: create table TJOIN2_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2_txt +POSTHOOK: query: create table TJOIN2_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2_txt +PREHOOK: query: create table if not exists TJOIN3_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN3_txt +POSTHOOK: query: create table if not exists TJOIN3_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN3_txt +PREHOOK: query: create table TJOIN4_txt (RNUM int , C1 int, C2 
char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN4_txt +POSTHOOK: query: create table TJOIN4_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN4_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN1' into table TJOIN1_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN1' into table TJOIN1_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN2' into table TJOIN2_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN2' into table TJOIN2_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN3' into table TJOIN3_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin3_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN3' into table TJOIN3_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin3_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN4' into table TJOIN4_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin4_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN4' into table TJOIN4_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin4_txt +PREHOOK: query: create table TJOIN1 stored as orc AS SELECT * FROM TJOIN1_txt +PREHOOK: 
type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin1_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table TJOIN1 stored as orc AS SELECT * FROM TJOIN1_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin1_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin2_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin2_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin3_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN3 +POSTHOOK: query: create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin3_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN3 +PREHOOK: query: create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin4_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN4 +POSTHOOK: query: create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin4_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN4 +PREHOOK: query: explain +select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +POSTHOOK: query: 
explain +select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:tjoin2 + Fetch Operator + limit: -1 + $hdt$_1:tjoin3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:tjoin2 + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + $hdt$_1:tjoin3 + TableScan + alias: tjoin3 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col2 (type: int) + 1 _col1 (type: int) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
_col0 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +PREHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +POSTHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +POSTHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +0 0 0 +0 3 0 +1 NULL NULL +2 NULL NULL +PREHOOK: query: explain +select tj1rnum, tj2rnum as 
rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:tjoin2 + Fetch Operator + limit: -1 + $hdt$_1:tjoin3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:tjoin2 + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + $hdt$_1:tjoin3 + TableScan + alias: tjoin3 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 
(type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +PREHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +POSTHOOK: query: select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +POSTHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +0 0 +0 3 +1 NULL +2 NULL