diff --git data/files/tjoin1.txt data/files/tjoin1.txt new file mode 100644 index 0000000..897e0c5 --- /dev/null +++ data/files/tjoin1.txt @@ -0,0 +1,3 @@ +0|10|15 +1|20|25 +2|\N|50 \ No newline at end of file diff --git data/files/tjoin2.txt data/files/tjoin2.txt new file mode 100644 index 0000000..24820e9 --- /dev/null +++ data/files/tjoin2.txt @@ -0,0 +1,4 @@ +0|10|BB +1|15|DD +2|\N|EE +3|10|FF \ No newline at end of file diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 61d376a..e30eeed 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -211,6 +211,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_groupby_3.q,\ vector_groupby_reduce.q,\ vector_left_outer_join.q,\ + vector_left_outer_join2.q,\ vector_mapjoin_reduce.q,\ vector_non_string_partition.q,\ vector_orderby_5.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index 2c8aee1..098c47b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -95,9 +95,13 @@ public VectorMapJoinOperator (VectorizationContext vContext, OperatorDesc conf) filterMaps = desc.getFilterMap(); noOuterJoin = desc.isNoOuterJoin(); - Map> filterExpressions = desc.getFilters(); - bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), - VectorExpressionDescriptor.Mode.FILTER); + if (!noOuterJoin) { + // It is only valid to pre-filter for INNER JOIN. OUTER JOIN requires post-ON condition + // evaluation of filters. They will be done by our super class MapJoinOperator. + Map> filterExpressions = desc.getFilters(); + bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), + VectorExpressionDescriptor.Mode.FILTER); + } List keyDesc = desc.getKeys().get(posBigTable); keyExpressions = vContext.getVectorExpressions(keyDesc); @@ -175,7 +179,11 @@ protected Object _evaluate(Object row, int version) throws HiveException { joinValues[posBigTable] = vectorNodeEvaluators; // Filtering is handled in the input batch processing - filterMaps[posBigTable] = null; + if (!noOuterJoin) { + // It is only valid to pre-filter for INNER JOIN. OUTER JOIN requires post-ON condition + // evaluation of filters. They will be done by our super class MapJoinOperator. + filterMaps[posBigTable] = null; + } outputVectorAssigners = new HashMap(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index ecd92d5..e59488e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -103,9 +103,13 @@ public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf // Must obtain vectorized equivalents for filter and value expressions - Map> filterExpressions = desc.getFilters(); - bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), + if (!noOuterJoin) { + // It is only valid to pre-filter for INNER JOIN. OUTER JOIN requires post-ON condition + // evaluation of filters. They will be done by our super class SMBMapJoinOperator. + Map> filterExpressions = desc.getFilters(); + bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), VectorExpressionDescriptor.Mode.FILTER); + } List keyDesc = desc.getKeys().get(posBigTable); keyExpressions = vContext.getVectorExpressions(keyDesc); diff --git ql/src/test/queries/clientpositive/vector_left_outer_join2.q ql/src/test/queries/clientpositive/vector_left_outer_join2.q new file mode 100644 index 0000000..39fc301 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_left_outer_join2.q @@ -0,0 +1,24 @@ +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +drop table if exists TJOIN1; +drop table if exists TJOIN2; +create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc; +create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE; +LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2; +INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE; + +set hive.vectorized.execution.enabled=false; +explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); + +set hive.vectorized.execution.enabled=true; +explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); diff --git ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out new file mode 100644 index 0000000..afc8cbd --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out @@ -0,0 +1,213 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2 +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + input vertices: + 1 Map 2 + Statistics: Num rows: 5 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 5 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 5 Data size: 31 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +0 10 15 NULL +1 20 25 NULL +2 NULL 50 NULL +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + input vertices: + 1 Map 2 + Statistics: Num rows: 5 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 5 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c1 (type: int) + sort order: + + Map-reduce partition columns: c1 (type: int) + Statistics: Num rows: 5 Data size: 31 Basic stats: COMPLETE Column stats: NONE + value expressions: c2 (type: char(2)) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +1 20 25 NULL +2 NULL 50 NULL diff --git ql/src/test/results/clientpositive/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/vector_left_outer_join2.q.out new file mode 100644 index 0000000..4cca3b2 --- /dev/null +++ ql/src/test/results/clientpositive/vector_left_outer_join2.q.out @@ -0,0 +1,221 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2 +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + tjoin2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tjoin2 + TableScan + alias: tjoin2 + Statistics: Num rows: 5 Data size: 31 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 5 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +0 10 15 NULL +1 20 25 NULL +2 NULL 50 NULL +PREHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + tjoin2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + tjoin2 + TableScan + alias: tjoin2 + Statistics: Num rows: 5 Data size: 31 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(c2 > 15)} + 1 + keys: + 0 c1 (type: int) + 1 c1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col8 + Statistics: Num rows: 5 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +0 10 15 NULL +1 20 25 NULL +2 NULL 50 NULL