diff --git ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java index 5390ba7..976a2d5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java @@ -77,6 +77,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.Lists; + + /** * Operator factory for predicate pushdown processing of operator graph Each * operator determines the pushdown predicates by walking the expression tree. @@ -100,9 +103,12 @@ private static ExprWalkerInfo getChildWalkerInfo(Operator current, OpWalkerIn } if (current.getNumChild() > 1) { // ppd for multi-insert query is not yet implemented - // no-op for leafs - for (Operator child : current.getChildOperators()) { - removeCandidates(child, owi); // remove candidated filters on this branch + // we assume that nothing can is pushed beyond this operator + List> children = + Lists.newArrayList(current.getChildOperators()); + for (Operator child : children) { + ExprWalkerInfo childInfo = owi.getPrunedPreds(child); + createFilter(child, childInfo, owi); } return null; } @@ -111,10 +117,24 @@ private static ExprWalkerInfo getChildWalkerInfo(Operator current, OpWalkerIn private static void removeCandidates(Operator operator, OpWalkerInfo owi) { if (operator instanceof FilterOperator) { + if (owi.getCandidateFilterOps().contains(operator)) { + List> children = operator.getChildOperators(); + List> parents = operator.getParentOperators(); + for (Operator parent : parents) { + parent.getChildOperators().addAll(children); + parent.removeChild(operator); + } + for (Operator child : children) { + child.getParentOperators().addAll(parents); + child.removeParent(operator); + } + } owi.getCandidateFilterOps().remove(operator); } if (operator.getChildOperators() != null) { - for (Operator child : operator.getChildOperators()) { + List> children = + Lists.newArrayList(operator.getChildOperators()); + for (Operator child : children) { removeCandidates(child, owi); } } @@ -899,20 +919,9 @@ protected static Object createFilter(Operator op, if (HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { // remove the candidate filter ops - for (FilterOperator fop : owi.getCandidateFilterOps()) { - List> children = fop.getChildOperators(); - List> parents = fop.getParentOperators(); - for (Operator parent : parents) { - parent.getChildOperators().addAll(children); - parent.removeChild(fop); - } - for (Operator child : children) { - child.getParentOperators().addAll(parents); - child.removeParent(fop); - } - } - owi.getCandidateFilterOps().clear(); + removeCandidates(op, owi); } + // push down current ppd context to newly added filter ExprWalkerInfo walkerInfo = owi.getPrunedPreds(op); if (walkerInfo != null) { diff --git ql/src/test/queries/clientpositive/multi_insert_with_join2.q ql/src/test/queries/clientpositive/multi_insert_with_join2.q new file mode 100644 index 0000000..1529fa2 --- /dev/null +++ ql/src/test/queries/clientpositive/multi_insert_with_join2.q @@ -0,0 +1,51 @@ +set hive.cbo.enable=false; + +CREATE TABLE T_A ( id STRING, val STRING ); +CREATE TABLE T_B ( id STRING, val STRING ); +CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING ); +CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING ); + +INSERT INTO TABLE T_A +VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103'); + +INSERT INTO TABLE T_B +VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104'); + +explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103'; + +explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val; + +explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val; + +explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2'; + +explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2'; diff --git ql/src/test/results/clientpositive/multi_insert_with_join2.q.out ql/src/test/results/clientpositive/multi_insert_with_join2.q.out new file mode 100644 index 0000000..e252de1 --- /dev/null +++ ql/src/test/results/clientpositive/multi_insert_with_join2.q.out @@ -0,0 +1,555 @@ +PREHOOK: query: CREATE TABLE T_A ( id STRING, val STRING ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T_A +POSTHOOK: query: CREATE TABLE T_A ( id STRING, val STRING ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T_A +PREHOOK: query: CREATE TABLE T_B ( id STRING, val STRING ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T_B +POSTHOOK: query: CREATE TABLE T_B ( id STRING, val STRING ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T_B +PREHOOK: query: CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@join_result_1 +POSTHOOK: query: CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@join_result_1 +PREHOOK: query: CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@join_result_3 +POSTHOOK: query: CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@join_result_3 +PREHOOK: query: INSERT INTO TABLE T_A +VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@t_a +POSTHOOK: query: INSERT INTO TABLE T_A +VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@t_a +POSTHOOK: Lineage: t_a.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t_a.val SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE T_B +VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@t_b +POSTHOOK: query: INSERT INTO TABLE T_B +VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@t_b +POSTHOOK: Lineage: t_b.id SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t_b.val SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 <> 'val_104') and (_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2') and (_col1 <> _col6)) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator +