diff --git ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
index 5390ba7..73c2c19 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
@@ -77,6 +77,9 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import com.google.common.collect.Lists;
+
+
 
 /**
  * Operator factory for predicate pushdown processing of operator graph Each
  * operator determines the pushdown predicates by walking the expression tree.
@@ -100,9 +103,12 @@ private static ExprWalkerInfo getChildWalkerInfo(Operator current, OpWalkerIn
     }
     if (current.getNumChild() > 1) {
       // ppd for multi-insert query is not yet implemented
-      // no-op for leafs
-      for (Operator child : current.getChildOperators()) {
-        removeCandidates(child, owi); // remove candidated filters on this branch
+      // we assume that nothing can be pushed beyond this operator
+      List<Operator<? extends OperatorDesc>> children =
+          Lists.newArrayList(current.getChildOperators());
+      for (Operator child : children) {
+        ExprWalkerInfo childInfo = owi.getPrunedPreds(child);
+        createFilter(child, childInfo, owi);
       }
       return null;
     }
@@ -111,15 +117,40 @@ private static ExprWalkerInfo getChildWalkerInfo(Operator current, OpWalkerIn
 
   private static void removeCandidates(Operator operator, OpWalkerInfo owi) {
     if (operator instanceof FilterOperator) {
+      if (owi.getCandidateFilterOps().contains(operator)) {
+        removeOperator(operator);
+      }
       owi.getCandidateFilterOps().remove(operator);
     }
     if (operator.getChildOperators() != null) {
-      for (Operator child : operator.getChildOperators()) {
+      List<Operator<? extends OperatorDesc>> children =
+          Lists.newArrayList(operator.getChildOperators());
+      for (Operator child : children) {
         removeCandidates(child, owi);
       }
     }
   }
 
+  private static void removeAllCandidates(OpWalkerInfo owi) {
+    for (FilterOperator operator : owi.getCandidateFilterOps()) {
+      removeOperator(operator);
+    }
+    owi.getCandidateFilterOps().clear();
+  }
+
+  private static void removeOperator(Operator operator) {
+    List<Operator<? extends OperatorDesc>> children = operator.getChildOperators();
+    List<Operator<? extends OperatorDesc>> parents = operator.getParentOperators();
+    for (Operator parent : parents) {
+      parent.getChildOperators().addAll(children);
+      parent.removeChild(operator);
+    }
+    for (Operator child : children) {
+      child.getParentOperators().addAll(parents);
+      child.removeParent(operator);
+    }
+  }
+
   /**
    * Processor for Script Operator Prevents any predicates being pushed.
   */
@@ -386,6 +417,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       OpWalkerInfo owi = (OpWalkerInfo) procCtx;
       TableScanOperator tsOp = (TableScanOperator) nd;
       mergeWithChildrenPred(tsOp, owi, null, null);
+      if (HiveConf.getBoolVar(owi.getParseContext().getConf(),
+          HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
+        // remove all the candidate filter operators
+        // when we get to the TS
+        removeAllCandidates(owi);
+      }
       ExprWalkerInfo pushDownPreds = owi.getPrunedPreds(tsOp);
       return createFilter(tsOp, pushDownPreds, owi);
     }
@@ -899,20 +936,9 @@ protected static Object createFilter(Operator op,
     if (HiveConf.getBoolVar(owi.getParseContext().getConf(),
         HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
       // remove the candidate filter ops
-      for (FilterOperator fop : owi.getCandidateFilterOps()) {
-        List<Operator<? extends OperatorDesc>> children = fop.getChildOperators();
-        List<Operator<? extends OperatorDesc>> parents = fop.getParentOperators();
-        for (Operator parent : parents) {
-          parent.getChildOperators().addAll(children);
-          parent.removeChild(fop);
-        }
-        for (Operator child : children) {
-          child.getParentOperators().addAll(parents);
-          child.removeParent(fop);
-        }
-      }
-      owi.getCandidateFilterOps().clear();
+      removeCandidates(op, owi);
     }
+
     // push down current ppd context to newly added filter
     ExprWalkerInfo walkerInfo = owi.getPrunedPreds(op);
     if (walkerInfo != null) {
diff --git ql/src/test/queries/clientpositive/multi_insert_with_join2.q ql/src/test/queries/clientpositive/multi_insert_with_join2.q
new file mode 100644
index 0000000..1529fa2
--- /dev/null
+++ ql/src/test/queries/clientpositive/multi_insert_with_join2.q
@@ -0,0 +1,51 @@
+set hive.cbo.enable=false;
+
+CREATE TABLE T_A ( id STRING, val STRING );
+CREATE TABLE T_B ( id STRING, val STRING );
+CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING );
+CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING );
+
+INSERT INTO TABLE T_A
+VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103');
+
+INSERT INTO TABLE T_B
+VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104');
+
+explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103';
+
+explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val;
+
+explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val;
+
+explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2';
+
+explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2';
diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
index 1381d91..f5c7c7f 100644
--- ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
+++ ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
@@ -56,12 +56,12 @@ INSERT OVERWRITE TABLE dest_g4
SELECT substr(src.key,1,1), count(DISTINCT substr POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-3 @@ -83,6 +83,24 @@ STAGE PLANS: Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g4 Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -125,26 +143,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g4 - Stage: Stage-0 + Stage: Stage-2 Move Operator tables: replace: true @@ -152,12 +152,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g2 + name: default.dest_g4 Stage: Stage-4 Stats-Aggr Operator - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: true @@ -165,12 +165,12 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g3 + name: default.dest_g2 Stage: Stage-5 Stats-Aggr Operator - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: true @@ -178,7 +178,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g4 + name: default.dest_g3 Stage: Stage-6 Stats-Aggr Operator @@ -278,12 +278,12 @@ INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-2 Stage-0 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-0 Stage-1 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-1 Stage-9 depends on stages: Stage-5 Stage-10 depends on stages: Stage-9 Stage-3 depends on stages: Stage-10 @@ -321,6 +321,24 @@ STAGE PLANS: Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g4 Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -363,26 +381,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column 
stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g4 - Stage: Stage-0 + Stage: Stage-2 Move Operator tables: replace: true @@ -390,12 +390,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g2 + name: default.dest_g4 Stage: Stage-6 Stats-Aggr Operator - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: true @@ -403,12 +403,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g3 + name: default.dest_g2 Stage: Stage-7 Stats-Aggr Operator - Stage: Stage-2 + Stage: Stage-1 Move Operator tables: replace: true @@ -416,7 +416,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_g4 + name: default.dest_g3 Stage: Stage-8 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/multi_insert_gby.q.out ql/src/test/results/clientpositive/multi_insert_gby.q.out index 7c5e589..190f430 100644 --- ql/src/test/results/clientpositive/multi_insert_gby.q.out +++ ql/src/test/results/clientpositive/multi_insert_gby.q.out @@ -215,10 +215,10 @@ INSERT OVERWRITE TABLE e2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-2 @@ -239,6 +239,24 @@ STAGE PLANS: Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 Filter Operator predicate: (KEY._col0 > 450) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -260,26 +278,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Group By Operator - aggregations: count() - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 
UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2 - Stage: Stage-0 + Stage: Stage-1 Move Operator tables: replace: true @@ -287,12 +287,12 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e1 + name: default.e2 Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-1 + Stage: Stage-0 Move Operator tables: replace: true @@ -300,7 +300,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2 + name: default.e1 Stage: Stage-4 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/multi_insert_with_join2.q.out ql/src/test/results/clientpositive/multi_insert_with_join2.q.out new file mode 100644 index 0000000..e252de1 --- /dev/null +++ ql/src/test/results/clientpositive/multi_insert_with_join2.q.out @@ -0,0 +1,555 @@ +PREHOOK: query: CREATE TABLE T_A ( id STRING, val STRING ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T_A +POSTHOOK: query: CREATE TABLE T_A ( id STRING, val STRING ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T_A +PREHOOK: query: CREATE TABLE T_B ( id STRING, val STRING ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T_B +POSTHOOK: query: CREATE TABLE T_B ( id STRING, val STRING ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T_B +PREHOOK: query: CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@join_result_1 +POSTHOOK: query: CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@join_result_1 +PREHOOK: query: CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@join_result_3 +POSTHOOK: query: CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@join_result_3 +PREHOOK: query: INSERT INTO TABLE T_A +VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@t_a +POSTHOOK: query: INSERT INTO TABLE T_A +VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@t_a +POSTHOOK: Lineage: t_a.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t_a.val SIMPLE 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE T_B +VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@t_b +POSTHOOK: query: INSERT INTO TABLE T_B +VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@t_b +POSTHOOK: Lineage: t_b.id SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t_b.val SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 
'val_104' AND b.id = 'Id_2' AND a.val <> b.val +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 <> 'val_104') and (_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE 
+ Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2') and (_col1 <> _col6)) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND 
b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator + diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out index 279843b..29df9b9 100644 --- ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out +++ ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out @@ -128,12 +128,15 @@ STAGE PLANS: sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan Reduce Output Operator sort order: @@ -490,7 +493,7 @@ POSTHOOK: Input: default@src_5 199 val_199 199 val_199 2 val_2 -Warning: Map Join MAPJOIN[55][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain from src b @@ -616,12 +619,15 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -775,12 +781,15 @@ STAGE PLANS: sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE value expressions: key (type: string), value (type: string) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan Reduce Output Operator sort order: @@ -801,7 +810,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -Warning: Map Join MAPJOIN[55][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b INSERT OVERWRITE TABLE src_4
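
Reviewer note, not part of the patch: the heart of the OpProcFactory change is removeOperator(), which splices a redundant FilterOperator out of the operator DAG by having every parent adopt the filter's children and every child adopt the filter's parents; the patch also copies the child lists with Lists.newArrayList() before iterating, which guards against concurrent modification while those same lists are being rewired. The stand-alone sketch below illustrates only that rewiring step; PlanNode and its fields are hypothetical names used for illustration and are not Hive APIs.

import java.util.ArrayList;
import java.util.List;

// Minimal stand-alone sketch of the parent/child rewiring performed by
// OpProcFactory.removeOperator(). PlanNode is a hypothetical class used
// only for illustration; it is not part of Hive.
class PlanNode {
  final String name;
  final List<PlanNode> parents = new ArrayList<>();
  final List<PlanNode> children = new ArrayList<>();

  PlanNode(String name) { this.name = name; }

  static void connect(PlanNode parent, PlanNode child) {
    parent.children.add(child);
    child.parents.add(parent);
  }

  // Splice 'node' out of the DAG: each parent adopts node's children and
  // each child adopts node's parents, mirroring removeOperator in the patch.
  static void splice(PlanNode node) {
    for (PlanNode parent : node.parents) {
      parent.children.addAll(node.children);
      parent.children.remove(node);
    }
    for (PlanNode child : node.children) {
      child.parents.addAll(node.parents);
      child.parents.remove(node);
    }
  }

  public static void main(String[] args) {
    PlanNode join = new PlanNode("JOIN");
    PlanNode filter = new PlanNode("FIL(redundant)");
    PlanNode select = new PlanNode("SEL");
    connect(join, filter);
    connect(filter, select);

    splice(filter);                                   // JOIN now feeds SEL directly
    System.out.println(join.children.get(0).name);    // prints SEL
    System.out.println(select.parents.get(0).name);   // prints JOIN
  }
}

Running main prints SEL then JOIN, showing that once the filter node is spliced out the join feeds the select directly, which is exactly the shape the new multi_insert_with_join2.q.out plans expect after the duplicate filters are dropped.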