diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index f0ea4c5..3e91e10 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -8695,14 +8695,11 @@ private void mergeJoins(QB qb, QBJoinTree node, QBJoinTree target, int pos, int[ private ObjectPair findMergePos(QBJoinTree node, QBJoinTree target) { int res = -1; String leftAlias = node.getLeftAlias(); - if (leftAlias == null) { - return new ObjectPair(-1, null); - } ArrayList nodeCondn = node.getExpressions().get(0); ArrayList targetCondn = null; - if (leftAlias.equals(target.getLeftAlias())) { + if (leftAlias == null || leftAlias.equals(target.getLeftAlias())) { targetCondn = target.getExpressions().get(0); res = 0; } else { @@ -9998,8 +9995,9 @@ public Operator genPlan(QB qb, boolean skipAmbiguityCheck) } } - if (!disableJoinMerge) + if (!disableJoinMerge) { mergeJoinTree(qb); + } } // if any filters are present in the join tree, push them on top of the diff --git ql/src/test/queries/clientpositive/cross_join_merge.q ql/src/test/queries/clientpositive/cross_join_merge.q new file mode 100644 index 0000000..3ba4727 --- /dev/null +++ ql/src/test/queries/clientpositive/cross_join_merge.q @@ -0,0 +1,17 @@ +explain +select src1.key from src src1 join src src2 join src src3; + +explain +select src1.key from src src1 join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key; + +explain +select src1.key from src src1 join src src2 join src src3 where src1.key=src2.key and src1.key=src3.key; + +explain +select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key; + +explain +select src1.key from src src1 left outer join src src2 join src src3; + +explain +select src1.key from src src1 left outer join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key; diff --git ql/src/test/results/clientpositive/auto_join_stats.q.out ql/src/test/results/clientpositive/auto_join_stats.q.out index 587c39e..6a18a02 100644 --- ql/src/test/results/clientpositive/auto_join_stats.q.out +++ ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -294,21 +294,21 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-11 is a root stage , consists of Stage-13, Stage-14, Stage-1 - Stage-13 has a backup stage: Stage-1 - Stage-9 depends on stages: Stage-13 - Stage-12 depends on stages: Stage-1, Stage-9, Stage-10 - Stage-7 depends on stages: Stage-12 - Stage-14 has a backup stage: Stage-1 - Stage-10 depends on stages: Stage-14 + Stage-9 is a root stage , consists of Stage-11, Stage-12, Stage-1 + Stage-11 has a backup stage: Stage-1 + Stage-7 depends on stages: Stage-11 + Stage-10 depends on stages: Stage-1, Stage-7, Stage-8 + Stage-6 depends on stages: Stage-10 + Stage-12 has a backup stage: Stage-1 + Stage-8 depends on stages: Stage-12 Stage-1 - Stage-0 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-11 + Stage: Stage-9 Conditional Operator - Stage: Stage-13 + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src1 @@ -331,7 +331,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-9 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -361,7 +361,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-12 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_2:smalltable @@ -386,6 +386,7 @@ STAGE PLANS: keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) $hdt$_3:smalltable2 TableScan alias: smalltable2 @@ -401,38 +402,33 @@ STAGE PLANS: keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work - Stage: Stage-14 + Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -455,7 +451,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-10 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan diff --git ql/src/test/results/clientpositive/auto_join_stats2.q.out ql/src/test/results/clientpositive/auto_join_stats2.q.out index 35776f5..28fd569 100644 --- ql/src/test/results/clientpositive/auto_join_stats2.q.out +++ ql/src/test/results/clientpositive/auto_join_stats2.q.out @@ -157,12 +157,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-7 depends on stages: Stage-10 - Stage-0 depends on stages: Stage-7 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -205,6 +205,7 @@ STAGE PLANS: keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) $hdt$_3:smalltable2 TableScan alias: smalltable2 @@ -220,8 +221,9 @@ STAGE PLANS: keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -245,26 +247,20 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out index cfb95be..6b40ee8 100644 --- ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out +++ ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out @@ -22,8 +22,7 @@ POSTHOOK: query: CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@T2 -Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM ( SELECT tmp1.name as name FROM ( SELECT name, 'MMM' AS n FROM T1) tmp1 diff --git ql/src/test/results/clientpositive/cross_join_merge.q.out ql/src/test/results/clientpositive/cross_join_merge.q.out new file mode 100644 index 0000000..f15dd17 --- /dev/null +++ ql/src/test/results/clientpositive/cross_join_merge.q.out @@ -0,0 +1,490 @@ +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select src1.key from src src1 join src src2 join src src3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key from src src1 join src src2 join src src3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 + 1 + 2 + outputColumnNames: _col0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select src1.key from src src1 join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key from src src1 join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select src1.key from src src1 join src src2 join src src3 where src1.key=src2.key and src1.key=src3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key from src src1 join src src2 join src src3 where src1.key=src2.key and src1.key=src3.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: explain +select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (5.0 = UDFToDouble(key)) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select src1.key from src src1 left outer join src src2 join src src3 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key from src src1 left outer join src src2 join src src3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 0 to 2 + keys: + 0 + 1 + 2 + outputColumnNames: _col0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select src1.key from src src1 left outer join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key from src src1 left outer join src src2 on src1.key=src2.key join src src3 on src1.key=src3.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/join42.q.out ql/src/test/results/clientpositive/join42.q.out index 4715bb5..e52f1fc 100644 --- ql/src/test/results/clientpositive/join42.q.out +++ ql/src/test/results/clientpositive/join42.q.out @@ -80,8 +80,7 @@ POSTHOOK: Output: default@acct POSTHOOK: Lineage: acct.acc_n EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: acct.aid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: acct.brn EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: --[HIVE-10841] (WHERE col is not null) does not work sometimes for queries with many JOIN statements explain select acct.ACC_N, @@ -114,8 +113,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -146,30 +144,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int) TableScan alias: fr Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -185,11 +159,13 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 1 + 2 outputColumnNames: _col2, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -197,7 +173,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -205,7 +181,7 @@ STAGE PLANS: key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int) TableScan alias: a @@ -256,7 +232,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -308,8 +284,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select acct.ACC_N, acct.brn diff --git ql/src/test/results/clientpositive/perf/query28.q.out ql/src/test/results/clientpositive/perf/query28.q.out index aed4808..8196db5 100644 --- ql/src/test/results/clientpositive/perf/query28.q.out +++ ql/src/test/results/clientpositive/perf/query28.q.out @@ -1,8 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[71][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[72][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[73][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[74][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[75][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from (select avg(ss_list_price) B1_LP ,count(ss_list_price) B1_CNT @@ -110,126 +106,106 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 11 <- Map 10 (SIMPLE_EDGE) Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE) -Reducer 17 <- Map 16 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 - File Output Operator [FS_64] - Limit [LIM_63] (rows=1 width=215) + Reducer 3 + File Output Operator [FS_56] + Limit [LIM_55] (rows=5 width=149) Number of rows:100 - Select Operator [SEL_62] (rows=1 width=215) + Select Operator [SEL_54] (rows=5 width=149) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - Merge Join Operator [MERGEJOIN_75] (rows=1 width=215) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_60] + Merge Join Operator [MERGEJOIN_63] (rows=5 width=149) + Conds:(Inner),(Inner),(Inner),(Inner),(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_51] + Group By Operator [GBY_33] (rows=1 width=136) + Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_32] + Group By Operator [GBY_31] (rows=1 width=0) + Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price + Select Operator [SEL_30] (rows=1 width=0) + Output:["ss_list_price"] + Filter Operator [FIL_61] (rows=1 width=0) + predicate:(ss_quantity BETWEEN 11 AND 15 and (ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24)) + TableScan [TS_28] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_52] Group By Operator [GBY_40] (rows=1 width=136) Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 16 [SIMPLE_EDGE] + <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_39] Group By Operator [GBY_38] (rows=1 width=0) Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price Select Operator [SEL_37] (rows=1 width=0) Output:["ss_list_price"] - Filter Operator [FIL_70] (rows=1 width=0) + Filter Operator [FIL_62] (rows=1 width=0) predicate:(ss_quantity BETWEEN 6 AND 10 and (ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or ss_wholesale_cost BETWEEN 32 AND 52)) TableScan [TS_35] (rows=1 width=0) default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_59] - Merge Join Operator [MERGEJOIN_74] (rows=1 width=196) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_57] - Group By Operator [GBY_33] (rows=1 width=136) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_32] - Group By Operator [GBY_31] (rows=1 width=0) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price - Select Operator [SEL_30] (rows=1 width=0) - Output:["ss_list_price"] - Filter Operator [FIL_69] (rows=1 width=0) - predicate:(ss_quantity BETWEEN 11 AND 15 and (ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24)) - TableScan [TS_28] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_56] - Merge Join Operator [MERGEJOIN_73] (rows=1 width=179) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_54] - Group By Operator [GBY_26] (rows=1 width=136) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_25] - Group By Operator [GBY_24] (rows=1 width=0) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price - Select Operator [SEL_23] (rows=1 width=0) - Output:["ss_list_price"] - Filter Operator [FIL_68] (rows=1 width=0) - predicate:(ss_quantity BETWEEN 16 AND 20 and (ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100)) - TableScan [TS_21] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_53] - Merge Join Operator [MERGEJOIN_72] (rows=1 width=163) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_51] - Group By Operator [GBY_19] (rows=1 width=136) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_18] - Group By Operator [GBY_17] (rows=1 width=0) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price - Select Operator [SEL_16] (rows=1 width=0) - Output:["ss_list_price"] - Filter Operator [FIL_67] (rows=1 width=0) - predicate:(ss_quantity BETWEEN 21 AND 25 and (ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58)) - TableScan [TS_14] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_50] - Merge Join Operator [MERGEJOIN_71] (rows=1 width=149) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_47] - Group By Operator [GBY_5] (rows=1 width=136) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_4] - Group By Operator [GBY_3] (rows=1 width=0) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price - Select Operator [SEL_2] (rows=1 width=0) - Output:["ss_list_price"] - Filter Operator [FIL_65] (rows=1 width=0) - predicate:(ss_quantity BETWEEN 0 AND 5 and (ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34)) - TableScan [TS_0] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_48] - Group By Operator [GBY_12] (rows=1 width=136) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_11] - Group By Operator [GBY_10] (rows=1 width=0) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price - Select Operator [SEL_9] (rows=1 width=0) - Output:["ss_list_price"] - Filter Operator [FIL_66] (rows=1 width=0) - predicate:(ss_quantity BETWEEN 26 AND 30 and (ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62)) - TableScan [TS_7] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_47] + Group By Operator [GBY_5] (rows=1 width=136) + Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=0) + Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price + Select Operator [SEL_2] (rows=1 width=0) + Output:["ss_list_price"] + Filter Operator [FIL_57] (rows=1 width=0) + predicate:(ss_quantity BETWEEN 0 AND 5 and (ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34)) + TableScan [TS_0] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_48] + Group By Operator [GBY_12] (rows=1 width=136) + Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] + <-Map 4 [SIMPLE_EDGE] + SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=0) + Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price + Select Operator [SEL_9] (rows=1 width=0) + Output:["ss_list_price"] + Filter Operator [FIL_58] (rows=1 width=0) + predicate:(ss_quantity BETWEEN 26 AND 30 and (ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62)) + TableScan [TS_7] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_49] + Group By Operator [GBY_19] (rows=1 width=136) + Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] + <-Map 6 [SIMPLE_EDGE] + SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=0) + Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price + Select Operator [SEL_16] (rows=1 width=0) + Output:["ss_list_price"] + Filter Operator [FIL_59] (rows=1 width=0) + predicate:(ss_quantity BETWEEN 21 AND 25 and (ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58)) + TableScan [TS_14] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_50] + Group By Operator [GBY_26] (rows=1 width=136) + Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_25] + Group By Operator [GBY_24] (rows=1 width=0) + Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price + Select Operator [SEL_23] (rows=1 width=0) + Output:["ss_list_price"] + Filter Operator [FIL_60] (rows=1 width=0) + predicate:(ss_quantity BETWEEN 16 AND 20 and (ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100)) + TableScan [TS_21] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] diff --git ql/src/test/results/clientpositive/perf/query65.q.out ql/src/test/results/clientpositive/perf/query65.q.out index 9799fa9..37bb1b3 100644 --- ql/src/test/results/clientpositive/perf/query65.q.out +++ ql/src/test/results/clientpositive/perf/query65.q.out @@ -1,5 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[72][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[75][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[71][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select s_store_name, i_item_desc, @@ -79,76 +78,91 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 8 <- Map 11 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 - File Output Operator [FS_55] - Limit [LIM_54] (rows=100 width=1436) + Reducer 4 + File Output Operator [FS_53] + Limit [LIM_52] (rows=100 width=1436) Number of rows:100 - Select Operator [SEL_53] (rows=204974 width=1436) + Select Operator [SEL_51] (rows=372680 width=1436) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_52] - Select Operator [SEL_51] (rows=204974 width=1436) + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_50] + Select Operator [SEL_49] (rows=372680 width=1436) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_50] (rows=204974 width=1436) + Filter Operator [FIL_48] (rows=372680 width=1436) predicate:(_col11 <= CAST( (0.1 * UDFToDouble(_col8)) AS decimal(30,15))) - Merge Join Operator [MERGEJOIN_76] (rows=614922 width=1436) - Conds:RS_47._col7, _col0, _col2=RS_48._col0, _col0, _col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col8","_col11"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_48] + Merge Join Operator [MERGEJOIN_73] (rows=1118040 width=1436) + Conds:RS_45._col7, _col0, _col2=RS_46._col0, _col0, _col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col8","_col11"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_46] PartitionCols:_col0, _col0, _col1 Select Operator [SEL_38] (rows=20088 width=1119) Output:["_col0","_col1","_col2"] Group By Operator [GBY_37] (rows=20088 width=1119) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 13 [SIMPLE_EDGE] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col0, _col1 Group By Operator [GBY_35] (rows=40176 width=1119) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col1, _col2 - Merge Join Operator [MERGEJOIN_74] (rows=40176 width=1119) + Merge Join Operator [MERGEJOIN_72] (rows=40176 width=1119) Conds:RS_31._col0=RS_32._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 12 [SIMPLE_EDGE] + <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col0 Select Operator [SEL_27] (rows=1 width=0) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_70] (rows=1 width=0) + Filter Operator [FIL_68] (rows=1 width=0) predicate:((ss_sold_date_sk is not null and ss_store_sk is not null) and ss_item_sk is not null) TableScan [TS_25] (rows=1 width=0) default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Map 15 [SIMPLE_EDGE] + <-Map 14 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col0 Select Operator [SEL_30] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_71] (rows=36524 width=1119) + Filter Operator [FIL_69] (rows=36524 width=1119) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_28] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_47] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_45] PartitionCols:_col7, _col0, _col2 - Merge Join Operator [MERGEJOIN_75] (rows=559020 width=1436) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_45] + Merge Join Operator [MERGEJOIN_71] (rows=1016400 width=1436) + Conds:(Inner),(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_41] + Select Operator [SEL_2] (rows=1704 width=1910) + Output:["_col0","_col1"] + Filter Operator [FIL_64] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_0] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_42] + Select Operator [SEL_5] (rows=462000 width=1436) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_65] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_3] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc","i_current_price","i_wholesale_cost","i_brand"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_43] Group By Operator [GBY_23] (rows=10044 width=1119) Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 - <-Reducer 9 [SIMPLE_EDGE] + <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 Group By Operator [GBY_21] (rows=20088 width=1119) @@ -157,49 +171,29 @@ Stage-0 Output:["_col1","_col2"] Group By Operator [GBY_18] (rows=20088 width=1119) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0, _col1 Group By Operator [GBY_16] (rows=40176 width=1119) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col1, _col2 - Merge Join Operator [MERGEJOIN_73] (rows=40176 width=1119) + Merge Join Operator [MERGEJOIN_70] (rows=40176 width=1119) Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 11 [SIMPLE_EDGE] + <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col0 Select Operator [SEL_11] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_69] (rows=36524 width=1119) + Filter Operator [FIL_67] (rows=36524 width=1119) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Map 7 [SIMPLE_EDGE] + <-Map 6 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col0 Select Operator [SEL_8] (rows=1 width=0) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_68] (rows=1 width=0) + Filter Operator [FIL_66] (rows=1 width=0) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_6] (rows=1 width=0) default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_44] - Merge Join Operator [MERGEJOIN_72] (rows=508200 width=1436) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_41] - Select Operator [SEL_2] (rows=1704 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_66] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_0] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Map 6 [SIMPLE_EDGE] - SHUFFLE [RS_42] - Select Operator [SEL_5] (rows=462000 width=1436) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_67] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc","i_current_price","i_wholesale_cost","i_brand"] diff --git ql/src/test/results/clientpositive/perf/query88.q.out ql/src/test/results/clientpositive/perf/query88.q.out index 89a7f52..11f907f 100644 --- ql/src/test/results/clientpositive/perf/query88.q.out +++ ql/src/test/results/clientpositive/perf/query88.q.out @@ -1,10 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[366][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product -Warning: Shuffle Join MERGEJOIN[368][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 8' is a cross product -Warning: Shuffle Join MERGEJOIN[369][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 9' is a cross product -Warning: Shuffle Join MERGEJOIN[370][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 10' is a cross product -Warning: Shuffle Join MERGEJOIN[371][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 11' is a cross product -Warning: Shuffle Join MERGEJOIN[372][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 12' is a cross product +Warning: Shuffle Join MERGEJOIN[354][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain select * from @@ -192,523 +186,493 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 52 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 60 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 68 (SIMPLE_EDGE) -Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Map 23 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE) -Reducer 26 <- Map 30 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Map 31 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) -Reducer 28 <- Reducer 27 (SIMPLE_EDGE) -Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 33 <- Map 32 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) -Reducer 34 <- Map 38 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 35 <- Map 39 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) -Reducer 36 <- Reducer 35 (SIMPLE_EDGE) -Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 41 <- Map 40 (SIMPLE_EDGE), Map 45 (SIMPLE_EDGE) -Reducer 42 <- Map 46 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) -Reducer 43 <- Map 47 (SIMPLE_EDGE), Reducer 42 (SIMPLE_EDGE) -Reducer 44 <- Reducer 43 (SIMPLE_EDGE) -Reducer 49 <- Map 48 (SIMPLE_EDGE), Map 53 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 17 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 20 <- Map 24 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 25 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 27 <- Map 26 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Reducer 28 <- Map 32 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Map 33 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (SIMPLE_EDGE) +Reducer 35 <- Map 34 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) +Reducer 36 <- Map 40 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE) +Reducer 37 <- Map 41 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE) +Reducer 38 <- Reducer 37 (SIMPLE_EDGE) +Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 43 <- Map 42 (SIMPLE_EDGE), Map 47 (SIMPLE_EDGE) +Reducer 44 <- Map 48 (SIMPLE_EDGE), Reducer 43 (SIMPLE_EDGE) +Reducer 45 <- Map 49 (SIMPLE_EDGE), Reducer 44 (SIMPLE_EDGE) +Reducer 46 <- Reducer 45 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 50 <- Map 54 (SIMPLE_EDGE), Reducer 49 (SIMPLE_EDGE) -Reducer 51 <- Map 55 (SIMPLE_EDGE), Reducer 50 (SIMPLE_EDGE) -Reducer 52 <- Reducer 51 (SIMPLE_EDGE) -Reducer 57 <- Map 56 (SIMPLE_EDGE), Map 61 (SIMPLE_EDGE) -Reducer 58 <- Map 62 (SIMPLE_EDGE), Reducer 57 (SIMPLE_EDGE) -Reducer 59 <- Map 63 (SIMPLE_EDGE), Reducer 58 (SIMPLE_EDGE) -Reducer 6 <- Reducer 20 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 60 <- Reducer 59 (SIMPLE_EDGE) -Reducer 65 <- Map 64 (SIMPLE_EDGE), Map 69 (SIMPLE_EDGE) -Reducer 66 <- Map 70 (SIMPLE_EDGE), Reducer 65 (SIMPLE_EDGE) -Reducer 67 <- Map 71 (SIMPLE_EDGE), Reducer 66 (SIMPLE_EDGE) -Reducer 68 <- Reducer 67 (SIMPLE_EDGE) -Reducer 7 <- Reducer 28 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 36 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 44 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 51 <- Map 50 (SIMPLE_EDGE), Map 55 (SIMPLE_EDGE) +Reducer 52 <- Map 56 (SIMPLE_EDGE), Reducer 51 (SIMPLE_EDGE) +Reducer 53 <- Map 57 (SIMPLE_EDGE), Reducer 52 (SIMPLE_EDGE) +Reducer 54 <- Reducer 53 (SIMPLE_EDGE) +Reducer 59 <- Map 58 (SIMPLE_EDGE), Map 63 (SIMPLE_EDGE) +Reducer 6 <- Reducer 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE), Reducer 38 (SIMPLE_EDGE), Reducer 46 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 54 (SIMPLE_EDGE), Reducer 62 (SIMPLE_EDGE) +Reducer 60 <- Map 64 (SIMPLE_EDGE), Reducer 59 (SIMPLE_EDGE) +Reducer 61 <- Map 65 (SIMPLE_EDGE), Reducer 60 (SIMPLE_EDGE) +Reducer 62 <- Reducer 61 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 12 - File Output Operator [FS_237] - Merge Join Operator [MERGEJOIN_372] (rows=1 width=8) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_233] - Merge Join Operator [MERGEJOIN_371] (rows=1 width=8) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_230] - Merge Join Operator [MERGEJOIN_370] (rows=1 width=8) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 52 [SIMPLE_EDGE] - SHUFFLE [RS_228] - Group By Operator [GBY_154] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 51 [SIMPLE_EDGE] - SHUFFLE [RS_153] - Group By Operator [GBY_152] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_359] (rows=17424 width=471) - Conds:RS_148._col2=RS_149._col0(Inner) - <-Map 55 [SIMPLE_EDGE] - SHUFFLE [RS_149] + Reducer 6 + File Output Operator [FS_225] + Merge Join Operator [MERGEJOIN_354] (rows=7 width=8) + Conds:(Inner),(Inner),(Inner),(Inner),(Inner),(Inner),(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_216] + Group By Operator [GBY_50] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_49] + Group By Operator [GBY_48] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_335] (rows=17424 width=471) + Conds:RS_44._col2=RS_45._col0(Inner) + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0 + Select Operator [SEL_37] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_305] (rows=852 width=1910) + predicate:((s_store_name = 'ese') and s_store_sk is not null) + TableScan [TS_35] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_334] (rows=15840 width=471) + Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col2"] + <-Map 16 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Select Operator [SEL_34] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_304] (rows=14400 width=471) + predicate:(((t_hour = 9) and (t_minute < 30)) and t_time_sk is not null) + TableScan [TS_32] (rows=86400 width=471) + default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_333] (rows=3960 width=107) + Conds:RS_38._col1=RS_39._col0(Inner),Output:["_col0","_col2"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col1 + Select Operator [SEL_28] (rows=1 width=0) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_302] (rows=1 width=0) + predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) + TableScan [TS_26] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=3600 width=107) + Output:["_col0"] + Filter Operator [FIL_303] (rows=3600 width=107) + predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) + TableScan [TS_29] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_217] + Group By Operator [GBY_76] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_75] + Group By Operator [GBY_74] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_338] (rows=17424 width=471) + Conds:RS_70._col2=RS_71._col0(Inner) + <-Map 25 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col0 + Select Operator [SEL_63] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_309] (rows=852 width=1910) + predicate:((s_store_name = 'ese') and s_store_sk is not null) + TableScan [TS_61] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_337] (rows=15840 width=471) + Conds:RS_67._col0=RS_68._col0(Inner),Output:["_col2"] + <-Map 24 [SIMPLE_EDGE] + SHUFFLE [RS_68] + PartitionCols:_col0 + Select Operator [SEL_60] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_308] (rows=14400 width=471) + predicate:(((t_hour = 9) and (t_minute >= 30)) and t_time_sk is not null) + TableScan [TS_58] (rows=86400 width=471) + default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_336] (rows=3960 width=107) + Conds:RS_64._col1=RS_65._col0(Inner),Output:["_col0","_col2"] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_64] + PartitionCols:_col1 + Select Operator [SEL_54] (rows=1 width=0) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_306] (rows=1 width=0) + predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) + TableScan [TS_52] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Map 23 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0 + Select Operator [SEL_57] (rows=3600 width=107) + Output:["_col0"] + Filter Operator [FIL_307] (rows=3600 width=107) + predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) + TableScan [TS_55] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_218] + Group By Operator [GBY_102] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_101] + Group By Operator [GBY_100] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_341] (rows=17424 width=471) + Conds:RS_96._col2=RS_97._col0(Inner) + <-Map 33 [SIMPLE_EDGE] + SHUFFLE [RS_97] + PartitionCols:_col0 + Select Operator [SEL_89] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_313] (rows=852 width=1910) + predicate:((s_store_name = 'ese') and s_store_sk is not null) + TableScan [TS_87] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_96] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_340] (rows=15840 width=471) + Conds:RS_93._col0=RS_94._col0(Inner),Output:["_col2"] + <-Map 32 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col0 + Select Operator [SEL_86] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_312] (rows=14400 width=471) + predicate:(((t_hour = 10) and (t_minute < 30)) and t_time_sk is not null) + TableScan [TS_84] (rows=86400 width=471) + default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_339] (rows=3960 width=107) + Conds:RS_90._col1=RS_91._col0(Inner),Output:["_col0","_col2"] + <-Map 26 [SIMPLE_EDGE] + SHUFFLE [RS_90] + PartitionCols:_col1 + Select Operator [SEL_80] (rows=1 width=0) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_310] (rows=1 width=0) + predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) + TableScan [TS_78] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Map 31 [SIMPLE_EDGE] + SHUFFLE [RS_91] + PartitionCols:_col0 + Select Operator [SEL_83] (rows=3600 width=107) + Output:["_col0"] + Filter Operator [FIL_311] (rows=3600 width=107) + predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) + TableScan [TS_81] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 38 [SIMPLE_EDGE] + SHUFFLE [RS_219] + Group By Operator [GBY_128] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 37 [SIMPLE_EDGE] + SHUFFLE [RS_127] + Group By Operator [GBY_126] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_344] (rows=17424 width=471) + Conds:RS_122._col2=RS_123._col0(Inner) + <-Map 41 [SIMPLE_EDGE] + SHUFFLE [RS_123] + PartitionCols:_col0 + Select Operator [SEL_115] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_317] (rows=852 width=1910) + predicate:((s_store_name = 'ese') and s_store_sk is not null) + TableScan [TS_113] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_122] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_343] (rows=15840 width=471) + Conds:RS_119._col0=RS_120._col0(Inner),Output:["_col2"] + <-Map 40 [SIMPLE_EDGE] + SHUFFLE [RS_120] + PartitionCols:_col0 + Select Operator [SEL_112] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_316] (rows=14400 width=471) + predicate:(((t_hour = 10) and (t_minute >= 30)) and t_time_sk is not null) + TableScan [TS_110] (rows=86400 width=471) + default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_119] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_342] (rows=3960 width=107) + Conds:RS_116._col1=RS_117._col0(Inner),Output:["_col0","_col2"] + <-Map 34 [SIMPLE_EDGE] + SHUFFLE [RS_116] + PartitionCols:_col1 + Select Operator [SEL_106] (rows=1 width=0) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_314] (rows=1 width=0) + predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) + TableScan [TS_104] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Map 39 [SIMPLE_EDGE] + SHUFFLE [RS_117] + PartitionCols:_col0 + Select Operator [SEL_109] (rows=3600 width=107) + Output:["_col0"] + Filter Operator [FIL_315] (rows=3600 width=107) + predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) + TableScan [TS_107] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 46 [SIMPLE_EDGE] + SHUFFLE [RS_220] + Group By Operator [GBY_154] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 45 [SIMPLE_EDGE] + SHUFFLE [RS_153] + Group By Operator [GBY_152] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_347] (rows=17424 width=471) + Conds:RS_148._col2=RS_149._col0(Inner) + <-Map 49 [SIMPLE_EDGE] + SHUFFLE [RS_149] + PartitionCols:_col0 + Select Operator [SEL_141] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_321] (rows=852 width=1910) + predicate:((s_store_name = 'ese') and s_store_sk is not null) + TableScan [TS_139] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] + <-Reducer 44 [SIMPLE_EDGE] + SHUFFLE [RS_148] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_346] (rows=15840 width=471) + Conds:RS_145._col0=RS_146._col0(Inner),Output:["_col2"] + <-Map 48 [SIMPLE_EDGE] + SHUFFLE [RS_146] + PartitionCols:_col0 + Select Operator [SEL_138] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_320] (rows=14400 width=471) + predicate:(((t_hour = 11) and (t_minute < 30)) and t_time_sk is not null) + TableScan [TS_136] (rows=86400 width=471) + default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] + <-Reducer 43 [SIMPLE_EDGE] + SHUFFLE [RS_145] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_345] (rows=3960 width=107) + Conds:RS_142._col1=RS_143._col0(Inner),Output:["_col0","_col2"] + <-Map 42 [SIMPLE_EDGE] + SHUFFLE [RS_142] + PartitionCols:_col1 + Select Operator [SEL_132] (rows=1 width=0) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_318] (rows=1 width=0) + predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) + TableScan [TS_130] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Map 47 [SIMPLE_EDGE] + SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_141] (rows=852 width=1910) + Select Operator [SEL_135] (rows=3600 width=107) Output:["_col0"] - Filter Operator [FIL_333] (rows=852 width=1910) - predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_139] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 50 [SIMPLE_EDGE] - SHUFFLE [RS_148] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_358] (rows=15840 width=471) - Conds:RS_145._col0=RS_146._col0(Inner),Output:["_col2"] - <-Map 54 [SIMPLE_EDGE] - SHUFFLE [RS_146] - PartitionCols:_col0 - Select Operator [SEL_138] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_332] (rows=14400 width=471) - predicate:(((t_hour = 11) and (t_minute < 30)) and t_time_sk is not null) - TableScan [TS_136] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 49 [SIMPLE_EDGE] - SHUFFLE [RS_145] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_357] (rows=3960 width=107) - Conds:RS_142._col1=RS_143._col0(Inner),Output:["_col0","_col2"] - <-Map 48 [SIMPLE_EDGE] - SHUFFLE [RS_142] - PartitionCols:_col1 - Select Operator [SEL_132] (rows=1 width=0) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_330] (rows=1 width=0) - predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) - TableScan [TS_130] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 53 [SIMPLE_EDGE] - SHUFFLE [RS_143] - PartitionCols:_col0 - Select Operator [SEL_135] (rows=3600 width=107) - Output:["_col0"] - Filter Operator [FIL_331] (rows=3600 width=107) - predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_133] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_227] - Merge Join Operator [MERGEJOIN_369] (rows=1 width=8) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 44 [SIMPLE_EDGE] - SHUFFLE [RS_225] - Group By Operator [GBY_128] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 43 [SIMPLE_EDGE] - SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_356] (rows=17424 width=471) - Conds:RS_122._col2=RS_123._col0(Inner) - <-Map 47 [SIMPLE_EDGE] - SHUFFLE [RS_123] - PartitionCols:_col0 - Select Operator [SEL_115] (rows=852 width=1910) - Output:["_col0"] - Filter Operator [FIL_329] (rows=852 width=1910) - predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_113] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 42 [SIMPLE_EDGE] - SHUFFLE [RS_122] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_355] (rows=15840 width=471) - Conds:RS_119._col0=RS_120._col0(Inner),Output:["_col2"] - <-Map 46 [SIMPLE_EDGE] - SHUFFLE [RS_120] - PartitionCols:_col0 - Select Operator [SEL_112] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_328] (rows=14400 width=471) - predicate:(((t_hour = 10) and (t_minute >= 30)) and t_time_sk is not null) - TableScan [TS_110] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 41 [SIMPLE_EDGE] - SHUFFLE [RS_119] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_354] (rows=3960 width=107) - Conds:RS_116._col1=RS_117._col0(Inner),Output:["_col0","_col2"] - <-Map 40 [SIMPLE_EDGE] - SHUFFLE [RS_116] - PartitionCols:_col1 - Select Operator [SEL_106] (rows=1 width=0) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_326] (rows=1 width=0) - predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) - TableScan [TS_104] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 45 [SIMPLE_EDGE] - SHUFFLE [RS_117] - PartitionCols:_col0 - Select Operator [SEL_109] (rows=3600 width=107) - Output:["_col0"] - Filter Operator [FIL_327] (rows=3600 width=107) - predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_107] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_224] - Merge Join Operator [MERGEJOIN_368] (rows=1 width=8) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Reducer 36 [SIMPLE_EDGE] - SHUFFLE [RS_222] - Group By Operator [GBY_102] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 35 [SIMPLE_EDGE] - SHUFFLE [RS_101] - Group By Operator [GBY_100] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_353] (rows=17424 width=471) - Conds:RS_96._col2=RS_97._col0(Inner) - <-Map 39 [SIMPLE_EDGE] - SHUFFLE [RS_97] - PartitionCols:_col0 - Select Operator [SEL_89] (rows=852 width=1910) - Output:["_col0"] - Filter Operator [FIL_325] (rows=852 width=1910) - predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_87] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 34 [SIMPLE_EDGE] - SHUFFLE [RS_96] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_352] (rows=15840 width=471) - Conds:RS_93._col0=RS_94._col0(Inner),Output:["_col2"] - <-Map 38 [SIMPLE_EDGE] - SHUFFLE [RS_94] - PartitionCols:_col0 - Select Operator [SEL_86] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_324] (rows=14400 width=471) - predicate:(((t_hour = 10) and (t_minute < 30)) and t_time_sk is not null) - TableScan [TS_84] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_93] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_351] (rows=3960 width=107) - Conds:RS_90._col1=RS_91._col0(Inner),Output:["_col0","_col2"] - <-Map 32 [SIMPLE_EDGE] - SHUFFLE [RS_90] - PartitionCols:_col1 - Select Operator [SEL_80] (rows=1 width=0) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_322] (rows=1 width=0) - predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) - TableScan [TS_78] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 37 [SIMPLE_EDGE] - SHUFFLE [RS_91] - PartitionCols:_col0 - Select Operator [SEL_83] (rows=3600 width=107) - Output:["_col0"] - Filter Operator [FIL_323] (rows=3600 width=107) - predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_81] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_221] - Merge Join Operator [MERGEJOIN_367] (rows=1 width=8) - Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_219] - Group By Operator [GBY_76] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 27 [SIMPLE_EDGE] - SHUFFLE [RS_75] - Group By Operator [GBY_74] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_350] (rows=17424 width=471) - Conds:RS_70._col2=RS_71._col0(Inner) - <-Map 31 [SIMPLE_EDGE] - SHUFFLE [RS_71] - PartitionCols:_col0 - Select Operator [SEL_63] (rows=852 width=1910) - Output:["_col0"] - Filter Operator [FIL_321] (rows=852 width=1910) - predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_61] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_70] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_349] (rows=15840 width=471) - Conds:RS_67._col0=RS_68._col0(Inner),Output:["_col2"] - <-Map 30 [SIMPLE_EDGE] - SHUFFLE [RS_68] - PartitionCols:_col0 - Select Operator [SEL_60] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_320] (rows=14400 width=471) - predicate:(((t_hour = 9) and (t_minute >= 30)) and t_time_sk is not null) - TableScan [TS_58] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_67] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_348] (rows=3960 width=107) - Conds:RS_64._col1=RS_65._col0(Inner),Output:["_col0","_col2"] - <-Map 24 [SIMPLE_EDGE] - SHUFFLE [RS_64] - PartitionCols:_col1 - Select Operator [SEL_54] (rows=1 width=0) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_318] (rows=1 width=0) - predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) - TableScan [TS_52] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 29 [SIMPLE_EDGE] - SHUFFLE [RS_65] - PartitionCols:_col0 - Select Operator [SEL_57] (rows=3600 width=107) - Output:["_col0"] - Filter Operator [FIL_319] (rows=3600 width=107) - predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_55] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_218] - Merge Join Operator [MERGEJOIN_366] (rows=1 width=8) - Conds:(Inner),Output:["_col0","_col1"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_216] - Group By Operator [GBY_50] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_49] - Group By Operator [GBY_48] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_347] (rows=17424 width=471) - Conds:RS_44._col2=RS_45._col0(Inner) - <-Map 23 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col0 - Select Operator [SEL_37] (rows=852 width=1910) - Output:["_col0"] - Filter Operator [FIL_317] (rows=852 width=1910) - predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_35] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_346] (rows=15840 width=471) - Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col2"] - <-Map 22 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col0 - Select Operator [SEL_34] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_316] (rows=14400 width=471) - predicate:(((t_hour = 9) and (t_minute < 30)) and t_time_sk is not null) - TableScan [TS_32] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_345] (rows=3960 width=107) - Conds:RS_38._col1=RS_39._col0(Inner),Output:["_col0","_col2"] - <-Map 16 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col1 - Select Operator [SEL_28] (rows=1 width=0) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_314] (rows=1 width=0) - predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) - TableScan [TS_26] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 21 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col0 - Select Operator [SEL_31] (rows=3600 width=107) - Output:["_col0"] - Filter Operator [FIL_315] (rows=3600 width=107) - predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_29] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_215] - Group By Operator [GBY_24] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] - Group By Operator [GBY_22] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_344] (rows=17424 width=471) - Conds:RS_18._col2=RS_19._col0(Inner) - <-Map 15 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=852 width=1910) - Output:["_col0"] - Filter Operator [FIL_313] (rows=852 width=1910) - predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_343] (rows=15840 width=471) - Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col2"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_312] (rows=14400 width=471) - predicate:(((t_hour = 8) and (t_minute >= 30)) and t_time_sk is not null) - TableScan [TS_6] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_342] (rows=3960 width=107) - Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0","_col2"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Select Operator [SEL_2] (rows=1 width=0) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_310] (rows=1 width=0) - predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) - TableScan [TS_0] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=3600 width=107) - Output:["_col0"] - Filter Operator [FIL_311] (rows=3600 width=107) - predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_3] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 60 [SIMPLE_EDGE] - SHUFFLE [RS_231] - Group By Operator [GBY_180] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 59 [SIMPLE_EDGE] - SHUFFLE [RS_179] - Group By Operator [GBY_178] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_362] (rows=17424 width=471) - Conds:RS_174._col2=RS_175._col0(Inner) - <-Map 63 [SIMPLE_EDGE] - SHUFFLE [RS_175] + Filter Operator [FIL_319] (rows=3600 width=107) + predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) + TableScan [TS_133] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_215] + Group By Operator [GBY_24] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_23] + Group By Operator [GBY_22] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_332] (rows=17424 width=471) + Conds:RS_18._col2=RS_19._col0(Inner) + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_301] (rows=852 width=1910) + predicate:((s_store_name = 'ese') and s_store_sk is not null) + TableScan [TS_9] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_331] (rows=15840 width=471) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col2"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0 - Select Operator [SEL_167] (rows=852 width=1910) + Select Operator [SEL_8] (rows=14400 width=471) Output:["_col0"] - Filter Operator [FIL_337] (rows=852 width=1910) - predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_165] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 58 [SIMPLE_EDGE] - SHUFFLE [RS_174] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_361] (rows=15840 width=471) - Conds:RS_171._col0=RS_172._col0(Inner),Output:["_col2"] - <-Map 62 [SIMPLE_EDGE] - SHUFFLE [RS_172] + Filter Operator [FIL_300] (rows=14400 width=471) + predicate:(((t_hour = 8) and (t_minute >= 30)) and t_time_sk is not null) + TableScan [TS_6] (rows=86400 width=471) + default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_330] (rows=3960 width=107) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Select Operator [SEL_2] (rows=1 width=0) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_298] (rows=1 width=0) + predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) + TableScan [TS_0] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_164] (rows=14400 width=471) + Select Operator [SEL_5] (rows=3600 width=107) Output:["_col0"] - Filter Operator [FIL_336] (rows=14400 width=471) - predicate:(((t_hour = 11) and (t_minute >= 30)) and t_time_sk is not null) - TableScan [TS_162] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 57 [SIMPLE_EDGE] - SHUFFLE [RS_171] + Filter Operator [FIL_299] (rows=3600 width=107) + predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) + TableScan [TS_3] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 54 [SIMPLE_EDGE] + SHUFFLE [RS_221] + Group By Operator [GBY_180] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 53 [SIMPLE_EDGE] + SHUFFLE [RS_179] + Group By Operator [GBY_178] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_350] (rows=17424 width=471) + Conds:RS_174._col2=RS_175._col0(Inner) + <-Map 57 [SIMPLE_EDGE] + SHUFFLE [RS_175] + PartitionCols:_col0 + Select Operator [SEL_167] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_325] (rows=852 width=1910) + predicate:((s_store_name = 'ese') and s_store_sk is not null) + TableScan [TS_165] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] + <-Reducer 52 [SIMPLE_EDGE] + SHUFFLE [RS_174] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_349] (rows=15840 width=471) + Conds:RS_171._col0=RS_172._col0(Inner),Output:["_col2"] + <-Map 56 [SIMPLE_EDGE] + SHUFFLE [RS_172] + PartitionCols:_col0 + Select Operator [SEL_164] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_324] (rows=14400 width=471) + predicate:(((t_hour = 11) and (t_minute >= 30)) and t_time_sk is not null) + TableScan [TS_162] (rows=86400 width=471) + default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] + <-Reducer 51 [SIMPLE_EDGE] + SHUFFLE [RS_171] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_348] (rows=3960 width=107) + Conds:RS_168._col1=RS_169._col0(Inner),Output:["_col0","_col2"] + <-Map 50 [SIMPLE_EDGE] + SHUFFLE [RS_168] + PartitionCols:_col1 + Select Operator [SEL_158] (rows=1 width=0) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_322] (rows=1 width=0) + predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) + TableScan [TS_156] (rows=1 width=0) + default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] + <-Map 55 [SIMPLE_EDGE] + SHUFFLE [RS_169] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_360] (rows=3960 width=107) - Conds:RS_168._col1=RS_169._col0(Inner),Output:["_col0","_col2"] - <-Map 56 [SIMPLE_EDGE] - SHUFFLE [RS_168] - PartitionCols:_col1 - Select Operator [SEL_158] (rows=1 width=0) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_334] (rows=1 width=0) - predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) - TableScan [TS_156] (rows=1 width=0) - default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 61 [SIMPLE_EDGE] - SHUFFLE [RS_169] - PartitionCols:_col0 - Select Operator [SEL_161] (rows=3600 width=107) - Output:["_col0"] - Filter Operator [FIL_335] (rows=3600 width=107) - predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_159] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 68 [SIMPLE_EDGE] - SHUFFLE [RS_234] + Select Operator [SEL_161] (rows=3600 width=107) + Output:["_col0"] + Filter Operator [FIL_323] (rows=3600 width=107) + predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) + TableScan [TS_159] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] + <-Reducer 62 [SIMPLE_EDGE] + SHUFFLE [RS_222] Group By Operator [GBY_206] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 67 [SIMPLE_EDGE] + <-Reducer 61 [SIMPLE_EDGE] SHUFFLE [RS_205] Group By Operator [GBY_204] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_365] (rows=17424 width=471) + Merge Join Operator [MERGEJOIN_353] (rows=17424 width=471) Conds:RS_200._col2=RS_201._col0(Inner) - <-Map 71 [SIMPLE_EDGE] + <-Map 65 [SIMPLE_EDGE] SHUFFLE [RS_201] PartitionCols:_col0 Select Operator [SEL_193] (rows=852 width=1910) Output:["_col0"] - Filter Operator [FIL_341] (rows=852 width=1910) + Filter Operator [FIL_329] (rows=852 width=1910) predicate:((s_store_name = 'ese') and s_store_sk is not null) TableScan [TS_191] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 66 [SIMPLE_EDGE] + <-Reducer 60 [SIMPLE_EDGE] SHUFFLE [RS_200] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_364] (rows=15840 width=471) + Merge Join Operator [MERGEJOIN_352] (rows=15840 width=471) Conds:RS_197._col0=RS_198._col0(Inner),Output:["_col2"] - <-Map 70 [SIMPLE_EDGE] + <-Map 64 [SIMPLE_EDGE] SHUFFLE [RS_198] PartitionCols:_col0 Select Operator [SEL_190] (rows=14400 width=471) Output:["_col0"] - Filter Operator [FIL_340] (rows=14400 width=471) + Filter Operator [FIL_328] (rows=14400 width=471) predicate:(((t_hour = 12) and (t_minute < 30)) and t_time_sk is not null) TableScan [TS_188] (rows=86400 width=471) default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 65 [SIMPLE_EDGE] + <-Reducer 59 [SIMPLE_EDGE] SHUFFLE [RS_197] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_363] (rows=3960 width=107) + Merge Join Operator [MERGEJOIN_351] (rows=3960 width=107) Conds:RS_194._col1=RS_195._col0(Inner),Output:["_col0","_col2"] - <-Map 64 [SIMPLE_EDGE] + <-Map 58 [SIMPLE_EDGE] SHUFFLE [RS_194] PartitionCols:_col1 Select Operator [SEL_184] (rows=1 width=0) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_338] (rows=1 width=0) + Filter Operator [FIL_326] (rows=1 width=0) predicate:((ss_hdemo_sk is not null and ss_sold_time_sk is not null) and ss_store_sk is not null) TableScan [TS_182] (rows=1 width=0) default@store_sales,store_sales,Tbl:PARTIAL,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 69 [SIMPLE_EDGE] + <-Map 63 [SIMPLE_EDGE] SHUFFLE [RS_195] PartitionCols:_col0 Select Operator [SEL_187] (rows=3600 width=107) Output:["_col0"] - Filter Operator [FIL_339] (rows=3600 width=107) + Filter Operator [FIL_327] (rows=3600 width=107) predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) TableScan [TS_185] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] diff --git ql/src/test/results/clientpositive/ppd_outer_join5.q.out ql/src/test/results/clientpositive/ppd_outer_join5.q.out index 492865c..65ca9d1 100644 --- ql/src/test/results/clientpositive/ppd_outer_join5.q.out +++ ql/src/test/results/clientpositive/ppd_outer_join5.q.out @@ -30,16 +30,14 @@ POSTHOOK: query: create table t4 (id int, key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t4 -Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20 PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -73,33 +71,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - filter predicates: - 0 - 1 {true} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: t3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -117,19 +88,25 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Right Outer Join0 to 1 + Inner Join 0 to 2 + filter predicates: + 0 + 1 {true} + 2 keys: 0 1 + 2 outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -141,16 +118,14 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20 PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -184,30 +159,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1, _col2, _col4, _col5 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: t3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -225,19 +176,21 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 + Left Outer Join0 to 2 keys: 0 1 + 2 outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -249,16 +202,14 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20 PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -292,30 +243,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1, _col2, _col4, _col5 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: t3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -333,19 +260,21 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 + Left Outer Join0 to 2 keys: 0 1 + 2 outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/spark/auto_join_stats.q.out ql/src/test/results/clientpositive/spark/auto_join_stats.q.out index 704875a..a95c706 100644 --- ql/src/test/results/clientpositive/spark/auto_join_stats.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_stats.q.out @@ -203,6 +203,7 @@ STAGE PLANS: keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) Local Work: Map Reduce Local Work Map 5 @@ -221,6 +222,7 @@ STAGE PLANS: keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) Local Work: Map Reduce Local Work @@ -279,30 +281,23 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 1 Map 5 - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 2 Map 5 + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out index ac6c697..f5965a1 100644 --- ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out @@ -207,6 +207,7 @@ STAGE PLANS: keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) Local Work: Map Reduce Local Work Map 4 @@ -225,6 +226,7 @@ STAGE PLANS: keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) Local Work: Map Reduce Local Work @@ -257,30 +259,23 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) + 2 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 3 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 1 Map 4 - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 2 Map 4 + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/bucketizedhiveinputformat.q.out ql/src/test/results/clientpositive/spark/bucketizedhiveinputformat.q.out index c8fc4d3..f164f9d 100644 --- ql/src/test/results/clientpositive/spark/bucketizedhiveinputformat.q.out +++ ql/src/test/results/clientpositive/spark/bucketizedhiveinputformat.q.out @@ -22,8 +22,7 @@ POSTHOOK: query: CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@T2 -Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM ( SELECT tmp1.name as name FROM ( SELECT name, 'MMM' AS n FROM T1) tmp1 diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out index 764bcfe..ef8c674 100644 --- ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out +++ ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out @@ -30,8 +30,7 @@ POSTHOOK: query: create table t4 (id int, key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t4 -Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20 PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20 @@ -44,8 +43,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -64,7 +62,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: t2 @@ -80,7 +78,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) - Map 5 + Map 4 Map Operator Tree: TableScan alias: t3 @@ -101,35 +99,24 @@ STAGE PLANS: Join Operator condition map: Right Outer Join0 to 1 + Inner Join 0 to 2 filter predicates: 0 1 {true} + 2 keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 + 2 outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -141,8 +128,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20 PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20 @@ -155,8 +141,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -175,7 +160,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: t2 @@ -191,7 +176,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) - Map 5 + Map 4 Map Operator Tree: TableScan alias: t3 @@ -212,32 +197,20 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 + Left Outer Join0 to 2 keys: 0 1 - outputColumnNames: _col1, _col2, _col4, _col5 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 - 1 + 2 outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -249,8 +222,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20 PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20 @@ -263,8 +235,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -283,7 +254,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: t2 @@ -299,7 +270,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) - Map 5 + Map 4 Map Operator Tree: TableScan alias: t3 @@ -320,32 +291,20 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 + Left Outer Join0 to 2 keys: 0 1 - outputColumnNames: _col1, _col2, _col4, _col5 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 - 1 + 2 outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat