diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinBranchInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinBranchInfo.java index b40c34db72..027725616d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinBranchInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinBranchInfo.java @@ -28,6 +28,7 @@ // important, it should set this to false. This does not guarantee that // the edge will stay, however, it increases the chances. private boolean shouldRemove; + private boolean ignoreTableSize; public SemiJoinBranchInfo(TableScanOperator ts) { this.ts = ts; @@ -60,4 +61,22 @@ public void setShouldRemove(boolean shouldRemove) { this.shouldRemove = shouldRemove; } } + + public void setIgnoreTableSize(boolean ignoreTableSize) { + this.ignoreTableSize = ignoreTableSize; + } + + /* + * Ignore the table size check when applying this semijoin filter. + * We currently only use it for self-joins. + */ + public boolean getIgnoreTableSize() { + return this.ignoreTableSize; + } + + @Override + public String toString() { + return "SemiJoinBranchInfo [ts=" + ts + ", isHint=" + isHint + ", shouldRemove=" + shouldRemove + + ", ignoreTableSize=" + ignoreTableSize + "]"; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index ddcd022d4c..0db679b535 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -82,6 +82,7 @@ import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.BucketVersionPopulator; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption; @@ -505,7 +506,11 @@ private void 
semijoinRemovalBasedTransformations(OptimizeTezProcContext procCtx, if (semiJoinReductionEnabled) { perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); markSemiJoinForDPP(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based "); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based on DPP"); + + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + markSemiJoinForSelfJoins(procCtx); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based on self joins"); // Remove any semi join edges from Union Op perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); @@ -1138,11 +1143,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } - if (removeBasedOnStats) { + if (removeBasedOnStats && !sjInfo.getIgnoreTableSize()) { // At this point, hinted semijoin case has been handled already // Check if big table is big enough that runtime filtering is // worth it. TableScanOperator ts = sjInfo.getTsOp(); + if (ts.getStatistics() != null) { long numRows = ts.getStatistics().getNumRows(); if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) { @@ -1908,6 +1914,8 @@ private void removeSemijoinOptimizationByBenefit(OptimizeTezProcContext procCtx) tsExpr = ((ExprNodeFieldDesc)tsExpr).getDesc(); } + if (ExprNodeDescUtils.getColumnExpr(tsExpr) == null) { semijoinRsToRemove.add(rs); continue; } + String colName = ExprNodeDescUtils.getColumnExpr(tsExpr).getColumn(); // We check whether there was already another SJ over this TS that was selected // in previous iteration @@ -2088,6 +2096,55 @@ private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) } } + /* + * This is looking for a self-join with a filter condition on one side. 
+ * + * TS[1] (date_dim) + * -> FIL[2] (d_year=2001) + * -> RS[3] (d_week_seq) + * -> TS[4] (date_dim) + * + */ + private void markSemiJoinForSelfJoins(OptimizeTezProcContext procCtx) { + // Semijoin branches keyed by their ReduceSink; each value carries the target TableScan plus the hint/removal flags read below. + Map map = procCtx.parseContext.getRsToSemiJoinBranchInfo(); + + for (ReduceSinkOperator rs : map.keySet()) { + // Walk upwards from the RS through single-parent operators to the TS feeding it, then compare that table with the semijoin's target TS + + final SemiJoinBranchInfo sjInfo = map.get(rs); + if (sjInfo.getIsHint() || !sjInfo.getShouldRemove()) { + continue; + } + + final TableScanOperator targetTs = sjInfo.getTsOp(); + final Table targetTable = targetTs.getConf().getTableMetadata(); + + Operator op = rs; + + // name the loop, because this might add more loops later + parents: while (op != null) { + final List> parentOperators = op.getParentOperators(); + if (parentOperators == null || parentOperators.size() != 1) { + // No parent or several parents (presumably a join or union), give up on the traversal + break parents; + } + // invariant: exactly one parent here; note that any operator that is not a TableScan falls through below and the walk keeps climbing + op = parentOperators.get(0); + if (op instanceof FilterOperator || op instanceof TopNKeyOperator || op instanceof SelectOperator) { + continue; + } else if (op instanceof TableScanOperator) { + final TableScanOperator sourceTs = (TableScanOperator) op; + final Table sourceTable = sourceTs.getConf().getTableMetadata(); + if (sourceTable.equals(targetTable)) { + sjInfo.setIgnoreTableSize(true); + } + break parents; + } + } + } + } + private void bucketingVersionSanityCheck(OptimizeTezProcContext procCtx) throws SemanticException { // Fetch all the FileSinkOperators. 
Set fsOpsAll = new HashSet<>(); diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query2.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query2.q.out index 9e5c1582c4..b26c63814c 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query2.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query2.q.out @@ -128,21 +128,29 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) -Map 9 <- Union 2 (CONTAINS) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) +Map 14 <- Union 15 (CONTAINS) +Map 18 <- Union 15 (CONTAINS) +Map 19 <- Reducer 13 (BROADCAST_EDGE) +Map 8 <- Union 2 (CONTAINS) +Map 9 <- Reducer 11 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 10 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 13 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 19 (SIMPLE_EDGE), Union 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Map 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 12 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_173] - Select Operator [SEL_172] (rows=12881 width=788) + File Output Operator [FS_197] + Select Operator [SEL_196] (rows=12881 width=788) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_57] @@ -150,24 +158,90 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Merge Join Operator [MERGEJOIN_146] (rows=12881 width=1572) Conds:RS_53.(_col0 - 
53)=RS_54._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_145] (rows=652 width=788) + Conds:RS_195._col0=RS_176._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_176] + PartitionCols:_col0 + Select Operator [SEL_173] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_171] (rows=652 width=8) + predicate:((d_year = 2001) and d_week_seq is not null) + TableScan [TS_20] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_week_seq","d_year"] + <-Reducer 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_195] + PartitionCols:_col0 + Group By Operator [GBY_194] (rows=13152 width=788) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Group By Operator [GBY_39] (rows=3182784 width=788) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0 + Select Operator [SEL_37] (rows=430516591 width=143) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_144] (rows=430516591 width=143) + Conds:Union 15._col0=RS_193._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_193] + PartitionCols:_col0 + Select Operator [SEL_192] (rows=73049 width=36) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter 
Operator [FIL_191] (rows=73049 width=99) + predicate:(d_week_seq is not null and d_week_seq BETWEEN DynamicValue(RS_47_date_dim_d_week_seq_min) AND DynamicValue(RS_47_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq, DynamicValue(RS_47_date_dim_d_week_seq_bloom_filter))) + TableScan [TS_31] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_190] + Group By Operator [GBY_189] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_181] + Group By Operator [GBY_179] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_177] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_173] + <-Union 15 [SIMPLE_EDGE] + <-Map 14 [CONTAINS] vectorized + Reduce Output Operator [RS_203] + PartitionCols:_col0 + Select Operator [SEL_202] (rows=143966864 width=115) + Output:["_col0","_col1"] + Filter Operator [FIL_201] (rows=143966864 width=115) + predicate:ws_sold_date_sk is not null + TableScan [TS_157] (rows=144002668 width=115) + Output:["ws_sold_date_sk","ws_ext_sales_price"] + <-Map 18 [CONTAINS] vectorized + Reduce Output Operator [RS_206] + PartitionCols:_col0 + Select Operator [SEL_205] (rows=286549727 width=115) + Output:["_col0","_col1"] + Filter Operator [FIL_204] (rows=286549727 width=115) + predicate:cs_sold_date_sk is not null + TableScan [TS_162] (rows=287989836 width=115) + Output:["cs_sold_date_sk","cs_ext_sales_price"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:(_col0 - 53) Merge Join Operator [MERGEJOIN_143] (rows=652 width=788) - 
Conds:RS_164._col0=RS_170._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_170] + Conds:RS_188._col0=RS_174._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_174] PartitionCols:_col0 - Select Operator [SEL_168] (rows=652 width=4) + Select Operator [SEL_172] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_166] (rows=652 width=8) + Filter Operator [FIL_170] (rows=652 width=8) predicate:((d_year = 2002) and d_week_seq is not null) - TableScan [TS_20] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_week_seq","d_year"] + Please refer to the previous TableScan [TS_20] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_188] PartitionCols:_col0 - Group By Operator [GBY_163] (rows=13152 width=788) + Group By Operator [GBY_187] (rows=13152 width=788) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] @@ -177,50 +251,44 @@ Stage-0 Select Operator [SEL_14] (rows=430516591 width=143) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Merge Join Operator [MERGEJOIN_142] (rows=430516591 width=143) - Conds:Union 2._col0=RS_162._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + Conds:Union 2._col0=RS_186._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] PartitionCols:_col0 - Select Operator [SEL_161] (rows=73049 width=36) + Select Operator [SEL_185] (rows=73049 width=36) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_160] (rows=73049 width=99) - predicate:d_week_seq is not null + Filter Operator [FIL_184] (rows=73049 width=99) + predicate:(d_week_seq is not null and d_week_seq BETWEEN DynamicValue(RS_51_date_dim_d_week_seq_min) AND DynamicValue(RS_51_date_dim_d_week_seq_max) and in_bloom_filter(d_week_seq, DynamicValue(RS_51_date_dim_d_week_seq_bloom_filter))) TableScan [TS_8] (rows=73049 width=99) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_183] + Group By Operator [GBY_182] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_180] + Group By Operator [GBY_178] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_175] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_172] <-Union 2 [SIMPLE_EDGE] <-Map 1 [CONTAINS] vectorized - Reduce Output Operator [RS_159] + Reduce Output Operator [RS_169] PartitionCols:_col0 - Select Operator [SEL_158] (rows=143966864 width=115) + Select Operator [SEL_168] (rows=143966864 width=115) Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=143966864 width=115) + Filter Operator [FIL_167] (rows=143966864 width=115) predicate:ws_sold_date_sk is not null TableScan [TS_147] (rows=144002668 width=115) Output:["ws_sold_date_sk","ws_ext_sales_price"] - <-Map 9 [CONTAINS] vectorized - Reduce Output Operator [RS_176] + <-Map 8 [CONTAINS] vectorized + Reduce Output Operator [RS_200] PartitionCols:_col0 - Select Operator [SEL_175] (rows=286549727 width=115) + Select Operator [SEL_199] (rows=286549727 width=115) 
Output:["_col0","_col1"] - Filter Operator [FIL_174] (rows=286549727 width=115) + Filter Operator [FIL_198] (rows=286549727 width=115) predicate:cs_sold_date_sk is not null TableScan [TS_152] (rows=287989836 width=115) Output:["cs_sold_date_sk","cs_ext_sales_price"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_54] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_145] (rows=652 width=788) - Conds:RS_165._col0=RS_171._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_171] - PartitionCols:_col0 - Select Operator [SEL_169] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_167] (rows=652 width=8) - predicate:((d_year = 2001) and d_week_seq is not null) - Please refer to the previous TableScan [TS_20] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_165] - PartitionCols:_col0 - Please refer to the previous Group By Operator [GBY_163] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query41.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query41.q.out index 75b7642dd5..15ae84b588 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query41.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query41.q.out @@ -107,24 +107,26 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage +Map 1 <- Reducer 7 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_59] - Limit [LIM_58] (rows=100 width=107) + File Output Operator [FS_64] + Limit [LIM_63] (rows=100 width=107) Number of rows:100 - Select Operator [SEL_57] (rows=376 width=107) + Select Operator [SEL_62] (rows=376 width=107) Output:["_col0"] <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_56] - Group By Operator [GBY_55] (rows=376 width=107) + SHUFFLE [RS_61] + Group By Operator [GBY_60] (rows=376 width=107) Output:["_col0"],keys:KEY._col0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_16] @@ -134,34 +136,45 @@ Stage-0 Top N Key Operator [TNK_30] (rows=752 width=107) keys:_col1,top n:100 Merge Join Operator [MERGEJOIN_43] (rows=752 width=107) - Conds:RS_46._col0=RS_54._col0(Inner),Output:["_col1"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_46] - PartitionCols:_col0 - Select Operator [SEL_45] (rows=20726 width=202) - Output:["_col0","_col1"] - Filter Operator [FIL_44] (rows=20726 width=205) - predicate:(i_manufact_id BETWEEN 970 AND 1010 and i_manufact is not null) - TableScan [TS_0] (rows=462000 width=205) - default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_manufact_id","i_manufact","i_product_name"] + Conds:RS_59._col0=RS_51._col0(Inner),Output:["_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_54] + PARTITION_ONLY_SHUFFLE [RS_51] PartitionCols:_col0 - Select Operator [SEL_53] (rows=46 width=95) + Select Operator [SEL_50] (rows=46 width=95) Output:["_col0"] - Filter Operator [FIL_52] (rows=46 width=103) + Filter Operator [FIL_49] (rows=46 width=103) predicate:(_col1 > 0L) - Group By Operator [GBY_51] (rows=140 width=103) + Group By Operator [GBY_48] (rows=140 width=103) 
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_50] + SHUFFLE [RS_47] PartitionCols:_col0 - Group By Operator [GBY_49] (rows=140 width=103) + Group By Operator [GBY_46] (rows=140 width=103) Output:["_col0","_col1"],aggregations:["count()"],keys:i_manufact - Select Operator [SEL_48] (rows=280 width=450) + Select Operator [SEL_45] (rows=280 width=450) Output:["i_manufact"] - Filter Operator [FIL_47] (rows=280 width=450) + Filter Operator [FIL_44] (rows=280 width=450) predicate:((((i_category = 'Women') and (i_color) IN ('frosted', 'rose') and (i_units) IN ('Lb', 'Gross') and (i_size) IN ('medium', 'large')) or ((i_category = 'Women') and (i_color) IN ('chocolate', 'black') and (i_units) IN ('Box', 'Dram') and (i_size) IN ('economy', 'petite')) or ((i_category = 'Men') and (i_color) IN ('slate', 'magenta') and (i_units) IN ('Carton', 'Bundle') and (i_size) IN ('N/A', 'small')) or ((i_category = 'Men') and (i_color) IN ('cornflower', 'firebrick') and (i_units) IN ('Pound', 'Oz') and (i_size) IN ('medium', 'large')) or ((i_category = 'Women') and (i_color) IN ('almond', 'steel') and (i_units) IN ('Tsp', 'Case') and (i_size) IN ('medium', 'large')) or ((i_category = 'Women') and (i_color) IN ('purple', 'aquamarine') and (i_units) IN ('Bunch', 'Gram') and (i_size) IN ('economy', 'petite')) or ((i_category = 'Men') and (i_color) IN ('lavender', 'papaya') and (i_units) IN ('Pallet', 'Cup') and (i_size) IN ('N/A', 'small')) or ((i_category = 'Men') and (i_color) IN ('maroon', 'cyan') and (i_units) IN ('Each', 'N/A') and (i_size) IN ('medium', 'large'))) and i_manufact is not null) TableScan [TS_3] (rows=462000 width=450) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_category","i_manufact","i_size","i_color","i_units"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_59] + PartitionCols:_col0 + Select Operator [SEL_58] (rows=20726 width=202) + Output:["_col0","_col1"] + Filter Operator 
[FIL_57] (rows=20726 width=205) + predicate:(i_manufact_id BETWEEN 970 AND 1010 and i_manufact is not null and i_manufact BETWEEN DynamicValue(RS_12_item_i_manufact_min) AND DynamicValue(RS_12_item_i_manufact_max) and in_bloom_filter(i_manufact, DynamicValue(RS_12_item_i_manufact_bloom_filter))) + TableScan [TS_0] (rows=462000 width=205) + default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_manufact_id","i_manufact","i_product_name"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_56] + Group By Operator [GBY_55] (rows=1 width=552) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_54] + Group By Operator [GBY_53] (rows=1 width=552) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_52] (rows=46 width=95) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_50] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out index aac99e9303..89f28820f4 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out @@ -149,27 +149,29 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 14 <- Reducer 18 (BROADCAST_EDGE) -Map 26 <- Reducer 21 (BROADCAST_EDGE) -Map 27 <- Reducer 24 (BROADCAST_EDGE) -Reducer 10 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Map 1 <- Reducer 14 (BROADCAST_EDGE) +Map 15 <- Reducer 19 (BROADCAST_EDGE) +Map 27 <- Reducer 22 (BROADCAST_EDGE) +Map 28 <- Reducer 25 (BROADCAST_EDGE) +Reducer 10 <- Reducer 2 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 17 <- Map 26 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 17 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 20 <- Map 18 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 21 <- Map 26 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 18 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) +Reducer 24 <- Map 26 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) 
Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) Stage-0 @@ -177,24 +179,24 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_355] - Limit [LIM_354] (rows=100 width=212) + File Output Operator [FS_361] + Limit [LIM_360] (rows=100 width=212) Number of rows:100 - Select Operator [SEL_353] (rows=355 width=212) + Select Operator [SEL_359] (rows=355 width=212) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] - Top N Key Operator [TNK_351] (rows=355 width=212) + SHUFFLE [RS_358] + Top N Key Operator [TNK_357] (rows=355 width=212) keys:_col1,top n:100 - Group By Operator [GBY_350] (rows=355 width=212) + Group By Operator [GBY_356] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 11 [CONTAINS] vectorized - Reduce Output Operator [RS_371] + Reduce Output Operator [RS_377] PartitionCols:_col0 - Group By Operator [GBY_370] (rows=355 width=212) + Group By Operator [GBY_376] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_369] (rows=355 width=212) + Group By Operator [GBY_375] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_106] @@ -207,86 +209,99 @@ Stage-0 SHUFFLE [RS_101] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_291] (rows=15609 width=104) - Conds:RS_316._col1=RS_322._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] - PartitionCols:_col1 - Select Operator [SEL_315] (rows=462000 width=104) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=462000 width=104) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + Conds:RS_328._col1=RS_320._col0(Inner),Output:["_col0","_col1"] <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] + SHUFFLE [RS_320] PartitionCols:_col0 - Group By Operator [GBY_321] (rows=10500 
width=100) + Group By Operator [GBY_319] (rows=10500 width=100) Output:["_col0"],keys:KEY._col0 <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + SHUFFLE [RS_318] PartitionCols:_col0 - Group By Operator [GBY_319] (rows=10500 width=100) + Group By Operator [GBY_317] (rows=10500 width=100) Output:["_col0"],keys:i_item_id - Select Operator [SEL_318] (rows=21000 width=189) + Select Operator [SEL_316] (rows=21000 width=189) Output:["i_item_id"] - Filter Operator [FIL_317] (rows=21000 width=189) + Filter Operator [FIL_315] (rows=21000 width=189) predicate:(i_color) IN ('orchid', 'chiffon', 'lace') TableScan [TS_2] (rows=462000 width=189) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_color"] - <-Reducer 23 [SIMPLE_EDGE] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_328] + PartitionCols:_col1 + Select Operator [SEL_327] (rows=462000 width=104) + Output:["_col0","_col1"] + Filter Operator [FIL_326] (rows=462000 width=104) + predicate:(i_item_id BETWEEN DynamicValue(RS_26_item_i_item_id_min) AND DynamicValue(RS_26_item_i_item_id_max) and in_bloom_filter(i_item_id, DynamicValue(RS_26_item_i_item_id_bloom_filter))) + TableScan [TS_0] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_325] + Group By Operator [GBY_324] (rows=1 width=552) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + Group By Operator [GBY_322] (rows=1 width=552) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_321] (rows=10500 width=100) + Output:["_col0"] + Please refer to the previous Group By Operator [GBY_319] + <-Reducer 24 [SIMPLE_EDGE] SHUFFLE [RS_102] PartitionCols:_col2 Select Operator [SEL_97] (rows=788222 
width=110) Output:["_col2","_col4"] Merge Join Operator [MERGEJOIN_299] (rows=788222 width=110) - Conds:RS_94._col2=RS_346._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_346] + Conds:RS_94._col2=RS_352._col0(Inner),Output:["_col1","_col3"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_352] PartitionCols:_col0 - Select Operator [SEL_343] (rows=8000000 width=4) + Select Operator [SEL_349] (rows=8000000 width=4) Output:["_col0"] - Filter Operator [FIL_342] (rows=8000000 width=112) + Filter Operator [FIL_348] (rows=8000000 width=112) predicate:(ca_gmt_offset = -8) TableScan [TS_15] (rows=40000000 width=112) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 22 [SIMPLE_EDGE] + <-Reducer 23 [SIMPLE_EDGE] SHUFFLE [RS_94] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_298] (rows=3941109 width=118) - Conds:RS_368._col0=RS_329._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_329] + Conds:RS_374._col0=RS_335._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_335] PartitionCols:_col0 - Select Operator [SEL_324] (rows=50 width=4) + Select Operator [SEL_330] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_323] (rows=50 width=12) + Filter Operator [FIL_329] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 1)) TableScan [TS_12] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_368] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_374] PartitionCols:_col0 - Select Operator [SEL_367] (rows=143931246 width=123) + Select Operator [SEL_373] (rows=143931246 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_366] (rows=143931246 width=123) + Filter Operator [FIL_372] (rows=143931246 width=123) 
predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_92_date_dim_d_date_sk_min) AND DynamicValue(RS_92_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_92_date_dim_d_date_sk_bloom_filter))) TableScan [TS_82] (rows=144002668 width=123) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_365] - Group By Operator [GBY_364] (rows=1 width=12) + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_371] + Group By Operator [GBY_370] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_336] - Group By Operator [GBY_333] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_342] + Group By Operator [GBY_339] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_330] (rows=50 width=4) + Select Operator [SEL_336] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_324] + Please refer to the previous Select Operator [SEL_330] <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_349] + Reduce Output Operator [RS_355] PartitionCols:_col0 - Group By Operator [GBY_348] (rows=355 width=212) + Group By Operator [GBY_354] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_347] (rows=355 width=212) + Group By Operator [GBY_353] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_33] @@ -299,52 +314,52 @@ Stage-0 SHUFFLE [RS_28] PartitionCols:_col0 Please refer 
to the previous Merge Join Operator [MERGEJOIN_291] - <-Reducer 16 [SIMPLE_EDGE] + <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col2 Select Operator [SEL_24] (rows=2876890 width=4) Output:["_col2","_col4"] Merge Join Operator [MERGEJOIN_293] (rows=2876890 width=4) - Conds:RS_21._col2=RS_344._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] + Conds:RS_21._col2=RS_350._col0(Inner),Output:["_col1","_col3"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_350] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_343] - <-Reducer 15 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_349] + <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_292] (rows=14384447 width=4) - Conds:RS_341._col0=RS_325._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_325] + Conds:RS_347._col0=RS_331._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_331] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_324] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_341] + Please refer to the previous Select Operator [SEL_330] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_347] PartitionCols:_col0 - Select Operator [SEL_340] (rows=525327191 width=118) + Select Operator [SEL_346] (rows=525327191 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_339] (rows=525327191 width=118) + Filter Operator [FIL_345] (rows=525327191 width=118) predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) TableScan [TS_9] (rows=575995635 width=118) 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_338] - Group By Operator [GBY_337] (rows=1 width=12) + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_344] + Group By Operator [GBY_343] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_334] - Group By Operator [GBY_331] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_340] + Group By Operator [GBY_337] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_326] (rows=50 width=4) + Select Operator [SEL_332] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_324] + Please refer to the previous Select Operator [SEL_330] <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_363] + Reduce Output Operator [RS_369] PartitionCols:_col0 - Group By Operator [GBY_362] (rows=355 width=212) + Group By Operator [GBY_368] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_361] (rows=355 width=212) + Group By Operator [GBY_367] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_69] @@ -357,44 +372,44 @@ Stage-0 SHUFFLE [RS_64] PartitionCols:_col0 Please refer to the previous Merge Join Operator [MERGEJOIN_291] - <-Reducer 20 [SIMPLE_EDGE] + <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col3 Select Operator [SEL_60] (rows=1550375 width=13) Output:["_col3","_col4"] Merge Join Operator [MERGEJOIN_296] (rows=1550375 width=13) - 
Conds:RS_57._col1=RS_345._col0(Inner),Output:["_col2","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_345] + Conds:RS_57._col1=RS_351._col0(Inner),Output:["_col2","_col3"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_351] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_343] - <-Reducer 19 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_349] + <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_295] (rows=7751872 width=98) - Conds:RS_360._col0=RS_327._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_327] + Conds:RS_366._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_333] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_324] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_360] + Please refer to the previous Select Operator [SEL_330] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_366] PartitionCols:_col0 - Select Operator [SEL_359] (rows=285117733 width=123) + Select Operator [SEL_365] (rows=285117733 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_358] (rows=285117733 width=123) + Filter Operator [FIL_364] (rows=285117733 width=123) predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_55_date_dim_d_date_sk_min) AND DynamicValue(RS_55_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_55_date_dim_d_date_sk_bloom_filter))) TableScan [TS_45] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_357] - Group By Operator [GBY_356] (rows=1 width=12) + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_363] + Group By 
Operator [GBY_362] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_335] - Group By Operator [GBY_332] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] + Group By Operator [GBY_338] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_328] (rows=50 width=4) + Select Operator [SEL_334] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_324] + Please refer to the previous Select Operator [SEL_330] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out index c147b9cd65..e99577bd7c 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out @@ -95,10 +95,14 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage +Map 14 <- Reducer 16 (BROADCAST_EDGE) +Map 18 <- Reducer 17 (BROADCAST_EDGE) Reducer 10 <- Map 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 14 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 11 <- Map 18 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 12 <- Reducer 11 (SIMPLE_EDGE) Reducer 13 <- Map 15 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 15 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) @@ -111,10 +115,10 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_208] - Limit [LIM_207] (rows=100 width=976) + File Output Operator [FS_219] + Limit [LIM_218] (rows=100 width=976) Number of rows:100 - Select Operator [SEL_206] (rows=552189 width=976) + Select Operator [SEL_217] (rows=552189 width=976) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_58] @@ -128,20 +132,20 @@ Stage-0 SHUFFLE [RS_55] PartitionCols:_col1, (_col0 - 52) Merge Join Operator [MERGEJOIN_180] (rows=28847 width=676) - Conds:RS_205._col0=RS_203._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Conds:RS_216._col0=RS_197._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] + PARTITION_ONLY_SHUFFLE [RS_197] PartitionCols:_col0 - Select Operator [SEL_201] (rows=317 width=4) + Select Operator [SEL_194] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_199] (rows=317 width=8) + Filter Operator [FIL_192] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) TableScan [TS_19] (rows=73049 width=8) default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_week_seq"] <-Reducer 12 
[SIMPLE_EDGE] vectorized - SHUFFLE [RS_205] + SHUFFLE [RS_216] PartitionCols:_col0 - Group By Operator [GBY_204] (rows=1196832 width=679) + Group By Operator [GBY_215] (rows=1196832 width=679) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)"],keys:KEY._col0, KEY._col1 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_38] @@ -151,16 +155,7 @@ Stage-0 Select Operator [SEL_35] (rows=525329897 width=138) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] Merge Join Operator [MERGEJOIN_179] (rows=525329897 width=138) - Conds:RS_190._col0=RS_195._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col8","_col9","_col10","_col11"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_195] - PartitionCols:_col0 - Select Operator [SEL_193] (rows=73049 width=36) - Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_191] (rows=73049 width=99) - predicate:d_week_seq is not null - TableScan [TS_7] (rows=73049 width=99) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] + Conds:RS_190._col0=RS_214._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col8","_col9","_col10","_col11"] <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_190] PartitionCols:_col0 @@ -170,6 +165,26 @@ Stage-0 predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_4] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_214] + PartitionCols:_col0 + Select Operator [SEL_213] (rows=73049 width=36) + Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_212] (rows=73049 width=99) + predicate:(d_week_seq is not null and d_week_seq BETWEEN 
DynamicValue(RS_45_d_d_week_seq_min) AND DynamicValue(RS_45_d_d_week_seq_max) and in_bloom_filter(d_week_seq, DynamicValue(RS_45_d_d_week_seq_bloom_filter))) + TableScan [TS_29] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_202] + Group By Operator [GBY_200] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_198] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_194] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_54] PartitionCols:_col3, _col5 @@ -179,19 +194,19 @@ Stage-0 SHUFFLE [RS_52] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_178] (rows=28847 width=788) - Conds:RS_197._col0=RS_202._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Conds:RS_209._col0=RS_195._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_202] + PARTITION_ONLY_SHUFFLE [RS_195] PartitionCols:_col0 - Select Operator [SEL_200] (rows=317 width=4) + Select Operator [SEL_193] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_198] (rows=317 width=8) + Filter Operator [FIL_191] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) Please refer to the previous TableScan [TS_19] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_197] + SHUFFLE [RS_209] PartitionCols:_col0 - Group By Operator [GBY_196] (rows=1196832 width=791) + Group By Operator [GBY_208] (rows=1196832 width=791) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_16] @@ -201,17 +216,31 @@ Stage-0 Select Operator [SEL_13] (rows=525329897 width=142) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] Merge Join Operator [MERGEJOIN_177] (rows=525329897 width=142) - Conds:RS_189._col0=RS_194._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_194] - PartitionCols:_col0 - Select Operator [SEL_192] (rows=73049 width=36) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Please refer to the previous Filter Operator [FIL_191] + Conds:RS_189._col0=RS_207._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_189] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_188] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_207] + PartitionCols:_col0 + Select Operator [SEL_206] (rows=73049 width=36) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_205] (rows=73049 width=99) + predicate:(d_week_seq is not null and d_week_seq BETWEEN DynamicValue(RS_23_d_d_week_seq_min) AND DynamicValue(RS_23_d_d_week_seq_max) and in_bloom_filter(d_week_seq, DynamicValue(RS_23_d_d_week_seq_bloom_filter))) + TableScan [TS_7] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_204] + Group By Operator [GBY_203] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_201] + Group By Operator [GBY_199] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_196] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_193] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_51] PartitionCols:_col0 diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out index a711f8a5a5..8fddccec65 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out @@ -169,27 +169,29 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 14 <- Reducer 18 (BROADCAST_EDGE) -Map 26 <- Reducer 21 (BROADCAST_EDGE) -Map 27 <- Reducer 24 (BROADCAST_EDGE) -Reducer 10 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Map 1 <- Reducer 14 (BROADCAST_EDGE) +Map 15 <- Reducer 19 (BROADCAST_EDGE) +Map 27 <- Reducer 22 (BROADCAST_EDGE) +Map 28 <- Reducer 25 (BROADCAST_EDGE) +Reducer 10 <- Reducer 2 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 17 <- Map 26 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 13 
(SIMPLE_EDGE) -Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 17 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 20 <- Map 18 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 21 <- Map 26 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 18 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) +Reducer 24 <- Map 26 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) Stage-0 @@ -197,26 +199,26 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_364] - Limit [LIM_363] (rows=100 width=212) + File Output Operator [FS_370] + Limit [LIM_369] (rows=100 width=212) Number of rows:100 - Select Operator [SEL_362] (rows=1717 width=212) + Select Operator [SEL_368] (rows=1717 width=212) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_361] - Top N Key Operator [TNK_360] (rows=1717 width=212) + SHUFFLE [RS_367] + Top N Key Operator [TNK_366] (rows=1717 width=212) keys:_col0, _col1,top n:100 - Group By Operator [GBY_359] (rows=1717 width=212) + Group By Operator [GBY_365] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 11 [CONTAINS] vectorized - Reduce Output Operator [RS_382] + Reduce Output Operator [RS_388] PartitionCols:_col0 - Group By Operator [GBY_381] (rows=1717 width=212) 
+ Group By Operator [GBY_387] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_380] (rows=5151 width=212) + Top N Key Operator [TNK_386] (rows=5151 width=212) keys:_col0,top n:100 - Group By Operator [GBY_379] (rows=1717 width=212) + Group By Operator [GBY_385] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_106] @@ -229,88 +231,101 @@ Stage-0 SHUFFLE [RS_101] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_296] (rows=34340 width=104) - Conds:RS_324._col1=RS_330._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_324] - PartitionCols:_col1 - Select Operator [SEL_323] (rows=462000 width=104) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=462000 width=104) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + Conds:RS_336._col1=RS_328._col0(Inner),Output:["_col0","_col1"] <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_330] + SHUFFLE [RS_328] PartitionCols:_col0 - Group By Operator [GBY_329] (rows=23100 width=100) + Group By Operator [GBY_327] (rows=23100 width=100) Output:["_col0"],keys:KEY._col0 <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_328] + SHUFFLE [RS_326] PartitionCols:_col0 - Group By Operator [GBY_327] (rows=23100 width=100) + Group By Operator [GBY_325] (rows=23100 width=100) Output:["_col0"],keys:i_item_id - Select Operator [SEL_326] (rows=46200 width=190) + Select Operator [SEL_324] (rows=46200 width=190) Output:["i_item_id"] - Filter Operator [FIL_325] (rows=46200 width=190) + Filter Operator [FIL_323] (rows=46200 width=190) predicate:(i_category = 'Children') TableScan [TS_2] (rows=462000 width=190) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_category"] - <-Reducer 23 [SIMPLE_EDGE] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_336] + PartitionCols:_col1 + Select Operator [SEL_335] (rows=462000 
width=104) + Output:["_col0","_col1"] + Filter Operator [FIL_334] (rows=462000 width=104) + predicate:(i_item_id BETWEEN DynamicValue(RS_26_item_i_item_id_min) AND DynamicValue(RS_26_item_i_item_id_max) and in_bloom_filter(i_item_id, DynamicValue(RS_26_item_i_item_id_bloom_filter))) + TableScan [TS_0] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_333] + Group By Operator [GBY_332] (rows=1 width=552) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_331] + Group By Operator [GBY_330] (rows=1 width=552) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_329] (rows=23100 width=100) + Output:["_col0"] + Please refer to the previous Group By Operator [GBY_327] + <-Reducer 24 [SIMPLE_EDGE] SHUFFLE [RS_102] PartitionCols:_col2 Select Operator [SEL_97] (rows=788222 width=110) Output:["_col2","_col4"] Merge Join Operator [MERGEJOIN_304] (rows=788222 width=110) - Conds:RS_94._col2=RS_354._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_354] + Conds:RS_94._col2=RS_360._col0(Inner),Output:["_col1","_col3"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_360] PartitionCols:_col0 - Select Operator [SEL_351] (rows=8000000 width=4) + Select Operator [SEL_357] (rows=8000000 width=4) Output:["_col0"] - Filter Operator [FIL_350] (rows=8000000 width=112) + Filter Operator [FIL_356] (rows=8000000 width=112) predicate:(ca_gmt_offset = -6) TableScan [TS_15] (rows=40000000 width=112) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 22 [SIMPLE_EDGE] + <-Reducer 23 [SIMPLE_EDGE] SHUFFLE [RS_94] PartitionCols:_col2 Merge 
Join Operator [MERGEJOIN_303] (rows=3941109 width=118) - Conds:RS_378._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_337] + Conds:RS_384._col0=RS_343._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_343] PartitionCols:_col0 - Select Operator [SEL_332] (rows=50 width=4) + Select Operator [SEL_338] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_331] (rows=50 width=12) + Filter Operator [FIL_337] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 9)) TableScan [TS_12] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_378] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_384] PartitionCols:_col0 - Select Operator [SEL_377] (rows=143931246 width=123) + Select Operator [SEL_383] (rows=143931246 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_376] (rows=143931246 width=123) + Filter Operator [FIL_382] (rows=143931246 width=123) predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_92_date_dim_d_date_sk_min) AND DynamicValue(RS_92_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_92_date_dim_d_date_sk_bloom_filter))) TableScan [TS_82] (rows=144002668 width=123) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_375] - Group By Operator [GBY_374] (rows=1 width=12) + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_381] + Group By Operator [GBY_380] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - 
PARTITION_ONLY_SHUFFLE [RS_344] - Group By Operator [GBY_341] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_350] + Group By Operator [GBY_347] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_338] (rows=50 width=4) + Select Operator [SEL_344] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_332] + Please refer to the previous Select Operator [SEL_338] <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_358] + Reduce Output Operator [RS_364] PartitionCols:_col0 - Group By Operator [GBY_357] (rows=1717 width=212) + Group By Operator [GBY_363] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_356] (rows=5151 width=212) + Top N Key Operator [TNK_362] (rows=5151 width=212) keys:_col0,top n:100 - Group By Operator [GBY_355] (rows=1717 width=212) + Group By Operator [GBY_361] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_33] @@ -323,54 +338,54 @@ Stage-0 SHUFFLE [RS_28] PartitionCols:_col0 Please refer to the previous Merge Join Operator [MERGEJOIN_296] - <-Reducer 16 [SIMPLE_EDGE] + <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col2 Select Operator [SEL_24] (rows=2876890 width=4) Output:["_col2","_col4"] Merge Join Operator [MERGEJOIN_298] (rows=2876890 width=4) - Conds:RS_21._col2=RS_352._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] + Conds:RS_21._col2=RS_358._col0(Inner),Output:["_col1","_col3"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_358] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_351] - <-Reducer 15 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_357] + <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_21] 
PartitionCols:_col2 Merge Join Operator [MERGEJOIN_297] (rows=14384447 width=4) - Conds:RS_349._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_333] + Conds:RS_355._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_339] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_332] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_349] + Please refer to the previous Select Operator [SEL_338] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_355] PartitionCols:_col0 - Select Operator [SEL_348] (rows=525327191 width=118) + Select Operator [SEL_354] (rows=525327191 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_347] (rows=525327191 width=118) + Filter Operator [FIL_353] (rows=525327191 width=118) predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) TableScan [TS_9] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_346] - Group By Operator [GBY_345] (rows=1 width=12) + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_352] + Group By Operator [GBY_351] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_342] - Group By Operator [GBY_339] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_348] + Group By Operator [GBY_345] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_334] (rows=50 width=4) + Select Operator [SEL_340] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_332] + Please refer to the previous Select Operator [SEL_338] <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_373] + Reduce Output Operator [RS_379] PartitionCols:_col0 - Group By Operator [GBY_372] (rows=1717 width=212) + Group By Operator [GBY_378] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_371] (rows=5151 width=212) + Top N Key Operator [TNK_377] (rows=5151 width=212) keys:_col0,top n:100 - Group By Operator [GBY_370] (rows=1717 width=212) + Group By Operator [GBY_376] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_69] @@ -383,44 +398,44 @@ Stage-0 SHUFFLE [RS_64] PartitionCols:_col0 Please refer to the previous Merge Join Operator [MERGEJOIN_296] - <-Reducer 20 [SIMPLE_EDGE] + <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col3 Select Operator [SEL_60] (rows=1550375 width=13) Output:["_col3","_col4"] Merge Join Operator [MERGEJOIN_301] (rows=1550375 width=13) - Conds:RS_57._col1=RS_353._col0(Inner),Output:["_col2","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_353] + Conds:RS_57._col1=RS_359._col0(Inner),Output:["_col2","_col3"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_359] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_351] - <-Reducer 19 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_357] + <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_300] (rows=7751872 width=98) - Conds:RS_369._col0=RS_335._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - 
PARTITION_ONLY_SHUFFLE [RS_335] + Conds:RS_375._col0=RS_341._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_332] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_369] + Please refer to the previous Select Operator [SEL_338] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_375] PartitionCols:_col0 - Select Operator [SEL_368] (rows=285117733 width=123) + Select Operator [SEL_374] (rows=285117733 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_367] (rows=285117733 width=123) + Filter Operator [FIL_373] (rows=285117733 width=123) predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_55_date_dim_d_date_sk_min) AND DynamicValue(RS_55_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_55_date_dim_d_date_sk_bloom_filter))) TableScan [TS_45] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_366] - Group By Operator [GBY_365] (rows=1 width=12) + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_372] + Group By Operator [GBY_371] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_343] - Group By Operator [GBY_340] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_349] + Group By Operator [GBY_346] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_336] (rows=50 width=4) + Select Operator [SEL_342] (rows=50 width=4) 
Output:["_col0"] - Please refer to the previous Select Operator [SEL_332] + Please refer to the previous Select Operator [SEL_338] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out index b5fc39d98f..0ff6ae3bed 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out @@ -82,35 +82,37 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Map 24 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Map 15 <- Reducer 19 (BROADCAST_EDGE) +Reducer 10 <- Map 25 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) Reducer 12 <- Reducer 11 (SIMPLE_EDGE) Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 19 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 20 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 21 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 22 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 23 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Map 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 21 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 23 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 24 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 12 vectorized - File Output Operator [FS_285] - Limit [LIM_284] (rows=100 width=312) + File Output Operator [FS_290] + Limit [LIM_289] (rows=100 width=312) Number of rows:100 - Select Operator [SEL_283] 
(rows=182953402 width=312) + Select Operator [SEL_288] (rows=182953402 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_282] - Top N Key Operator [TNK_281] (rows=182953402 width=312) + SHUFFLE [RS_287] + Top N Key Operator [TNK_286] (rows=182953402 width=312) keys:_col5, _col0, _col1, _col2,top n:100 - Group By Operator [GBY_280] (rows=182953402 width=312) + Group By Operator [GBY_285] (rows=182953402 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_66] @@ -120,11 +122,11 @@ Stage-0 Select Operator [SEL_63] (rows=471834849 width=292) Output:["_col0","_col1","_col2","_col3","_col4"] Merge Join Operator [MERGEJOIN_248] (rows=471834849 width=292) - Conds:RS_60._col4, _col6=RS_279._col0, _col1(Left Outer),Output:["_col13","_col15","_col19","_col25"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_279] + Conds:RS_60._col4, _col6=RS_284._col0, _col1(Left Outer),Output:["_col13","_col15","_col19","_col25"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_284] PartitionCols:_col0, _col1 - Select Operator [SEL_278] (rows=28798881 width=8) + Select Operator [SEL_283] (rows=28798881 width=8) Output:["_col0","_col1"] TableScan [TS_58] (rows=28798881 width=8) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number"] @@ -134,11 +136,11 @@ Stage-0 Select Operator [SEL_57] (rows=182953402 width=300) Output:["_col4","_col6","_col13","_col15","_col19","_col25"] Merge Join Operator [MERGEJOIN_247] (rows=182953402 width=300) - Conds:RS_54._col4=RS_277._col0(Inner),Output:["_col4","_col6","_col13","_col20","_col21","_col25"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_277] + Conds:RS_54._col4=RS_282._col0(Inner),Output:["_col4","_col6","_col13","_col20","_col21","_col25"] + <-Map 24 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_282] PartitionCols:_col0 - Select Operator [SEL_276] (rows=462000 width=188) + Select Operator [SEL_281] (rows=462000 width=188) Output:["_col0","_col1"] TableScan [TS_29] (rows=462000 width=188) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc"] @@ -148,13 +150,13 @@ Stage-0 Filter Operator [FIL_53] (rows=182953402 width=132) predicate:(_col23 > _col14) Merge Join Operator [MERGEJOIN_246] (rows=548860207 width=132) - Conds:RS_50._col1=RS_275._col0(Inner),Output:["_col4","_col6","_col13","_col14","_col20","_col21","_col23"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_275] + Conds:RS_50._col1=RS_280._col0(Inner),Output:["_col4","_col6","_col13","_col14","_col20","_col21","_col23"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_280] PartitionCols:_col0 - Select Operator [SEL_274] (rows=73049 width=12) + Select Operator [SEL_279] (rows=73049 width=12) Output:["_col0","_col1"] - Filter Operator [FIL_273] (rows=73049 width=98) + Filter Operator [FIL_278] (rows=73049 width=98) predicate:UDFToDouble(d_date) is not null TableScan [TS_26] (rows=73049 width=98) default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] @@ -162,11 +164,11 @@ Stage-0 SHUFFLE [RS_50] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_245] (rows=548860207 width=127) - Conds:RS_47._col5=RS_272._col0(Left Outer),Output:["_col1","_col4","_col6","_col13","_col14","_col20","_col21"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_272] + Conds:RS_47._col5=RS_277._col0(Left Outer),Output:["_col1","_col4","_col6","_col13","_col14","_col20","_col21"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_277] PartitionCols:_col0 - Select Operator [SEL_271] (rows=2300 width=4) + Select Operator [SEL_276] (rows=2300 width=4) Output:["_col0"] TableScan [TS_24] (rows=2300 width=4) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk"] @@ -174,11 +176,11 @@ Stage-0 SHUFFLE [RS_47] PartitionCols:_col5 
Merge Join Operator [MERGEJOIN_244] (rows=548860207 width=127) - Conds:RS_44._col17=RS_270._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col13","_col14","_col20"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_270] + Conds:RS_44._col17=RS_275._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col13","_col14","_col20"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_275] PartitionCols:_col0 - Select Operator [SEL_269] (rows=27 width=104) + Select Operator [SEL_274] (rows=27 width=104) Output:["_col0","_col1"] TableScan [TS_22] (rows=27 width=104) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] @@ -188,13 +190,13 @@ Stage-0 Filter Operator [FIL_43] (rows=548860207 width=39) predicate:(_col18 < _col7) Merge Join Operator [MERGEJOIN_243] (rows=1646580622 width=39) - Conds:RS_40._col10, _col4=RS_268._col0, _col1(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col13","_col14","_col17","_col18"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_268] + Conds:RS_40._col10, _col4=RS_273._col0, _col1(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col13","_col14","_col17","_col18"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_273] PartitionCols:_col0, _col1 - Select Operator [SEL_267] (rows=35703276 width=15) + Select Operator [SEL_272] (rows=35703276 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_266] (rows=35703276 width=15) + Filter Operator [FIL_271] (rows=35703276 width=15) predicate:inv_quantity_on_hand is not null TableScan [TS_19] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] @@ -207,36 +209,47 @@ Stage-0 SHUFFLE [RS_38] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_241] (rows=3621 width=20) - Conds:RS_251._col1=RS_254._col1(Inner),Output:["_col0","_col2","_col3","_col4"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_251] - 
PartitionCols:_col1 - Select Operator [SEL_250] (rows=73049 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_249] (rows=73049 width=8) - predicate:d_week_seq is not null - TableScan [TS_9] (rows=73049 width=8) - default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq"] + Conds:RS_259._col1=RS_251._col1(Inner),Output:["_col0","_col2","_col3","_col4"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] + PARTITION_ONLY_SHUFFLE [RS_251] PartitionCols:_col1 - Select Operator [SEL_253] (rows=652 width=16) + Select Operator [SEL_250] (rows=652 width=16) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_252] (rows=652 width=106) + Filter Operator [FIL_249] (rows=652 width=106) predicate:((d_year = 2001) and d_week_seq is not null and UDFToDouble(d_date) is not null) TableScan [TS_12] (rows=73049 width=106) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_week_seq","d_year"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_259] + PartitionCols:_col1 + Select Operator [SEL_258] (rows=73049 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_257] (rows=73049 width=8) + predicate:(d_week_seq is not null and d_week_seq BETWEEN DynamicValue(RS_16_d1_d_week_seq_min) AND DynamicValue(RS_16_d1_d_week_seq_max) and in_bloom_filter(d_week_seq, DynamicValue(RS_16_d1_d_week_seq_bloom_filter))) + TableScan [TS_9] (rows=73049 width=8) + default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_256] + Group By Operator [GBY_255] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_254] + Group By Operator [GBY_253] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + 
Select Operator [SEL_252] (rows=652 width=8) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_250] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_240] (rows=8138146 width=21) - Conds:RS_34._col3=RS_265._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col6","_col7"] + Conds:RS_34._col3=RS_270._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col6","_col7"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_265] + SHUFFLE [RS_270] PartitionCols:_col0 - Select Operator [SEL_264] (rows=1440 width=4) + Select Operator [SEL_269] (rows=1440 width=4) Output:["_col0"] - Filter Operator [FIL_263] (rows=1440 width=96) + Filter Operator [FIL_268] (rows=1440 width=96) predicate:(hd_buy_potential = '1001-5000') TableScan [TS_6] (rows=7200 width=96) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential"] @@ -244,19 +257,19 @@ Stage-0 SHUFFLE [RS_34] PartitionCols:_col3 Merge Join Operator [MERGEJOIN_239] (rows=40690727 width=27) - Conds:RS_259._col2=RS_262._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + Conds:RS_264._col2=RS_267._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_259] + SHUFFLE [RS_264] PartitionCols:_col2 - Select Operator [SEL_258] (rows=280863798 width=31) + Select Operator [SEL_263] (rows=280863798 width=31) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_257] (rows=280863798 width=31) + Filter Operator [FIL_262] (rows=280863798 width=31) predicate:(cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null and cs_ship_date_sk is not null and cs_quantity is not null and cs_bill_hdemo_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_38_d1_d_date_sk_min) AND DynamicValue(RS_38_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, 
DynamicValue(RS_38_d1_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=31) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_256] - Group By Operator [GBY_255] (rows=1 width=12) + BROADCAST [RS_261] + Group By Operator [GBY_260] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 16 [CUSTOM_SIMPLE_EDGE] SHUFFLE [RS_143] @@ -266,11 +279,11 @@ Stage-0 Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_241] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_262] + SHUFFLE [RS_267] PartitionCols:_col0 - Select Operator [SEL_261] (rows=265971 width=4) + Select Operator [SEL_266] (rows=265971 width=4) Output:["_col0"] - Filter Operator [FIL_260] (rows=265971 width=89) + Filter Operator [FIL_265] (rows=265971 width=89) predicate:(cd_marital_status = 'M') TableScan [TS_3] (rows=1861800 width=89) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out index 12f33788c2..a448f1b3c6 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out @@ -145,31 +145,33 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 10 <- Map 20 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Map 15 <- Reducer 19 (BROADCAST_EDGE) +Reducer 10 <- Map 21 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 22 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 13 <- Map 20 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 12 <- Map 23 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 13 <- Map 21 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE) Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 3 <- Map 19 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 20 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 3 <- Map 20 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 14 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 21 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 9 <- Map 22 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_399] - Limit [LIM_398] (rows=100 width=260) + File Output Operator [FS_404] + Limit [LIM_403] (rows=100 width=260) Number of rows:100 - Select Operator [SEL_397] (rows=1260 width=260) + Select Operator [SEL_402] (rows=1260 width=260) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_121] @@ -178,13 +180,13 @@ Stage-0 Top N Key Operator [TNK_207] (rows=1260 width=132) keys:_col0, _col3,top n:100 Merge Join Operator [MERGEJOIN_364] (rows=1260 width=132) - Conds:RS_117._col0=RS_396._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6"] + 
Conds:RS_117._col0=RS_401._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6"] <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_396] + SHUFFLE [RS_401] PartitionCols:_col0 - Select Operator [SEL_395] (rows=1260 width=116) + Select Operator [SEL_400] (rows=1260 width=116) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_394] (rows=1260 width=108) + Group By Operator [GBY_399] (rows=1260 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_111] @@ -192,11 +194,11 @@ Stage-0 Group By Operator [GBY_110] (rows=1260 width=108) Output:["_col0","_col1"],aggregations:["sum(_col5)"],keys:_col7 Merge Join Operator [MERGEJOIN_362] (rows=2521 width=100) - Conds:RS_106._col4=RS_383._col0(Inner),Output:["_col5","_col7"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_383] + Conds:RS_106._col4=RS_388._col0(Inner),Output:["_col5","_col7"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_388] PartitionCols:_col0 - Select Operator [SEL_380] (rows=462000 width=104) + Select Operator [SEL_385] (rows=462000 width=104) Output:["_col0","_col1"] TableScan [TS_22] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] @@ -204,12 +206,12 @@ Stage-0 SHUFFLE [RS_106] PartitionCols:_col4 Merge Join Operator [MERGEJOIN_361] (rows=2521 width=4) - Conds:RS_103._col0=RS_393._col0(Inner),Output:["_col4","_col5"] + Conds:RS_103._col0=RS_398._col0(Inner),Output:["_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_352] (rows=2 width=4) - Conds:RS_367._col1=RS_376._col0(Inner),Output:["_col0"] + Conds:RS_367._col1=RS_381._col0(Inner),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_367] PartitionCols:_col1 @@ -220,9 +222,9 @@ Stage-0 TableScan [TS_0] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Reducer 17 [SIMPLE_EDGE] vectorized - 
SHUFFLE [RS_376] + SHUFFLE [RS_381] PartitionCols:_col0 - Group By Operator [GBY_375] (rows=2 width=94) + Group By Operator [GBY_380] (rows=2 width=94) Output:["_col0"],keys:KEY._col0 <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_16] @@ -230,33 +232,44 @@ Stage-0 Group By Operator [GBY_15] (rows=2 width=94) Output:["_col0"],keys:_col0 Merge Join Operator [MERGEJOIN_351] (rows=5 width=94) - Conds:RS_370._col1=RS_374._col0(Left Semi),Output:["_col0"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_370] - PartitionCols:_col1 - Select Operator [SEL_369] (rows=73049 width=98) - Output:["_col0","_col1"] - Filter Operator [FIL_368] (rows=73049 width=98) - predicate:(d_week_seq is not null and d_date is not null) - TableScan [TS_3] (rows=73049 width=98) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] + Conds:RS_379._col1=RS_371._col0(Left Semi),Output:["_col0"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_374] + PARTITION_ONLY_SHUFFLE [RS_371] PartitionCols:_col0 - Group By Operator [GBY_373] (rows=1 width=4) + Group By Operator [GBY_370] (rows=1 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_372] (rows=2 width=4) + Select Operator [SEL_369] (rows=2 width=4) Output:["_col0"] - Filter Operator [FIL_371] (rows=2 width=98) + Filter Operator [FIL_368] (rows=2 width=98) predicate:((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_393] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_379] + PartitionCols:_col1 + Select Operator [SEL_378] (rows=73049 width=98) + Output:["_col0","_col1"] + Filter Operator [FIL_377] (rows=73049 width=98) + predicate:(d_week_seq is not null and d_date is not null and d_week_seq BETWEEN DynamicValue(RS_12_date_dim_d_week_seq_min) AND DynamicValue(RS_12_date_dim_d_week_seq_max) and 
in_bloom_filter(d_week_seq, DynamicValue(RS_12_date_dim_d_week_seq_bloom_filter))) + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_376] + Group By Operator [GBY_375] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_374] + Group By Operator [GBY_373] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_372] (rows=1 width=4) + Output:["_col0"] + Please refer to the previous Group By Operator [GBY_370] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_398] PartitionCols:_col0 - Select Operator [SEL_392] (rows=13749816 width=11) + Select Operator [SEL_397] (rows=13749816 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_391] (rows=13749816 width=11) + Filter Operator [FIL_396] (rows=13749816 width=11) predicate:wr_returned_date_sk is not null TableScan [TS_95] (rows=14398467 width=11) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_item_sk","wr_return_quantity"] @@ -264,11 +277,11 @@ Stage-0 SHUFFLE [RS_117] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_363] (rows=2739 width=116) - Conds:RS_385._col0=RS_390._col0(Inner),Output:["_col0","_col1","_col3"] + Conds:RS_390._col0=RS_395._col0(Inner),Output:["_col0","_col1","_col3"] <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_390] + SHUFFLE [RS_395] PartitionCols:_col0 - Group By Operator [GBY_389] (rows=5552 width=108) + Group By Operator [GBY_394] (rows=5552 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_73] @@ -276,33 +289,33 @@ Stage-0 Group By Operator 
[GBY_72] (rows=5552 width=108) Output:["_col0","_col1"],aggregations:["sum(_col5)"],keys:_col7 Merge Join Operator [MERGEJOIN_358] (rows=11105 width=100) - Conds:RS_68._col4=RS_382._col0(Inner),Output:["_col5","_col7"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_382] + Conds:RS_68._col4=RS_387._col0(Inner),Output:["_col5","_col7"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_387] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_380] + Please refer to the previous Select Operator [SEL_385] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_68] PartitionCols:_col4 Merge Join Operator [MERGEJOIN_357] (rows=11105 width=4) - Conds:RS_65._col0=RS_388._col0(Inner),Output:["_col4","_col5"] + Conds:RS_65._col0=RS_393._col0(Inner),Output:["_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col0 Please refer to the previous Merge Join Operator [MERGEJOIN_352] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_388] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_393] PartitionCols:_col0 - Select Operator [SEL_387] (rows=55578005 width=11) + Select Operator [SEL_392] (rows=55578005 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_386] (rows=55578005 width=11) + Filter Operator [FIL_391] (rows=55578005 width=11) predicate:sr_returned_date_sk is not null TableScan [TS_57] (rows=57591150 width=11) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_return_quantity"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_385] + SHUFFLE [RS_390] PartitionCols:_col0 - Group By Operator [GBY_384] (rows=2739 width=108) + Group By Operator [GBY_389] (rows=2739 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_35] @@ -310,26 +323,26 @@ Stage-0 Group By Operator [GBY_34] (rows=2739 width=108) Output:["_col0","_col1"],aggregations:["sum(_col5)"],keys:_col7 Merge Join Operator [MERGEJOIN_354] (rows=5478 
width=100) - Conds:RS_30._col4=RS_381._col0(Inner),Output:["_col5","_col7"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_381] + Conds:RS_30._col4=RS_386._col0(Inner),Output:["_col5","_col7"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_386] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_380] + Please refer to the previous Select Operator [SEL_385] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col4 Merge Join Operator [MERGEJOIN_353] (rows=5478 width=4) - Conds:RS_27._col0=RS_379._col0(Inner),Output:["_col4","_col5"] + Conds:RS_27._col0=RS_384._col0(Inner),Output:["_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col0 Please refer to the previous Merge Join Operator [MERGEJOIN_352] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_379] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_384] PartitionCols:_col0 - Select Operator [SEL_378] (rows=28798881 width=11) + Select Operator [SEL_383] (rows=28798881 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_377] (rows=28798881 width=11) + Filter Operator [FIL_382] (rows=28798881 width=11) predicate:cr_returned_date_sk is not null TableScan [TS_19] (rows=28798881 width=11) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_item_sk","cr_return_quantity"]