diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index b8c01020b7..460496844b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -220,23 +220,34 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje } String tableAlias = (op == null ? "" : ((TableScanOperator) op).getConf().getAlias()); - keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias + "_" + column; - - Map hints = parseContext.getSemiJoinHints(); - if (hints != null) { - // If hints map has no entry that would imply that user enforced - // no runtime filtering. - if (hints.size() > 0) { - SemiJoinHint sjHint = hints.get(tableAlias); - semiJoinAttempted = generateSemiJoinOperatorPlan( - ctx, parseContext, ts, keyBaseAlias, sjHint); - if (!semiJoinAttempted && sjHint != null) { - throw new SemanticException("The user hint to enforce semijoin failed required conditions"); + StringBuilder internalColNameBuilder = new StringBuilder(); + StringBuilder colNameBuilder = new StringBuilder(); + if (getColumnName(ctx, internalColNameBuilder, colNameBuilder)) { + String colName = colNameBuilder.toString(); + keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias + "_" + + (colName != null ? colName : column); + Map hints = parseContext.getSemiJoinHints(); + if (hints != null) { + if (hints.size() > 0) { + SemiJoinHint sjHint = hints.get(tableAlias); + if (sjHint != null && sjHint.getColName() != null && + (colName == null || !colName.equals(sjHint.getColName()))) { + LOG.debug("Removed hint due to column mismatch + Col = " + colName + " hint column = " + sjHint.getColName()); + sjHint = null; + } + semiJoinAttempted = generateSemiJoinOperatorPlan( + ctx, parseContext, ts, keyBaseAlias, + internalColNameBuilder.toString(), colName, sjHint); + if (!semiJoinAttempted && sjHint != null) { + throw new SemanticException("The user hint to enforce semijoin failed required conditions"); + } } + } else { + // fallback to regular logic + semiJoinAttempted = generateSemiJoinOperatorPlan( + ctx, parseContext, ts, keyBaseAlias, + internalColNameBuilder.toString(), colName, null); } - } else { - semiJoinAttempted = generateSemiJoinOperatorPlan( - ctx, parseContext, ts, keyBaseAlias, null); } } } @@ -285,6 +296,34 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje return false; } + // Given a key, find the corresponding column name. + private boolean getColumnName(DynamicListContext ctx, StringBuilder internalColName, + StringBuilder colName) { + ExprNodeDesc exprNodeDesc = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex()); + ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(exprNodeDesc); + + if (colExpr == null) { + return false; + } + + internalColName.append(colExpr.getColumn()); + Operator parentOfRS = ctx.generator.getParentOperators().get(0); + if (!(parentOfRS instanceof SelectOperator)) { + colName.append(internalColName.toString()); + return true; + } + + exprNodeDesc = parentOfRS.getColumnExprMap().get(internalColName.toString()); + colExpr = ExprNodeDescUtils.getColumnExpr(exprNodeDesc); + + if (colExpr == null) { + return false; + } + + colName.append(ExprNodeDescUtils.extractColName(colExpr)); + return true; + } + private void replaceExprNode(DynamicListContext ctx, FilterDesc desc, ExprNodeDesc node) { if (ctx.grandParent == null) { desc.setPredicate(node); @@ -400,7 +439,8 @@ private void generateEventOperatorPlan(DynamicListContext ctx, ParseContext pars // Generates plan for min/max when dynamic partition pruning is ruled out. private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContext parseContext, - TableScanOperator ts, String keyBaseAlias, SemiJoinHint sjHint) throws SemanticException { + TableScanOperator ts, String keyBaseAlias, String internalColName, + String colName, SemiJoinHint sjHint) throws SemanticException { // If semijoin hint is enforced, make sure hint is provided if (parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_HINT_ONLY) @@ -414,41 +454,10 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex // we need the expr that generated the key of the reduce sink ExprNodeDesc key = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex()); - String internalColName = null; - ExprNodeDesc exprNodeDesc = key; - // Find the ExprNodeColumnDesc - while (!(exprNodeDesc instanceof ExprNodeColumnDesc) && - (exprNodeDesc.getChildren() != null)) { - exprNodeDesc = exprNodeDesc.getChildren().get(0); - } - - if (!(exprNodeDesc instanceof ExprNodeColumnDesc)) { - // No column found! - // Bail out - return false; - } - - internalColName = ((ExprNodeColumnDesc) exprNodeDesc).getColumn(); - if (parentOfRS instanceof SelectOperator) { - // Make sure the semijoin branch is not on partition column. - ExprNodeDesc expr = parentOfRS.getColumnExprMap().get(internalColName); - while (!(expr instanceof ExprNodeColumnDesc) && - (expr.getChildren() != null)) { - expr = expr.getChildren().get(0); - } - - if (!(expr instanceof ExprNodeColumnDesc)) { - // No column found! - // Bail out - return false; - } - - ExprNodeColumnDesc colExpr = (ExprNodeColumnDesc) expr; - String colName = ExprNodeDescUtils.extractColName(colExpr); - + if (colName != null) { // Fetch the TableScan Operator. - Operator op = parentOfRS.getParentOperators().get(0); - while (op != null && !(op instanceof TableScanOperator)) { + Operator op = parentOfRS; + while (!(op == null || op instanceof TableScanOperator)) { op = op.getParentOperators().get(0); } assert op != null; diff --git a/ql/src/test/queries/clientpositive/semijoin_hint.q b/ql/src/test/queries/clientpositive/semijoin_hint.q index a3cd1d664d..5fbc273e7d 100644 --- a/ql/src/test/queries/clientpositive/semijoin_hint.q +++ b/ql/src/test/queries/clientpositive/semijoin_hint.q @@ -54,6 +54,12 @@ EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_sm explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); +set hive.tez.dynamic.semijoin.reduction.hint.only=true; +-- This should NOT create a semijoin as the join is on different column +explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); +set hive.tez.dynamic.semijoin.reduction.hint.only=false; + + set hive.cbo.returnpath.hiveop=false; explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) @@ -70,6 +76,12 @@ EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_sm explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); +set hive.tez.dynamic.semijoin.reduction.hint.only=true; +-- This should NOT create a semijoin as the join is on different column +explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); +set hive.tez.dynamic.semijoin.reduction.hint.only=false; + + set hive.cbo.enable=false; explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) @@ -86,4 +98,7 @@ EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_sm explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); - +set hive.tez.dynamic.semijoin.reduction.hint.only=true; +-- This should NOT create a semijoin as the join is on different column +explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); +set hive.tez.dynamic.semijoin.reduction.hint.only=false; diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index 1d1f86bfaa..e3ffcfa857 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -288,10 +288,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) + predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -704,10 +704,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_int - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_cstring_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_11_srcpart_date_cstring_min) AND DynamicValue(RS_11_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_11_srcpart_date_cstring_bloom_filter)))) (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_11_srcpart_date_key_min) AND DynamicValue(RS_11_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_11_srcpart_date_key_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_cstring_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_11_srcpart_date_cstring_min) AND DynamicValue(RS_11_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_11_srcpart_date_cstring_bloom_filter)))) (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_11_srcpart_date_key_min) AND DynamicValue(RS_11_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_11_srcpart_date_key_bloom_filter)))) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) @@ -757,10 +757,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) (type: boolean) + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) (type: boolean) + predicate: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -1019,10 +1019,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter))) and (value BETWEEN DynamicValue(RS_7_srcpart_small_value_min) AND DynamicValue(RS_7_srcpart_small_value_max) and in_bloom_filter(value, DynamicValue(RS_7_srcpart_small_value_bloom_filter)))) (type: boolean) + filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter))) and (value BETWEEN DynamicValue(RS_7_srcpart_small_value1_min) AND DynamicValue(RS_7_srcpart_small_value1_max) and in_bloom_filter(value, DynamicValue(RS_7_srcpart_small_value1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter))) and (value BETWEEN DynamicValue(RS_7_srcpart_small_value_min) AND DynamicValue(RS_7_srcpart_small_value_max) and in_bloom_filter(value, DynamicValue(RS_7_srcpart_small_value_bloom_filter)))) (type: boolean) + predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter))) and (value BETWEEN DynamicValue(RS_7_srcpart_small_value1_min) AND DynamicValue(RS_7_srcpart_small_value1_max) and in_bloom_filter(value, DynamicValue(RS_7_srcpart_small_value1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -1348,10 +1348,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) (type: boolean) + filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) (type: boolean) + predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -1402,10 +1402,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_int - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date__col1_min) AND DynamicValue(RS_12_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date__col1_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date__col1_min) AND DynamicValue(RS_12_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date__col1_bloom_filter)))) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) @@ -1556,12 +1556,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) + predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out index a5fdd90811..cb69251274 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out @@ -105,10 +105,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tt2 - filterExpr: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_18_min) AND DynamicValue(RS_23_t1_timestamp_col_18_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_18_bloom_filter)))) (type: boolean) + filterExpr: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1__col3_min) AND DynamicValue(RS_23_t1__col3_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1__col3_bloom_filter)))) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_18_min) AND DynamicValue(RS_23_t1_timestamp_col_18_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_18_bloom_filter)))) (type: boolean) + predicate: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1__col3_min) AND DynamicValue(RS_23_t1__col3_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1__col3_bloom_filter)))) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: decimal1911_col_16 (type: decimal(19,11)), timestamp_col_18 (type: timestamp) diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out index 8950b70b09..2c1f6e59d0 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out @@ -517,7 +517,7 @@ STAGE PLANS: alias: t Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (a BETWEEN DynamicValue(RS_10_nonacidorctbl_a_min) AND DynamicValue(RS_10_nonacidorctbl_a_max) and in_bloom_filter(a, DynamicValue(RS_10_nonacidorctbl_a_bloom_filter))) (type: boolean) + predicate: (a BETWEEN DynamicValue(RS_10_nonacidorctbl__col0_min) AND DynamicValue(RS_10_nonacidorctbl__col0_max) and in_bloom_filter(a, DynamicValue(RS_10_nonacidorctbl__col0_bloom_filter))) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: a (type: int) diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out index b910df4f34..0098b893f4 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out @@ -189,7 +189,7 @@ Stage-0 Select Operator [SEL_2] (rows=2000 width=87) Output:["_col0"] Filter Operator [FIL_17] (rows=2000 width=87) - predicate:(key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) + predicate:(key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter)))) TableScan [TS_0] (rows=2000 width=87) default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Reducer 5 [BROADCAST_EDGE] llap @@ -405,7 +405,7 @@ Stage-0 Select Operator [SEL_8] (rows=2000 width=87) Output:["_col0"] Filter Operator [FIL_27] (rows=2000 width=87) - predicate:(key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) + predicate:(key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) TableScan [TS_6] (rows=2000 width=87) default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Reducer 5 [BROADCAST_EDGE] llap @@ -425,7 +425,7 @@ Stage-0 Select Operator [SEL_2] (rows=9174 width=70) Output:["_col0"] Filter Operator [FIL_25] (rows=9174 width=70) - predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_cstring_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_11_srcpart_date_cstring_min) AND DynamicValue(RS_11_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_11_srcpart_date_cstring_bloom_filter)))) + predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_key1_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_11_srcpart_date_key_min) AND DynamicValue(RS_11_srcpart_date_key_max) and in_bloom_filter(cstring, DynamicValue(RS_11_srcpart_date_key_bloom_filter)))) TableScan [TS_0] (rows=12288 width=70) default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] <-Reducer 5 [BROADCAST_EDGE] llap @@ -568,7 +568,7 @@ Stage-0 Select Operator [SEL_2] (rows=2000 width=178) Output:["_col0","_col1"] Filter Operator [FIL_17] (rows=2000 width=178) - predicate:(key is not null and value is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter))) and (value BETWEEN DynamicValue(RS_7_srcpart_small_value_min) AND DynamicValue(RS_7_srcpart_small_value_max) and in_bloom_filter(value, DynamicValue(RS_7_srcpart_small_value_bloom_filter)))) + predicate:(key is not null and value is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter))) and (value BETWEEN DynamicValue(RS_7_srcpart_small_value1_min) AND DynamicValue(RS_7_srcpart_small_value1_max) and in_bloom_filter(value, DynamicValue(RS_7_srcpart_small_value1_bloom_filter)))) TableScan [TS_0] (rows=2000 width=178) default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Reducer 5 [BROADCAST_EDGE] llap @@ -741,7 +741,7 @@ Stage-0 Select Operator [SEL_2] (rows=2000 width=178) Output:["_col0","_col1"] Filter Operator [FIL_26] (rows=2000 width=178) - predicate:(key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) + predicate:(key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key1_min) AND DynamicValue(RS_10_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key1_bloom_filter)))) TableScan [TS_0] (rows=2000 width=178) default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Reducer 7 [BROADCAST_EDGE] llap @@ -761,7 +761,7 @@ Stage-0 Select Operator [SEL_8] (rows=9174 width=70) Output:["_col0"] Filter Operator [FIL_28] (rows=9174 width=70) - predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) + predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date__col1_min) AND DynamicValue(RS_12_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date__col1_bloom_filter)))) TableScan [TS_6] (rows=12288 width=70) default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] <-Reducer 5 [BROADCAST_EDGE] llap @@ -820,12 +820,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) + predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out index 388888ef1c..3ffc2352a4 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out @@ -183,10 +183,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_7_v_str_min) AND DynamicValue(RS_7_v_str_max) and in_bloom_filter(str, DynamicValue(RS_7_v_str_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_7_v_key1_min) AND DynamicValue(RS_7_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_7_v_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_7_v_str_min) AND DynamicValue(RS_7_v_str_max) and in_bloom_filter(str, DynamicValue(RS_7_v_str_bloom_filter)))) (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(RS_7_v_key1_min) AND DynamicValue(RS_7_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_7_v_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -413,10 +413,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: i - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_7_srcpart_date_cstring_min) AND DynamicValue(RS_7_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_7_srcpart_date_cstring_bloom_filter)))) (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_7_srcpart_date_value_min) AND DynamicValue(RS_7_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_7_srcpart_date_value_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_7_srcpart_date_cstring_min) AND DynamicValue(RS_7_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_7_srcpart_date_cstring_bloom_filter)))) (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_7_srcpart_date_value_min) AND DynamicValue(RS_7_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_7_srcpart_date_value_bloom_filter)))) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) @@ -453,7 +453,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -467,10 +467,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_key1_min) AND DynamicValue(RS_9_i_key1_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_key1_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_str_min) AND DynamicValue(RS_9_i_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_key1_min) AND DynamicValue(RS_9_i_key1_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_key1_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_str_min) AND DynamicValue(RS_9_i_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -564,7 +564,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -636,10 +636,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_i_key1_min) AND DynamicValue(RS_3_i_key1_max) and in_bloom_filter(key1, DynamicValue(RS_3_i_key1_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_3_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_i_key1_min) AND DynamicValue(RS_3_i_key1_max) and in_bloom_filter(key1, DynamicValue(RS_3_i_key1_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_3_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -656,10 +656,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_3_i_str_min) AND DynamicValue(RS_3_i_str_max) and in_bloom_filter(str, DynamicValue(RS_3_i_str_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_3_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_3_i_str_min) AND DynamicValue(RS_3_i_str_max) and in_bloom_filter(str, DynamicValue(RS_3_i_str_bloom_filter)))) (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_3_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -789,10 +789,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_k_key1_min) AND DynamicValue(RS_3_k_key1_max) and in_bloom_filter(key1, DynamicValue(RS_3_k_key1_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_k_str_min) AND DynamicValue(RS_3_k_str_max) and in_bloom_filter(key1, DynamicValue(RS_3_k_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_k_key1_min) AND DynamicValue(RS_3_k_key1_max) and in_bloom_filter(key1, DynamicValue(RS_3_k_key1_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_k_str_min) AND DynamicValue(RS_3_k_str_max) and in_bloom_filter(key1, DynamicValue(RS_3_k_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -860,6 +860,106 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: k + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: v + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) + outputColumnNames: key1 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 value (type: string) + 1 key1 (type: string) + Statistics: Num rows: 9345 Data size: 74760 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + Statistics: Num rows: 9345 Data size: 37380 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) @@ -957,10 +1057,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_6_k_key1_min) AND DynamicValue(RS_6_k_key1_max) and in_bloom_filter(key1, DynamicValue(RS_6_k_key1_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_6_k_str_min) AND DynamicValue(RS_6_k_str_max) and in_bloom_filter(key1, DynamicValue(RS_6_k_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_6_k_key1_min) AND DynamicValue(RS_6_k_key1_max) and in_bloom_filter(key1, DynamicValue(RS_6_k_key1_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_6_k_str_min) AND DynamicValue(RS_6_k_str_max) and in_bloom_filter(key1, DynamicValue(RS_6_k_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -977,10 +1077,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: d - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_21_v_str_min) AND DynamicValue(RS_21_v_str_max) and in_bloom_filter(str, DynamicValue(RS_21_v_str_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_21_v_key1_min) AND DynamicValue(RS_21_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_21_v_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_21_v_str_min) AND DynamicValue(RS_21_v_str_max) and in_bloom_filter(str, DynamicValue(RS_21_v_str_bloom_filter)))) (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(RS_21_v_key1_min) AND DynamicValue(RS_21_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_21_v_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -1117,10 +1217,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_7_v_str_min) AND DynamicValue(RS_7_v_str_max) and in_bloom_filter(str, DynamicValue(RS_7_v_str_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_7_v_key1_min) AND DynamicValue(RS_7_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_7_v_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_7_v_str_min) AND DynamicValue(RS_7_v_str_max) and in_bloom_filter(str, DynamicValue(RS_7_v_str_bloom_filter)))) (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(RS_7_v_key1_min) AND DynamicValue(RS_7_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_7_v_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -1343,10 +1443,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: i - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_cstring_min) AND DynamicValue(RS_10_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_cstring_bloom_filter)))) (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_value_min) AND DynamicValue(RS_10_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_value_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_cstring_min) AND DynamicValue(RS_10_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_cstring_bloom_filter)))) (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_value_min) AND DynamicValue(RS_10_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_value_bloom_filter)))) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) @@ -1363,10 +1463,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (str is not null and value is not null and (str BETWEEN DynamicValue(RS_13_v_str_min) AND DynamicValue(RS_13_v_str_max) and in_bloom_filter(str, DynamicValue(RS_13_v_str_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and value is not null and (str BETWEEN DynamicValue(RS_13_v_key1_min) AND DynamicValue(RS_13_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_13_v_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and value is not null and (str BETWEEN DynamicValue(RS_13_v_str_min) AND DynamicValue(RS_13_v_str_max) and in_bloom_filter(str, DynamicValue(RS_13_v_str_bloom_filter)))) (type: boolean) + predicate: (str is not null and value is not null and (str BETWEEN DynamicValue(RS_13_v_key1_min) AND DynamicValue(RS_13_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_13_v_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string), value (type: string) @@ -1383,7 +1483,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -1480,7 +1580,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -1564,10 +1664,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_key1_min) AND DynamicValue(RS_9_i_key1_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_key1_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_key1_min) AND DynamicValue(RS_9_i_key1_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_key1_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -1584,10 +1684,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_9_i_str_min) AND DynamicValue(RS_9_i_str_max) and in_bloom_filter(str, DynamicValue(RS_9_i_str_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_9_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_9_i_str_min) AND DynamicValue(RS_9_i_str_max) and in_bloom_filter(str, DynamicValue(RS_9_i_str_bloom_filter)))) (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_9_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -1715,10 +1815,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_6_k_key1_min) AND DynamicValue(RS_6_k_key1_max) and in_bloom_filter(key1, DynamicValue(RS_6_k_key1_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_6_k_str_min) AND DynamicValue(RS_6_k_str_max) and in_bloom_filter(key1, DynamicValue(RS_6_k_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_6_k_key1_min) AND DynamicValue(RS_6_k_key1_max) and in_bloom_filter(key1, DynamicValue(RS_6_k_key1_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_6_k_str_min) AND DynamicValue(RS_6_k_str_max) and in_bloom_filter(key1, DynamicValue(RS_6_k_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -1784,6 +1884,104 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: k + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: v + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 9345 Data size: 74760 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) @@ -1873,10 +2071,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_k_key1_min) AND DynamicValue(RS_3_k_key1_max) and in_bloom_filter(key1, DynamicValue(RS_3_k_key1_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_k_str_min) AND DynamicValue(RS_3_k_str_max) and in_bloom_filter(key1, DynamicValue(RS_3_k_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_k_key1_min) AND DynamicValue(RS_3_k_key1_max) and in_bloom_filter(key1, DynamicValue(RS_3_k_key1_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_k_str_min) AND DynamicValue(RS_3_k_str_max) and in_bloom_filter(key1, DynamicValue(RS_3_k_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: key1 (type: string) @@ -1889,10 +2087,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: d - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_17_v_str_min) AND DynamicValue(RS_17_v_str_max) and in_bloom_filter(str, DynamicValue(RS_17_v_str_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_17_v_key1_min) AND DynamicValue(RS_17_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_17_v_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_17_v_str_min) AND DynamicValue(RS_17_v_str_max) and in_bloom_filter(str, DynamicValue(RS_17_v_str_bloom_filter)))) (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(RS_17_v_key1_min) AND DynamicValue(RS_17_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_17_v_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: str (type: string) @@ -2025,10 +2223,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_5_v_str_min) AND DynamicValue(RS_5_v_str_max) and in_bloom_filter(str, DynamicValue(RS_5_v_str_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_5_v_key1_min) AND DynamicValue(RS_5_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_5_v_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_5_v_str_min) AND DynamicValue(RS_5_v_str_max) and in_bloom_filter(str, DynamicValue(RS_5_v_str_bloom_filter)))) (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(RS_5_v_key1_min) AND DynamicValue(RS_5_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_5_v_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: str (type: string) @@ -2265,10 +2463,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_4_srcpart_date_key1_min) AND DynamicValue(RS_4_srcpart_date_key1_max) and in_bloom_filter(key1, DynamicValue(RS_4_srcpart_date_key1_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_4_srcpart_date_str_min) AND DynamicValue(RS_4_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_4_srcpart_date_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_4_srcpart_date_key1_min) AND DynamicValue(RS_4_srcpart_date_key1_max) and in_bloom_filter(key1, DynamicValue(RS_4_srcpart_date_key1_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_4_srcpart_date_str_min) AND DynamicValue(RS_4_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_4_srcpart_date_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: key1 (type: string) @@ -2281,10 +2479,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: i - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_cstring_min) AND DynamicValue(RS_9_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_cstring_bloom_filter)))) (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date__col1_min) AND DynamicValue(RS_9_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date__col1_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_cstring_min) AND DynamicValue(RS_9_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_cstring_bloom_filter)))) (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date__col1_min) AND DynamicValue(RS_9_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date__col1_bloom_filter)))) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: cstring (type: string) @@ -2314,7 +2512,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 9756 Data size: 887796 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -2360,7 +2558,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -2411,10 +2609,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_8_i_str_min) AND DynamicValue(RS_8_i_str_max) and in_bloom_filter(str, DynamicValue(RS_8_i_str_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_8_i_str_min) AND DynamicValue(RS_8_i_str_max) and in_bloom_filter(str, DynamicValue(RS_8_i_str_bloom_filter)))) (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: str (type: string) @@ -2427,10 +2625,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_8_i_key1_min) AND DynamicValue(RS_8_i_key1_max) and in_bloom_filter(key1, DynamicValue(RS_8_i_key1_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_8_i_key1_min) AND DynamicValue(RS_8_i_key1_max) and in_bloom_filter(key1, DynamicValue(RS_8_i_key1_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: key1 (type: string) @@ -2579,10 +2777,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_k_key1_min) AND DynamicValue(RS_3_k_key1_max) and in_bloom_filter(key1, DynamicValue(RS_3_k_key1_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_k_str_min) AND DynamicValue(RS_3_k_str_max) and in_bloom_filter(key1, DynamicValue(RS_3_k_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_k_key1_min) AND DynamicValue(RS_3_k_key1_max) and in_bloom_filter(key1, DynamicValue(RS_3_k_key1_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_k_str_min) AND DynamicValue(RS_3_k_str_max) and in_bloom_filter(key1, DynamicValue(RS_3_k_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: key1 (type: string) @@ -2644,3 +2842,93 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: k + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: v + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 value (type: string) + 1 key1 (type: string) + Statistics: Num rows: 9345 Data size: 74760 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +