diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index 8a62982e1e..562caf90f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -226,6 +226,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje if (tabAliasBuilder.length() > 0) { tableAlias = tabAliasBuilder.toString(); } else { + //falling back Operator op = ctx.generator; while (!(op == null || op instanceof TableScanOperator)) { @@ -361,6 +362,10 @@ private boolean processSemiJoinHints( if (!colName.equals(sjHint.getColName())) { continue; } + if (!ts.getConf().getAlias().equals(sjHint.getTarget())) { + continue; + } + // match! LOG.info("Creating runtime filter due to user hint: column = " + colName); if (generateSemiJoinOperatorPlan(ctx, pCtx, ts, keyBaseAlias, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index d514644da0..9e84a29470 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -9034,8 +9034,8 @@ private void parseStreamTables(QBJoinTree joinTree, QB qb) { /** Parses semjoin hints in the query and returns the table names mapped to filter size, or -1 if not specified. * Hints can be in 2 formats - * 1. TableName, ColumnName, bloom filter entries - * 2. TableName, ColumnName + * 1. TableName, ColumnName, Target-TableName, bloom filter entries + * 2. TableName, ColumnName, Target-TableName * */ private Map> parseSemiJoinHint(List hints) throws SemanticException { if (hints == null || hints.size() == 0) return null; @@ -9071,15 +9071,15 @@ private int parseSingleSemiJoinHint(Tree args, int curIdx, Map 2) { + if (numEntriesLeft > 3) { // Check if there exists bloom filter size entry try { number = Integer.parseInt(args.getChild(curIdx).getText()); @@ -9097,7 +9103,7 @@ private int parseSingleSemiJoinHint(Tree args, int curIdx, Map new ArrayList<>()).add(new SemiJoinHint(colName, number)); + result.computeIfAbsent(source, value -> new ArrayList<>()).add(new SemiJoinHint(colName, target, number)); return curIdx; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java index f7fd306a2c..b2c123fbc5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java @@ -20,17 +20,21 @@ public class SemiJoinHint { private String colName; + private String target; private Integer numEntries; - public SemiJoinHint(String colName, Integer numEntries) { + public SemiJoinHint(String colName, String target, Integer numEntries) { this.colName = colName; + this.target = target; this.numEntries = numEntries; } public String getColName() { return colName; } - + public String getTarget() { + return target; + } public Integer getNumEntries() { return numEntries != null ? numEntries : -1; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 7e156f610a..20f16fbb2b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -743,15 +743,15 @@ private static void removeSemijoinOptimizationFromSMBJoins( SemiJoinBranchInfo sjInfo = pctx.getRsToSemiJoinBranchInfo().get(rs); if (sjInfo != null && ts == sjInfo.getTsOp()) { // match! + if (sjInfo.getIsHint()) { + throw new SemanticException("Removing hinted semijoin as it is with SMB join " + rs + " : " + ts); + } if (LOG.isDebugEnabled()) { LOG.debug("Semijoin optimization found going to SMB join. Removing semijoin " + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); } GenTezUtils.removeBranch(rs); GenTezUtils.removeSemiJoinOperator(pctx, rs, ts); - if (sjInfo.getIsHint()) { - LOG.debug("Removing hinted semijoin as it is with SMB join " + rs + " : " + ts); - } } } } @@ -848,15 +848,15 @@ private static void removeSemiJoinCyclesDueToMapsideJoins( if (parent == ts) { // We have a cycle! + if (sjInfo.getIsHint()) { + throw new SemanticException("Removing hinted semijoin as it is creating cycles with mapside joins " + rs + " : " + ts); + } if (LOG.isDebugEnabled()) { LOG.debug("Semijoin cycle due to mapjoin. Removing semijoin " + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); } GenTezUtils.removeBranch(rs); GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts); - if (sjInfo.getIsHint()) { - LOG.debug("Removing hinted semijoin as it is creating cycles with mapside joins " + rs + " : " + ts); - } } } } @@ -895,6 +895,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries(); if (expectedEntries == -1 || expectedEntries > pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) { + if (sjInfo.getIsHint()) { + throw new SemanticException("Removing hinted semijoin due to lack to stats" + + " or exceeding max bloom filter entries"); + } // Remove the semijoin optimization branch along with ALL the mappings // The parent GB2 has all the branches. Collect them and remove them. for (Operator op : gbOp.getChildOperators()) { diff --git a/ql/src/test/queries/clientpositive/semijoin_hint.q b/ql/src/test/queries/clientpositive/semijoin_hint.q index 71fa445ec8..2a15344b58 100644 --- a/ql/src/test/queries/clientpositive/semijoin_hint.q +++ b/ql/src/test/queries/clientpositive/semijoin_hint.q @@ -49,51 +49,51 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k -- Skip semijoin by using keyword "None" as argument explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); -EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); +EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); +EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); +explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -- This should NOT create a semijoin -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); +explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); set hive.cbo.returnpath.hiveop=false; -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) +explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); + select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); -- Query which creates semijoin explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -- Skip semijoin by using keyword "None" as argument explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); -EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); +EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); +EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); +explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -- This should NOT create a semijoin -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); +explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); set hive.cbo.enable=false; -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) +explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); + select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); -- Query which creates semijoin explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -- Skip semijoin by using keyword "None" as argument explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); -EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); +EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); +EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); +explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -- This should NOT create a semijoin -explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); +explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out index ae9bf9bd8b..76c985e727 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out @@ -387,9 +387,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -552,9 +552,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -566,7 +566,6 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 5 <- Reducer 4 (BROADCAST_EDGE) - Map 6 <- Reducer 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) @@ -629,10 +628,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_3_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: str is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_3_i_cstring_bloom_filter)))) (type: boolean) + predicate: str is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -695,10 +694,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -706,9 +701,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) +PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) +POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -833,9 +828,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -933,13 +928,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) +PREHOOK: query: explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) +POSTHOOK: query: explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1390,9 +1385,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1553,9 +1548,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1567,7 +1562,6 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 5 <- Reducer 4 (BROADCAST_EDGE) - Map 6 <- Reducer 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) @@ -1630,10 +1624,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_9_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: str is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_9_i_cstring_bloom_filter)))) (type: boolean) + predicate: str is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -1694,10 +1688,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -1705,9 +1695,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) +PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) +POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1830,9 +1820,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1928,13 +1918,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) +PREHOOK: query: explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) +POSTHOOK: query: explain select /*+ semi(k, str, s, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, d, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2353,9 +2343,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(srcpart_date, str, v, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2504,9 +2494,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, v, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2517,7 +2507,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 6 (BROADCAST_EDGE) Map 4 <- Reducer 6 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) @@ -2528,10 +2517,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: str is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) + predicate: str is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: str (type: string) @@ -2633,10 +2622,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -2644,9 +2629,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) +PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) +POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2761,9 +2746,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +PREHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) +POSTHOOK: query: explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage