diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index f56b9cbb18..48885ed99b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -25,6 +25,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.Stack; import org.apache.hadoop.hive.conf.HiveConf; @@ -213,44 +214,35 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje LOG.debug("Initiate semijoin reduction for " + column + " (" + ts.getConf().getFilterExpr().getExprString()); // Get the table name from which the min-max values and bloom filter will come. - Operator op = ctx.generator; - - while (!(op == null || op instanceof TableScanOperator)) { - op = op.getParentOperators().get(0); - } - String tableAlias = (op == null ? "" : ((TableScanOperator) op).getConf().getAlias()); + Set tsOps = OperatorUtils.findOperatorsUpstream( + ctx.generator, TableScanOperator.class); + // There ought to be atleast one TS Op as parent. + assert tsOps != null && tsOps.size() > 0; StringBuilder internalColNameBuilder = new StringBuilder(); StringBuilder colNameBuilder = new StringBuilder(); - if (getColumnName(ctx, internalColNameBuilder, colNameBuilder)) { + if (getColumnInfo(ctx, internalColNameBuilder, colNameBuilder)) { String colName = colNameBuilder.toString(); + String tableAlias = tsOps.iterator().next().getConf().getAlias(); keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias + "_" + colName; - Map hints = parseContext.getSemiJoinHints(); + Map> hints = parseContext.getSemiJoinHints(); if (hints != null) { - if (hints.size() > 0) { - SemiJoinHint sjHint = hints.get(tableAlias); - if (sjHint != null && sjHint.getColName() != null && - !colName.equals(sjHint.getColName())) { - LOG.debug("Removed hint due to column mismatch + Col = " + colName + " hint column = " + sjHint.getColName()); - sjHint = null; - } - semiJoinAttempted = generateSemiJoinOperatorPlan( - ctx, parseContext, ts, keyBaseAlias, - internalColNameBuilder.toString(), colName, sjHint); - if (!semiJoinAttempted && sjHint != null) { - throw new SemanticException("The user hint to enforce semijoin failed required conditions"); - } - } + // process semijoin optimization only if a hint is given. + StringBuilder keyBaseAliasBuilder = new StringBuilder(); + semiJoinAttempted = processSemiJoinHints(parseContext, ctx, + tsOps, hints, internalColNameBuilder.toString(), + colName, ts, keyBaseAliasBuilder); + keyBaseAlias = keyBaseAliasBuilder.toString(); } else { // fallback to regular logic semiJoinAttempted = generateSemiJoinOperatorPlan( ctx, parseContext, ts, keyBaseAlias, internalColNameBuilder.toString(), colName, null); - } - } + } // hints != null + } // getColumnInfo() } - } + } // no DPP // If semijoin is attempted then replace the condition with a min-max filter // and bloom filter else, @@ -297,7 +289,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje } // Given a key, find the corresponding column name. - private boolean getColumnName(DynamicListContext ctx, StringBuilder internalColName, + private boolean getColumnInfo(DynamicListContext ctx, StringBuilder internalColName, StringBuilder colName) { ExprNodeDesc exprNodeDesc = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex()); ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(exprNodeDesc); @@ -324,6 +316,46 @@ private boolean getColumnName(DynamicListContext ctx, StringBuilder internalColN return true; } + // Handle hint based semijoin + private boolean processSemiJoinHints( + ParseContext pCtx, DynamicListContext ctx, + Set tsOps, Map> hints, + String internalColName, String colName, TableScanOperator ts, + StringBuilder keyBaseAliasBuilder) throws SemanticException { + if (hints.size() == 0) { + return false; + } + + String tableAlias; + // In a given set of ts ops, there can be only one alias + colName combo + for (TableScanOperator tsOp : tsOps) { + // set table alias + tableAlias = tsOp.getConf().getAlias(); + List hintList = hints.get(tableAlias); + if (hintList == null) { + continue; + } + + // Iterate through the list + for (SemiJoinHint sjHint : hintList) { + if (!colName.equals(sjHint.getColName())) { + continue; + } + // match! + String keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias + + "_" + colName; + keyBaseAliasBuilder.append(keyBaseAlias); + LOG.info("Creating runtime filter due to user hint: column = " + colName); + if (generateSemiJoinOperatorPlan(ctx, pCtx, ts, keyBaseAlias, + internalColName, colName, sjHint)) { + return true; + } + throw new SemanticException("The user hint to enforce semijoin failed required conditions"); + } + } + return true; + } + private void replaceExprNode(DynamicListContext ctx, FilterDesc desc, ExprNodeDesc node) { if (ctx.grandParent == null) { desc.setPredicate(node); @@ -442,12 +474,6 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex TableScanOperator ts, String keyBaseAlias, String internalColName, String colName, SemiJoinHint sjHint) throws SemanticException { - // If semijoin hint is enforced, make sure hint is provided - if (parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_HINT_ONLY) - && sjHint == null) { - return false; - } - // we will put a fork in the plan at the source of the reduce sink Operator parentOfRS = ctx.generator.getParentOperators().get(0); @@ -457,23 +483,18 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex assert colName != null; // Fetch the TableScan Operator. Operator op = parentOfRS; - while (!(op == null || op instanceof TableScanOperator)) { + while (!(op == null || op instanceof TableScanOperator || + op instanceof ReduceSinkOperator)) { op = op.getParentOperators().get(0); } assert op != null; - Table table = ((TableScanOperator) op).getConf().getTableMetadata(); - if (table.isPartitionKey(colName)) { - // The column is partition column, skip the optimization. - return false; - } - - // If hint is provided and only hinted semijoin optimizations should be - // created, then skip other columns on the table - if (parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_HINT_ONLY) - && sjHint.getColName() != null && - !internalColName.equals(sjHint.getColName())) { - return false; + if (op instanceof TableScanOperator) { + Table table = ((TableScanOperator) op).getConf().getTableMetadata(); + if (table.isPartitionKey(colName)) { + // The column is partition column, skip the optimization. + return false; + } } // Check if there already exists a semijoin branch diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index 6de4bcd64a..565fbef0aa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -123,7 +123,7 @@ private Map colExprToGBMap = new HashMap<>(); - private Map semiJoinHints; + private Map> semiJoinHints; public ParseContext() { } @@ -674,11 +674,11 @@ public void setColExprToGBMap(Map colExprToGBMap) return colExprToGBMap; } - public void setSemiJoinHints(Map hints) { + public void setSemiJoinHints(Map> hints) { this.semiJoinHints = hints; } - public Map getSemiJoinHints() { + public Map> getSemiJoinHints() { return semiJoinHints; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 5115fc8090..18b94973f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -9018,14 +9018,13 @@ private void parseStreamTables(QBJoinTree joinTree, QB qb) { } /** Parses semjoin hints in the query and returns the table names mapped to filter size, or -1 if not specified. - * Hints can be in 3 formats + * Hints can be in 2 formats * 1. TableName, ColumnName, bloom filter entries - * 2. TableName, bloom filter entries, and - * 3. TableName, ColumnName + * 2. TableName, ColumnName * */ - private Map parseSemiJoinHint(List hints) throws SemanticException { + private Map> parseSemiJoinHint(List hints) throws SemanticException { if (hints == null || hints.size() == 0) return null; - Map result = null; + Map> result = null; for (ASTNode hintNode : hints) { for (Node node : hintNode.getChildren()) { ASTNode hint = (ASTNode) node; @@ -9052,17 +9051,12 @@ private void parseStreamTables(QBJoinTree joinTree, QB qb) { } catch (NumberFormatException ex) { // Ignore. } if (number != null) { - if (alias == null) { + if (alias == null || colName == null) { throw new SemanticException("Invalid semijoin hint - arg " + i + " (" - + text + ") is a number but the previous one is not an alias"); - } - if (result.get(alias) != null) { - // A hint with same alias already present, throw - throw new SemanticException("A hint with alias " + alias + - " already present. Please use unique aliases"); + + text + ") is a number but the previous one is not a column"); } - SemiJoinHint sjHint = new SemiJoinHint(alias, colName, number); - result.put(alias, sjHint); + result.computeIfAbsent(alias, value -> new ArrayList<>()).add( + new SemiJoinHint(colName, number)); alias = null; colName = null; } else { @@ -9072,13 +9066,8 @@ private void parseStreamTables(QBJoinTree joinTree, QB qb) { colName = text; } else { // No bloom filter entries provided. - if (result.get(alias) != null) { - // A hint with same alias already present, throw - throw new SemanticException("A hint with alias " + alias + - " already present. Please use unique aliases"); - } - SemiJoinHint sjHint = new SemiJoinHint(alias, colName, null); - result.put(alias, sjHint); + result.computeIfAbsent(alias, value -> new ArrayList<>()).add( + new SemiJoinHint(colName, number)); alias = text; colName = null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java index 1f24e23ff3..f7fd306a2c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java @@ -19,20 +19,14 @@ package org.apache.hadoop.hive.ql.parse; public class SemiJoinHint { - private String tabAlias; private String colName; private Integer numEntries; - public SemiJoinHint(String tabAlias, String colName, Integer numEntries) { - this.tabAlias = tabAlias; + public SemiJoinHint(String colName, Integer numEntries) { this.colName = colName; this.numEntries = numEntries; } - public String getTabAlias() { - return tabAlias; - } - public String getColName() { return colName; } diff --git a/ql/src/test/queries/clientpositive/semijoin_hint.q b/ql/src/test/queries/clientpositive/semijoin_hint.q index 5fbc273e7d..3a37229cbe 100644 --- a/ql/src/test/queries/clientpositive/semijoin_hint.q +++ b/ql/src/test/queries/clientpositive/semijoin_hint.q @@ -42,7 +42,7 @@ set hive.cbo.returnpath.hiveop=true; -- disabling this test case for returnpath true as the aliases in case of union are mangled due to which hints are not excercised. --explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) -- union all --- select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); +-- select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); -- Query which creates semijoin explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); @@ -50,7 +50,7 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); -EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); +EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); @@ -64,7 +64,7 @@ set hive.cbo.returnpath.hiveop=false; explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); + select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); -- Query which creates semijoin explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); @@ -72,7 +72,7 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); -EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); +EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); @@ -86,7 +86,7 @@ set hive.cbo.enable=false; explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); + select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); -- Query which creates semijoin explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); @@ -94,7 +94,7 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); -EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); +EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out index 3ffc2352a4..be70130c34 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out @@ -400,23 +400,21 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 7 (BROADCAST_EDGE) - Map 8 <- Reducer 5 (BROADCAST_EDGE) + Map 7 <- Reducer 5 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: i - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_7_srcpart_date_value_min) AND DynamicValue(RS_7_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_7_srcpart_date_value_bloom_filter)))) (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_7_srcpart_date_value_min) AND DynamicValue(RS_7_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_7_srcpart_date_value_bloom_filter)))) (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) @@ -433,10 +431,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (str is not null and value is not null) (type: boolean) + filterExpr: (str is not null and value is not null and (value BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(value, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and value is not null) (type: boolean) + predicate: (str is not null and value is not null and (value BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(value, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string), value (type: string) @@ -448,29 +446,16 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: str (type: string) - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_str_min) AND DynamicValue(RS_9_i_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_str_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_srcpart_date_str_min) AND DynamicValue(RS_9_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_srcpart_date_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_str_min) AND DynamicValue(RS_9_i_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_str_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_srcpart_date_str_min) AND DynamicValue(RS_9_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_srcpart_date_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -504,7 +489,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -552,19 +537,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -579,9 +552,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -603,10 +576,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: i - filterExpr: cstring is not null (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter))) and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: cstring is not null (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter))) and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) @@ -636,10 +609,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_3_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_3_i_cstring_bloom_filter))) and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_3_i_cstring_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_3_i_cstring_bloom_filter))) and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -656,10 +629,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_3_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_3_i_cstring_bloom_filter))) and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_3_i_cstring_bloom_filter)))) (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(RS_3_i_cstring_min) AND DynamicValue(RS_3_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_3_i_cstring_bloom_filter))) and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -756,10 +729,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: str is not null (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: str is not null (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -881,10 +854,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: value is not null (type: boolean) + filterExpr: (value is not null and (value BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(value, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: value is not null (type: boolean) + predicate: (value is not null and (value BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(value, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) @@ -901,10 +874,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: key1 is not null (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: key1 is not null (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -962,11 +935,11 @@ STAGE PLANS: PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -991,10 +964,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: str is not null (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: str is not null (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -1024,10 +997,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: key1 is not null (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: key1 is not null (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -1430,23 +1403,19 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 6 (BROADCAST_EDGE) - Map 5 <- Reducer 8 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: i - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_value_min) AND DynamicValue(RS_10_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_value_bloom_filter)))) (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_value_min) AND DynamicValue(RS_10_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_value_bloom_filter)))) (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) @@ -1463,10 +1432,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (str is not null and value is not null and (str BETWEEN DynamicValue(RS_13_v_key1_min) AND DynamicValue(RS_13_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_13_v_key1_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and value is not null and (value BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(value, DynamicValue(_bloom_filter))) and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and value is not null and (str BETWEEN DynamicValue(RS_13_v_key1_min) AND DynamicValue(RS_13_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_13_v_key1_bloom_filter)))) (type: boolean) + predicate: (str is not null and value is not null and (value BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(value, DynamicValue(_bloom_filter))) and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string), value (type: string) @@ -1478,29 +1447,16 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: v - filterExpr: key1 is not null (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: key1 is not null (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -1511,19 +1467,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -1576,30 +1519,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -1607,9 +1526,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1631,10 +1550,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: i - filterExpr: cstring is not null (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter))) and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: cstring is not null (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter))) and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) @@ -1664,10 +1583,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_cstring_bloom_filter))) and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_cstring_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_cstring_bloom_filter))) and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -1684,10 +1603,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_9_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_9_i_cstring_bloom_filter))) and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_9_i_cstring_bloom_filter)))) (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(RS_9_i_cstring_min) AND DynamicValue(RS_9_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_9_i_cstring_bloom_filter))) and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -1782,10 +1701,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: str is not null (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: str is not null (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string) @@ -1905,10 +1824,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: value is not null (type: boolean) + filterExpr: (value is not null and (value BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(value, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: value is not null (type: boolean) + predicate: (value is not null and (value BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(value, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) @@ -1925,10 +1844,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: key1 is not null (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: key1 is not null (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -1984,11 +1903,11 @@ STAGE PLANS: PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2013,10 +1932,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: str is not null (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: str is not null (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: str (type: string) @@ -2042,10 +1961,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: key1 is not null (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: key1 is not null (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: key1 (type: string) @@ -2420,23 +2339,21 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 7 <- Reducer 6 (BROADCAST_EDGE) - Map 8 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 6 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (str is not null and value is not null) (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter))) and value is not null and (value BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(value, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and value is not null) (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter))) and value is not null and (value BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(value, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: str (type: string) @@ -2459,7 +2376,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: v @@ -2475,14 +2392,14 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: i - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date__col1_min) AND DynamicValue(RS_9_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date__col1_bloom_filter)))) (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date__col1_min) AND DynamicValue(RS_9_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date__col1_bloom_filter)))) (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: cstring (type: string) @@ -2507,19 +2424,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 9756 Data size: 887796 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9756 Data size: 887796 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2558,18 +2462,6 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000) mode: final outputColumnNames: _col0, _col1, _col2 @@ -2585,9 +2477,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2609,10 +2501,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: (str is not null and (str BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter))) and (str BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and (str BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter))) and (str BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(str, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: str (type: string) @@ -2625,10 +2517,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter))) and (key1 BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter))) and (key1 BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: key1 (type: string) @@ -2641,10 +2533,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: i - filterExpr: cstring is not null (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter))) and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: cstring is not null (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter))) and (cstring BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(cstring, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: cstring (type: string) @@ -2748,10 +2640,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: str is not null (type: boolean) + filterExpr: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: str is not null (type: boolean) + predicate: (str is not null and (str BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(str, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: str (type: string) @@ -2863,10 +2755,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: k - filterExpr: value is not null (type: boolean) + filterExpr: (value is not null and (value BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(value, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: value is not null (type: boolean) + predicate: (value is not null and (value BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(value, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: value (type: string) @@ -2879,10 +2771,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - filterExpr: key1 is not null (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: key1 is not null (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(_min) AND DynamicValue(_max) and in_bloom_filter(key1, DynamicValue(_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: key1 (type: string)