diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 99c26ce80e..73e0290412 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2907,8 +2907,6 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Big table for runtime filteting should be of atleast this size"), TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD("hive.tez.dynamic.semijoin.reduction.threshold", (float) 0.50, "Only perform semijoin optimization if the estimated benefit at or above this fraction of the target table"), - TEZ_DYNAMIC_SEMIJOIN_REDUCTION_HINT_ONLY("hive.tez.dynamic.semijoin.reduction.hint.only", false, - "When true, only enforce semijoin when a hint is provided"), TEZ_SMB_NUMBER_WAVES( "hive.tez.smb.number.waves", (float) 0.5, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index f56b9cbb18..a4772febc0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -27,6 +27,7 @@ import java.util.Map; import java.util.Stack; +import jline.internal.Preconditions; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.*; @@ -212,36 +213,38 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje if (semiJoin && ts.getConf().getFilterExpr() != null) { LOG.debug("Initiate semijoin reduction for " + column + " (" + ts.getConf().getFilterExpr().getExprString()); - // Get the table name from which the min-max values and bloom filter will come. - Operator op = ctx.generator; - - while (!(op == null || op instanceof TableScanOperator)) { - op = op.getParentOperators().get(0); - } - String tableAlias = (op == null ? "" : ((TableScanOperator) op).getConf().getAlias()); StringBuilder internalColNameBuilder = new StringBuilder(); StringBuilder colNameBuilder = new StringBuilder(); - if (getColumnName(ctx, internalColNameBuilder, colNameBuilder)) { + + // Apply best effort to fetch the correct table alias. If not + // found, fallback to old logic. + StringBuilder tabAliasBuilder = new StringBuilder(); + if (getColumnInfo(ctx, internalColNameBuilder, colNameBuilder, tabAliasBuilder)) { String colName = colNameBuilder.toString(); + String tableAlias; + if (tabAliasBuilder.length() > 0) { + tableAlias = tabAliasBuilder.toString(); + } else { + Operator op = ctx.generator; + + while (!(op == null || op instanceof TableScanOperator)) { + op = op.getParentOperators().get(0); + } + tableAlias = (op == null ? "" : ((TableScanOperator) op). + getConf().getAlias()); + } + + // Use the tableAlias to generate keyBaseAlias keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias + "_" + colName; - Map hints = parseContext.getSemiJoinHints(); + Map> hints = parseContext.getSemiJoinHints(); if (hints != null) { - if (hints.size() > 0) { - SemiJoinHint sjHint = hints.get(tableAlias); - if (sjHint != null && sjHint.getColName() != null && - !colName.equals(sjHint.getColName())) { - LOG.debug("Removed hint due to column mismatch + Col = " + colName + " hint column = " + sjHint.getColName()); - sjHint = null; - } - semiJoinAttempted = generateSemiJoinOperatorPlan( - ctx, parseContext, ts, keyBaseAlias, - internalColNameBuilder.toString(), colName, sjHint); - if (!semiJoinAttempted && sjHint != null) { - throw new SemanticException("The user hint to enforce semijoin failed required conditions"); - } - } + // Create semijoin optimizations ONLY for hinted columns + semiJoinAttempted = processSemiJoinHints( + parseContext, ctx, hints, tableAlias, + internalColNameBuilder.toString(), colName, ts, + keyBaseAlias); } else { // fallback to regular logic semiJoinAttempted = generateSemiJoinOperatorPlan( @@ -297,16 +300,30 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje } // Given a key, find the corresponding column name. - private boolean getColumnName(DynamicListContext ctx, StringBuilder internalColName, - StringBuilder colName) { + private boolean getColumnInfo(DynamicListContext ctx, StringBuilder internalColName, + StringBuilder colName, StringBuilder tabAlias) { ExprNodeDesc exprNodeDesc = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex()); ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(exprNodeDesc); if (colExpr == null) { return false; } - internalColName.append(colExpr.getColumn()); + + // fetch table ablias + ExprNodeDescUtils.ColumnOrigin columnOrigin = + ExprNodeDescUtils.findColumnOrigin(exprNodeDesc, ctx.generator); + + if (columnOrigin != null) { + // get both tableAlias and column name from columnOrigin + assert columnOrigin.op instanceof TableScanOperator; + TableScanOperator ts = (TableScanOperator) columnOrigin.op; + tabAlias.append(ts.getConf().getAlias()); + colName.append( + ExprNodeDescUtils.getColumnExpr(columnOrigin.col).getColumn()); + return true; + } + Operator parentOfRS = ctx.generator.getParentOperators().get(0); if (!(parentOfRS instanceof SelectOperator)) { colName.append(internalColName.toString()); @@ -324,6 +341,37 @@ private boolean getColumnName(DynamicListContext ctx, StringBuilder internalColN return true; } + // Handle hint based semijoin + private boolean processSemiJoinHints( + ParseContext pCtx, DynamicListContext ctx, + Map> hints, String tableAlias, + String internalColName, String colName, TableScanOperator ts, + String keyBaseAlias) throws SemanticException { + if (hints.size() == 0) { + return false; + } + + List hintList = hints.get(tableAlias); + if (hintList == null) { + return false; + } + + // Iterate through the list + for (SemiJoinHint sjHint : hintList) { + if (!colName.equals(sjHint.getColName())) { + continue; + } + // match! + LOG.info("Creating runtime filter due to user hint: column = " + colName); + if (generateSemiJoinOperatorPlan(ctx, pCtx, ts, keyBaseAlias, + internalColName, colName, sjHint)) { + return true; + } + throw new SemanticException("The user hint to enforce semijoin failed required conditions"); + } + return false; + } + private void replaceExprNode(DynamicListContext ctx, FilterDesc desc, ExprNodeDesc node) { if (ctx.grandParent == null) { desc.setPredicate(node); @@ -442,12 +490,6 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex TableScanOperator ts, String keyBaseAlias, String internalColName, String colName, SemiJoinHint sjHint) throws SemanticException { - // If semijoin hint is enforced, make sure hint is provided - if (parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_HINT_ONLY) - && sjHint == null) { - return false; - } - // we will put a fork in the plan at the source of the reduce sink Operator parentOfRS = ctx.generator.getParentOperators().get(0); @@ -457,23 +499,18 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex assert colName != null; // Fetch the TableScan Operator. Operator op = parentOfRS; - while (!(op == null || op instanceof TableScanOperator)) { + while (!(op == null || op instanceof TableScanOperator || + op instanceof ReduceSinkOperator)) { op = op.getParentOperators().get(0); } - assert op != null; - - Table table = ((TableScanOperator) op).getConf().getTableMetadata(); - if (table.isPartitionKey(colName)) { - // The column is partition column, skip the optimization. - return false; - } + Preconditions.checkNotNull(op); - // If hint is provided and only hinted semijoin optimizations should be - // created, then skip other columns on the table - if (parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_HINT_ONLY) - && sjHint.getColName() != null && - !internalColName.equals(sjHint.getColName())) { - return false; + if (op instanceof TableScanOperator) { + Table table = ((TableScanOperator) op).getConf().getTableMetadata(); + if (table.isPartitionKey(colName)) { + // The column is partition column, skip the optimization. + return false; + } } // Check if there already exists a semijoin branch diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index 6de4bcd64a..565fbef0aa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -123,7 +123,7 @@ private Map colExprToGBMap = new HashMap<>(); - private Map semiJoinHints; + private Map> semiJoinHints; public ParseContext() { } @@ -674,11 +674,11 @@ public void setColExprToGBMap(Map colExprToGBMap) return colExprToGBMap; } - public void setSemiJoinHints(Map hints) { + public void setSemiJoinHints(Map> hints) { this.semiJoinHints = hints; } - public Map getSemiJoinHints() { + public Map> getSemiJoinHints() { return semiJoinHints; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 5115fc8090..ddf74f2cd4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -9018,14 +9018,13 @@ private void parseStreamTables(QBJoinTree joinTree, QB qb) { } /** Parses semjoin hints in the query and returns the table names mapped to filter size, or -1 if not specified. - * Hints can be in 3 formats + * Hints can be in 2 formats * 1. TableName, ColumnName, bloom filter entries - * 2. TableName, bloom filter entries, and - * 3. TableName, ColumnName + * 2. TableName, ColumnName * */ - private Map parseSemiJoinHint(List hints) throws SemanticException { + private Map> parseSemiJoinHint(List hints) throws SemanticException { if (hints == null || hints.size() == 0) return null; - Map result = null; + Map> result = null; for (ASTNode hintNode : hints) { for (Node node : hintNode.getChildren()) { ASTNode hint = (ASTNode) node; @@ -9033,8 +9032,6 @@ private void parseStreamTables(QBJoinTree joinTree, QB qb) { if (result == null) { result = new HashMap<>(); } - String alias = null; - String colName = null; Tree args = hint.getChild(1); if (args.getChildCount() == 1) { String text = args.getChild(0).getText(); @@ -9043,46 +9040,9 @@ private void parseStreamTables(QBJoinTree joinTree, QB qb) { return result; } } - for (int i = 0; i < args.getChildCount(); i++) { - // We can have table names, column names or sizes here (or incorrect hint if the user is so inclined). - String text = args.getChild(i).getText(); - Integer number = null; - try { - number = Integer.parseInt(text); - } catch (NumberFormatException ex) { // Ignore. - } - if (number != null) { - if (alias == null) { - throw new SemanticException("Invalid semijoin hint - arg " + i + " (" - + text + ") is a number but the previous one is not an alias"); - } - if (result.get(alias) != null) { - // A hint with same alias already present, throw - throw new SemanticException("A hint with alias " + alias + - " already present. Please use unique aliases"); - } - SemiJoinHint sjHint = new SemiJoinHint(alias, colName, number); - result.put(alias, sjHint); - alias = null; - colName = null; - } else { - if (alias == null) { - alias = text; - } else if (colName == null) { - colName = text; - } else { - // No bloom filter entries provided. - if (result.get(alias) != null) { - // A hint with same alias already present, throw - throw new SemanticException("A hint with alias " + alias + - " already present. Please use unique aliases"); - } - SemiJoinHint sjHint = new SemiJoinHint(alias, colName, null); - result.put(alias, sjHint); - alias = text; - colName = null; - } - } + int curIdx = 0; + while(curIdx < args.getChildCount()) { + curIdx = parseSingleSemiJoinHint(args, curIdx, result); } } } @@ -9092,6 +9052,40 @@ private void parseStreamTables(QBJoinTree joinTree, QB qb) { return result; } + private int parseSingleSemiJoinHint(Tree args, int curIdx, Map> result) + throws SemanticException { + // Check if there are enough entries in the tree to constitute a hint. + int numEntriesLeft = args.getChildCount() - curIdx; + if (numEntriesLeft < 2) { + throw new SemanticException("User provided only 1 entry for the hint with alias " + + args.getChild(curIdx).getText()); + } + + String alias = args.getChild(curIdx++).getText(); + // validate + if (StringUtils.isNumeric(alias)) { + throw new SemanticException("User provided bloom filter entries when alias is expected"); + } + + String colName = args.getChild(curIdx++).getText(); + // validate + if (StringUtils.isNumeric(colName)) { + throw new SemanticException("User provided bloom filter entries when column name is expected"); + } + + Integer number = null; + if (numEntriesLeft > 2) { + // Check if there exists bloom filter size entry + try { + number = Integer.parseInt(args.getChild(curIdx).getText()); + curIdx++; + } catch (NumberFormatException e) { // Ignore + } + } + result.computeIfAbsent(alias, value -> new ArrayList<>()).add(new SemiJoinHint(colName, number)); + return curIdx; + } + /** * Merges node to target */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java index 1f24e23ff3..f7fd306a2c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java @@ -19,20 +19,14 @@ package org.apache.hadoop.hive.ql.parse; public class SemiJoinHint { - private String tabAlias; private String colName; private Integer numEntries; - public SemiJoinHint(String tabAlias, String colName, Integer numEntries) { - this.tabAlias = tabAlias; + public SemiJoinHint(String colName, Integer numEntries) { this.colName = colName; this.numEntries = numEntries; } - public String getTabAlias() { - return tabAlias; - } - public String getColName() { return colName; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index 01fab9c5d6..3f9169ebcc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -801,14 +801,13 @@ public ColumnOrigin(ExprNodeColumnDesc col, Operator op) { } private static ExprNodeDesc findParentExpr(ExprNodeColumnDesc col, Operator op) { - if (op instanceof ReduceSinkOperator) { - return col; - } - ExprNodeDesc parentExpr = col; Map mapping = op.getColumnExprMap(); if (mapping != null) { parentExpr = mapping.get(col.getColumn()); + if (parentExpr == null && op instanceof ReduceSinkOperator) { + return col; + } } return parentExpr; } diff --git a/ql/src/test/queries/clientpositive/semijoin_hint.q b/ql/src/test/queries/clientpositive/semijoin_hint.q index 5fbc273e7d..71fa445ec8 100644 --- a/ql/src/test/queries/clientpositive/semijoin_hint.q +++ b/ql/src/test/queries/clientpositive/semijoin_hint.q @@ -42,7 +42,7 @@ set hive.cbo.returnpath.hiveop=true; -- disabling this test case for returnpath true as the aliases in case of union are mangled due to which hints are not excercised. --explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) -- union all --- select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); +-- select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); -- Query which creates semijoin explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); @@ -50,21 +50,18 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); -EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); +EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -set hive.tez.dynamic.semijoin.reduction.hint.only=true; --- This should NOT create a semijoin as the join is on different column +-- This should NOT create a semijoin explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); -set hive.tez.dynamic.semijoin.reduction.hint.only=false; - set hive.cbo.returnpath.hiveop=false; explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); + select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); -- Query which creates semijoin explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); @@ -72,21 +69,20 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); -EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); +EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -set hive.tez.dynamic.semijoin.reduction.hint.only=true; --- This should NOT create a semijoin as the join is on different column +-- This should NOT create a semijoin explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); -set hive.tez.dynamic.semijoin.reduction.hint.only=false; + set hive.cbo.enable=false; explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); + select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1); -- Query which creates semijoin explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); @@ -94,11 +90,10 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring); -EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); +EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring); explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1); -set hive.tez.dynamic.semijoin.reduction.hint.only=true; --- This should NOT create a semijoin as the join is on different column +-- This should NOT create a semijoin explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1); -set hive.tez.dynamic.semijoin.reduction.hint.only=false; + diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index e3ffcfa857..35822f4281 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -1402,10 +1402,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_int - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date__col1_min) AND DynamicValue(RS_12_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date__col1_bloom_filter)))) (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_value_min) AND DynamicValue(RS_12_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_value_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date__col1_min) AND DynamicValue(RS_12_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date__col1_bloom_filter)))) (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_value_min) AND DynamicValue(RS_12_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_value_bloom_filter)))) (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out index 650dc9ffd7..1da1121d6a 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out @@ -107,10 +107,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tt2 - filterExpr: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1__col3_min) AND DynamicValue(RS_23_t1__col3_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1__col3_bloom_filter)))) (type: boolean) + filterExpr: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_9_min) AND DynamicValue(RS_23_t1_timestamp_col_9_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_9_bloom_filter)))) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1__col3_min) AND DynamicValue(RS_23_t1__col3_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1__col3_bloom_filter)))) (type: boolean) + predicate: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_9_min) AND DynamicValue(RS_23_t1_timestamp_col_9_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_9_bloom_filter)))) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: decimal1911_col_16 (type: decimal(19,11)), timestamp_col_18 (type: timestamp) diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out index 0098b893f4..18659b3c4f 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out @@ -761,7 +761,7 @@ Stage-0 Select Operator [SEL_8] (rows=9174 width=70) Output:["_col0"] Filter Operator [FIL_28] (rows=9174 width=70) - predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date__col1_min) AND DynamicValue(RS_12_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date__col1_bloom_filter)))) + predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_value_min) AND DynamicValue(RS_12_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_value_bloom_filter)))) TableScan [TS_6] (rows=12288 width=70) default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] <-Reducer 5 [BROADCAST_EDGE] llap diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out index 3ffc2352a4..ae9bf9bd8b 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out @@ -400,23 +400,21 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 7 (BROADCAST_EDGE) - Map 8 <- Reducer 5 (BROADCAST_EDGE) + Map 7 <- Reducer 5 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: i - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_7_srcpart_date_value_min) AND DynamicValue(RS_7_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_7_srcpart_date_value_bloom_filter)))) (type: boolean) + filterExpr: cstring is not null (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_7_srcpart_date_value_min) AND DynamicValue(RS_7_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_7_srcpart_date_value_bloom_filter)))) (type: boolean) + predicate: cstring is not null (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) @@ -448,29 +446,16 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: str (type: string) - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_str_min) AND DynamicValue(RS_9_i_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_str_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_srcpart_date_str_min) AND DynamicValue(RS_9_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_srcpart_date_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_str_min) AND DynamicValue(RS_9_i_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_str_bloom_filter)))) (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_srcpart_date_str_min) AND DynamicValue(RS_9_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_srcpart_date_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -504,7 +489,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -552,19 +537,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -579,9 +552,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -962,11 +935,11 @@ STAGE PLANS: PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1430,23 +1403,21 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 6 (BROADCAST_EDGE) - Map 5 <- Reducer 8 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 7 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: i - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_value_min) AND DynamicValue(RS_10_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_value_bloom_filter)))) (type: boolean) + filterExpr: cstring is not null (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_value_min) AND DynamicValue(RS_10_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_value_bloom_filter)))) (type: boolean) + predicate: cstring is not null (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring (type: string) @@ -1459,14 +1430,14 @@ STAGE PLANS: Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (str is not null and value is not null and (str BETWEEN DynamicValue(RS_13_v_key1_min) AND DynamicValue(RS_13_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_13_v_key1_bloom_filter)))) (type: boolean) + filterExpr: (str is not null and value is not null) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (str is not null and value is not null and (str BETWEEN DynamicValue(RS_13_v_key1_min) AND DynamicValue(RS_13_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_13_v_key1_bloom_filter)))) (type: boolean) + predicate: (str is not null and value is not null) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str (type: string), value (type: string) @@ -1478,29 +1449,16 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs Map 7 Map Operator Tree: TableScan alias: v - filterExpr: key1 is not null (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_12_srcpart_date_str_min) AND DynamicValue(RS_12_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_12_srcpart_date_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: key1 is not null (type: boolean) + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_12_srcpart_date_str_min) AND DynamicValue(RS_12_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_12_srcpart_date_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) @@ -1511,19 +1469,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -1542,6 +1487,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1576,11 +1534,11 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -1588,18 +1546,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -1607,9 +1553,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1984,11 +1930,11 @@ STAGE PLANS: PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1) union all - select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) + select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2420,13 +2366,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 7 <- Reducer 6 (BROADCAST_EDGE) - Map 8 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 6 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2459,7 +2403,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: v @@ -2475,14 +2419,14 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: i - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date__col1_min) AND DynamicValue(RS_9_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date__col1_bloom_filter)))) (type: boolean) + filterExpr: cstring is not null (type: boolean) Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date__col1_min) AND DynamicValue(RS_9_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date__col1_bloom_filter)))) (type: boolean) + predicate: cstring is not null (type: boolean) Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: cstring (type: string) @@ -2507,19 +2451,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 9756 Data size: 887796 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9756 Data size: 887796 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2558,18 +2489,6 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000) mode: final outputColumnNames: _col0, _col1, _col2 @@ -2585,9 +2504,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) +POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage