diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index a479deb..b57b1ad 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2848,6 +2848,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "requires hive.tez.dynamic.partition.pruning to be enabled."), TEZ_MAX_BLOOM_FILTER_ENTRIES("hive.tez.max.bloom.filter.entries", 100000000L, "Bloom filter should be of at max certain size to be effective"), + TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD("hive.tez.dynamic.semijoin.reduction.threshold", (float) 0.50, + "Only perform semijoin optimizations it will reduce the scan results by this minimum amount"), TEZ_SMB_NUMBER_WAVES( "hive.tez.smb.number.waves", (float) 0.5, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index e6f21e9..2ca1fe1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -625,6 +625,7 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex runtimeValuesInfo.setTableDesc(rsFinalTableDesc); runtimeValuesInfo.setDynamicValueIDs(dynamicValueIDs); runtimeValuesInfo.setColExprs(rsValueCols); + runtimeValuesInfo.setTsColExpr(ctx.parent.getChildren().get(0)); parseContext.getRsToRuntimeValuesInfoMap().put(rsOpFinal, runtimeValuesInfo); return true; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RuntimeValuesInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/RuntimeValuesInfo.java index 5865f1a..0fe8a27 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RuntimeValuesInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RuntimeValuesInfo.java @@ -33,6 +33,8 @@ private TableDesc tableDesc; private List dynamicValueIDs; private List colExprs; + // Column expression of the table being filtered by the semijoin optimization. + private ExprNodeDesc tsColExpr; // get-set methods public TableDesc getTableDesc() { @@ -58,5 +60,13 @@ public void setDynamicValueIDs(List dynamicValueIDs) { public void setColExprs(List colExprs) { this.colExprs = colExprs; } + + public ExprNodeDesc getTsColExpr() { + return tsColExpr; + } + + public void setTsColExpr(ExprNodeDesc tsColExpr) { + this.tsColExpr = tsColExpr; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 47b229f..fba3ce4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.parse; +import com.google.common.base.Preconditions; import java.io.Serializable; import java.util.*; import java.util.concurrent.atomic.AtomicInteger; @@ -58,6 +59,8 @@ import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.stats.StatsUtils; /** * TezCompiler translates the operator plan into TezTasks. @@ -122,6 +125,9 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, removeSemiJoinIfNoStats(procCtx); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed"); + // Removing semijoin optimization when it may not be beneficial + removeSemijoinOptimizationByBenefit(procCtx); + // need a new run of the constant folding because we might have created lots // of "and true and true" conditions. // Rather than run the full constant folding just need to shortcut AND/OR expressions @@ -810,4 +816,220 @@ private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx) GraphWalker ogw = new PreOrderOnceWalker(disp); ogw.startWalking(topNodes, null); } + + private static boolean canUseNDV(ColStatistics colStats) { + return (colStats != null) && (colStats.getCountDistint() >= 0); + } + + private static double getBloomFilterCost( + SelectOperator sel, + TableScanOperator ts) { + double costBytes = -1; + Statistics selStats = sel.getStatistics(); + if (selStats != null) { + costBytes = selStats.getDataSize(); + + // Some other things that could be added here to model cost: + // Cost of computing/sending partial BloomFilter results? BloomFilterSize * # mappers + // For reduce-side join, add the cost of the semijoin table scan/dependent tablescans? + } + return costBytes; + } + + private static long getCombinedKeyDomainCardinality( + ColStatistics selColStat, + ColStatistics selColSourceStat, + ColStatistics tsColStat) { + long keyDomainCardinality = -1; + if (!canUseNDV(selColStat) || !canUseNDV(tsColStat)) { + return -1; + } + + long selColSourceNdv = canUseNDV(selColSourceStat) ? selColSourceStat.getCountDistint() : -1; + boolean semiJoinKeyIsPK = StatsUtils.inferForeignKey(selColStat, tsColStat); + if (semiJoinKeyIsPK) { + // PK/FQ relationship: NDV of selColSourceStat is a superset of what is in tsColStat + if (selColSourceNdv >= 0) { + // Most accurate domain cardinality would be source column NDV if available. + keyDomainCardinality = selColSourceNdv; + } else if (StatsUtils.hasDiscreteRange(selColStat)) { + // Try using the cardinality from the value range + ColStatistics.Range range = selColStat.getRange(); + long rangeDelta = StatsUtils.getRangeDelta(range); + if (rangeDelta > 0) { + keyDomainCardinality = rangeDelta; + } + } + } else { + if (selColSourceNdv >= 0) { + // If semijoin keys and ts keys completely unrelated, the cardinality of both sets + // could be obtained by adding both cardinalities. Would there be an average case? + keyDomainCardinality = selColSourceNdv + tsColStat.getCountDistint(); + } else if (StatsUtils.hasDiscreteRange(selColStat) + && StatsUtils.hasDiscreteRange(tsColStat)) { + // Trying using the cardinality from the value range. + ColStatistics.Range combinedRange = StatsUtils.combineRange(selColStat.getRange(), tsColStat.getRange()); + if (combinedRange != null) { + keyDomainCardinality = StatsUtils.getRangeDelta(combinedRange); + } else { + keyDomainCardinality = StatsUtils.getRangeDelta(selColStat.getRange()) + + StatsUtils.getRangeDelta(tsColStat.getRange()); + } + } + // Otherwise, we tried .. + } + if (LOG.isDebugEnabled()) { + LOG.debug("Computing key domain cardinality, keyDomainCardinality=" + keyDomainCardinality + + ", semiJoinKeyIsPK=" + semiJoinKeyIsPK + + ", selColStat=" + selColStat + + ", selColSourceStat=" + selColSourceStat + + ", tsColStat=" + tsColStat); + } + + return keyDomainCardinality; + } + + private static double getBloomFilterBenefit( + SelectOperator sel, ExprNodeDesc selExpr, + TableScanOperator ts, ExprNodeDesc tsExpr) { + double benefitBytes = -1; + Statistics selStats = sel.getStatistics(); + Statistics tsStats = ts.getStatistics(); + if (selStats == null || tsStats == null) { + LOG.debug("No stats available to compute BloomFilter benefit"); + return benefitBytes; + } + + // For cardinality values use numRows as default, try to use ColStats if available + long selKeyCardinality = selStats.getNumRows(); + long tsKeyCardinality = tsStats.getNumRows(); + long tsRows = tsStats.getNumRows(); + long tsRowSize = tsStats.getAvgRowSize(); + long keyDomainCardinality = selKeyCardinality + tsKeyCardinality; + + ExprNodeColumnDesc selCol = ExprNodeDescUtils.getColumnExpr(selExpr); + ExprNodeColumnDesc tsCol = ExprNodeDescUtils.getColumnExpr(tsExpr); + if (selCol != null && tsCol != null) { + // Check if there are column stats available for these columns + ColStatistics selColStat = selStats.getColumnStatisticsFromColName(selCol.getColumn()); + ColStatistics tsColStat = tsStats.getColumnStatisticsFromColName(tsCol.getColumn()); + if (canUseNDV(selColStat)) { + selKeyCardinality = selColStat.getCountDistint(); + } + if (canUseNDV(tsColStat)) { + tsKeyCardinality = tsColStat.getCountDistint(); + } + // Get colstats for the original table column for selCol if possible, this would have + // more accurate information about the original NDV of the column before any filtering. + ColStatistics selColSourceStat = null; + if (selColStat != null) { + ExprNodeDescUtils.ColumnOrigin selColSource = ExprNodeDescUtils.findColumnOrigin(selCol, sel); + if (selColSource != null && selColSource.op.getStatistics() != null) { + selColSourceStat = selColSource.op.getStatistics().getColumnStatisticsFromColName( + selColSource.col.getColumn()); + } + } + long domainCardinalityFromColStats = getCombinedKeyDomainCardinality( + selColStat, selColSourceStat, tsColStat); + if (domainCardinalityFromColStats >= 0) { + keyDomainCardinality = domainCardinalityFromColStats; + } + } + + // Selectivity: key cardinality of semijoin / domain cardinality + // Benefit (rows filtered from ts): (1 - selectivity) * # ts rows + double selectivity = selKeyCardinality / (double) keyDomainCardinality; + selectivity = Math.min(selectivity, 1); + double benefitRows = tsRows * (1 - selectivity); + benefitBytes = benefitRows * tsRowSize; + + if (LOG.isDebugEnabled()) { + LOG.debug("BloomFilter benefit for " + selCol + " to " + tsCol + + ", selKeyCardinality=" + selKeyCardinality + + ", tsKeyCardinality=" + tsKeyCardinality + + ", tsRows=" + tsRows + + ", keyDomainCardinality=" + keyDomainCardinality); + LOG.debug("SemiJoin key selectivity=" + selectivity + + ", benefitRows=" + benefitRows + + ", benefitBytes=" + benefitBytes); + } + + return benefitBytes; + } + + private static double computeBloomFilterNetBenefit( + SelectOperator sel, ExprNodeDesc selExpr, + TableScanOperator ts, ExprNodeDesc tsExpr) { + double netBenefit = -1; + double benefitBytes = getBloomFilterBenefit(sel, selExpr, ts, tsExpr); + Statistics tsStats = ts.getStatistics(); + if (benefitBytes > 0 && tsStats != null) { + double costBytes = getBloomFilterCost(sel, ts); + if (costBytes > 0) { + long tsDataSize = tsStats.getDataSize(); + netBenefit = (benefitBytes - costBytes) / tsDataSize; + LOG.debug("BloomFilter benefitBytes=" + benefitBytes + + ", costBytes=" + costBytes + + ", tsDataSize=" + tsDataSize + + ", netBenefitBytes=" + (benefitBytes - costBytes)); + } + } + LOG.debug("netBenefit=" + netBenefit); + return netBenefit; + } + + private void removeSemijoinOptimizationByBenefit(OptimizeTezProcContext procCtx) + throws SemanticException { + if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) { + // Not needed without semi-join reduction + return; + } + + List semijoinRsToRemove = new ArrayList(); + Map map = procCtx.parseContext.getRsOpToTsOpMap(); + double semijoinReductionThreshold = procCtx.conf.getFloatVar( + HiveConf.ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD); + for (ReduceSinkOperator rs : map.keySet()) { + // rs is semijoin optimization branch, which should look like -SEL-GB1-RS1-GB2-RS2 + // Get to the SelectOperator ancestor + SelectOperator sel = null; + for (Operator currOp = rs; currOp.getParentOperators().size() > 0; currOp = currOp.getParentOperators().get(0)) { + if (currOp instanceof SelectOperator) { + sel = (SelectOperator) currOp; + break; + } + } + if (sel == null) { + throw new SemanticException("Unexpected error - could not find SEL ancestor from semijoin branch of " + rs); + } + + // Check the ndv/rows from the SEL vs the destination tablescan the semijoin opt is going to. + TableScanOperator ts = map.get(rs); + RuntimeValuesInfo rti = procCtx.parseContext.getRsToRuntimeValuesInfoMap().get(rs); + ExprNodeDesc tsExpr = rti.getTsColExpr(); + // In the SEL operator of the semijoin branch, there should be only one column in the operator + ExprNodeDesc selExpr = sel.getConf().getColList().get(0); + + if (LOG.isDebugEnabled()) { + LOG.debug("Computing BloomFilter cost/benefit for " + OperatorUtils.getOpNamePretty(rs) + + " - " + OperatorUtils.getOpNamePretty(ts) + " (" + tsExpr + ")"); + } + + double reductionFactor = computeBloomFilterNetBenefit(sel, selExpr, ts, tsExpr); + if (reductionFactor < semijoinReductionThreshold) { + // This semijoin optimization should be removed. Do it after we're done iterating + semijoinRsToRemove.add(rs); + } + } + + for (ReduceSinkOperator rs : semijoinRsToRemove) { + TableScanOperator ts = map.get(rs); + if (LOG.isDebugEnabled()) { + LOG.debug("Reduction factor not satisfied for " + OperatorUtils.getOpNamePretty(rs) + + "-" + OperatorUtils.getOpNamePretty(ts) + ". Removing semijoin optimization."); + } + GenTezUtils.removeBranch(rs); + GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, rs, ts); + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index 6c10704..ec04e6d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -782,4 +782,59 @@ private static boolean checkPrefixKeys(List childKeys, List op; + + public ColumnOrigin(ExprNodeColumnDesc col, Operator op) { + super(); + this.col = col; + this.op = op; + } + } + + public static ColumnOrigin findColumnOrigin(ExprNodeDesc expr, Operator op) { + if (expr == null || op == null) { + // bad input + return null; + } + + ExprNodeColumnDesc col = ExprNodeDescUtils.getColumnExpr(expr); + if (col == null) { + // not a column + return null; + } + + Operator parentOp = null; + int numParents = op.getNumParent(); + if (numParents == 0) { + return new ColumnOrigin(col, op); + } + + ExprNodeDesc parentExpr = col; + Map mapping = op.getColumnExprMap(); + if (mapping != null) { + parentExpr = mapping.get(col.getColumn()); + } + if (parentExpr == null) { + // couldn't find proper parent column expr + return null; + } + + if (numParents == 1) { + parentOp = op.getParentOperators().get(0); + } else { + // Multiple parents - find the right one based on the table alias in the parentExpr +// for (Operator currParent : op.getParentOperators()) { + +// } + } + + if (parentOp == null) { + return null; + } + + return findColumnOrigin(parentExpr, parentOp); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 413aacf..bda2050 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -75,6 +75,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector; @@ -100,6 +101,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.BytesWritable; @@ -393,7 +396,7 @@ public static float getScaledSelectivity(ColStatistics csPK, ColStatistics csFK) return scaledSelectivity; } - private static long getRangeDelta(ColStatistics.Range range) { + public static long getRangeDelta(ColStatistics.Range range) { if (range.minValue != null && range.maxValue != null) { return (range.maxValue.longValue() - range.minValue.longValue()); } @@ -1684,4 +1687,46 @@ public static int getNumBitVectorsForNDVEstimation(HiveConf conf) throws Semanti } return numBitVectors; } + + public static boolean hasDiscreteRange(ColStatistics colStat) { + if (colStat.getRange() != null) { + TypeInfo colType = TypeInfoUtils.getTypeInfoFromTypeString(colStat.getColumnType()); + if (colType.getCategory() == Category.PRIMITIVE) { + PrimitiveTypeInfo pti = (PrimitiveTypeInfo) colType; + switch (pti.getPrimitiveCategory()) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + return true; + default: + break; + } + } + } + return false; + } + + public static Range combineRange(Range range1, Range range2) { + if ( range1.minValue != null && range1.maxValue != null + && range2.minValue != null && range2.maxValue != null) { + long min1 = range1.minValue.longValue(); + long max1 = range1.maxValue.longValue(); + long min2 = range2.minValue.longValue(); + long max2 = range2.maxValue.longValue(); + + if ( (min1 < min2 && max1 < max2) + || (min1 > min2 && max1 > max2)) { + // No overlap between the two ranges + return null; + } else { + // There is an overlap of ranges - create combined range. + return new ColStatistics.Range( + Math.min(min1, min2), + Math.max(max1, max2)); + } + } + return null; + } } diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q index e686af6..80411ef 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q @@ -7,6 +7,7 @@ set hive.tez.dynamic.partition.pruning=true; set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; +set hive.stats.fetch.column.stats=true; -- Create Tables create table alltypesorc_int ( cint int, cstring string ) stored as ORC; @@ -24,7 +25,8 @@ alter table srcpart_small add partition (ds = "2008-04-09"); insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc; insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"; insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; -insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; +insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20; + set hive.tez.dynamic.semijoin.reduction=false; -- single column, single key diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q index 2306395..8f97a76f 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q @@ -7,6 +7,7 @@ set hive.tez.dynamic.partition.pruning=true; set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; +set hive.tez.dynamic.semijoin.reduction.threshold=-999999999999; CREATE TABLE `table_1`( `bigint_col_7` bigint, diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q index adfc7a6..02802d5 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q @@ -12,6 +12,8 @@ set hive.tez.dynamic.partition.pruning=true; set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; +set hive.tez.dynamic.semijoin.reduction.threshold=-999999999999; + -- Try with merge statements create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q index be8e4af..6a89c80 100644 --- a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q +++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q @@ -10,6 +10,8 @@ set hive.optimize.index.filter=true; set hive.vectorized.adaptor.usage.mode=none; set hive.vectorized.execution.enabled=true; +set hive.stats.fetch.column.stats=true; +set hive.tez.dynamic.semijoin.reduction.threshold=-999999999999; -- Create Tables create table dsrv2_big stored as orc as diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 96d998f..35dde96 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -3135,11 +3135,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 7 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3163,7 +3161,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: unknown - Map 6 + Map 5 Map Operator Tree: TableScan alias: srcpart_date @@ -3183,14 +3181,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: srcpart_hour - filterExpr: ((UDFToDouble(hr) = 13.0) and (hr BETWEEN DynamicValue(RS_12_srcpart_hr_min) AND DynamicValue(RS_12_srcpart_hr_max) and in_bloom_filter(hr, DynamicValue(RS_12_srcpart_hr_bloom_filter)))) (type: boolean) + filterExpr: (UDFToDouble(hr) = 13.0) (type: boolean) Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(hr) = 13.0) and (hr BETWEEN DynamicValue(RS_12_srcpart_hr_min) AND DynamicValue(RS_12_srcpart_hr_max) and in_bloom_filter(hr, DynamicValue(RS_12_srcpart_hr_bloom_filter)))) (type: boolean) + predicate: (UDFToDouble(hr) = 13.0) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: string) @@ -3219,19 +3217,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3266,18 +3251,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index 012db41..a9ff8a4 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -88,13 +88,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@srcpart_date@ds=2008-04-09 POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +PREHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@srcpart_small@ds=2008-04-09 -POSTHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +POSTHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 @@ -144,19 +144,19 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -218,7 +218,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +176 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) @@ -263,25 +263,25 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -329,7 +329,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -362,7 +362,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +176 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds) @@ -420,16 +420,16 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 360000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -534,19 +534,19 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 5 @@ -632,7 +632,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -48 +0 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) @@ -646,13 +646,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 6 (BROADCAST_EDGE) - Map 8 <- Reducer 4 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Map 1 <- Reducer 5 (BROADCAST_EDGE) + Map 7 <- Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -673,45 +672,32 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2000) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -722,9 +708,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -734,14 +720,14 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: alltypesorc_int - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_cstring_min) AND DynamicValue(RS_9_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_cstring_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_cstring_bloom_filter)))) (type: boolean) + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_cstring_min) AND DynamicValue(RS_9_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_cstring_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_cstring_bloom_filter)))) (type: boolean) + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cstring (type: string) @@ -790,11 +776,11 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -806,19 +792,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -853,7 +827,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -48 +0 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) @@ -896,19 +870,19 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key1 (type: string), value1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -970,7 +944,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +176 PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) PREHOOK: type: QUERY PREHOOK: Input: default@srcpart_date @@ -989,7 +963,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +176 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) @@ -1035,25 +1009,25 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key1 (type: string), value1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -1064,9 +1038,9 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -1114,7 +1088,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -1126,7 +1100,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -1185,19 +1159,19 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 6 @@ -1311,13 +1285,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 7 (BROADCAST_EDGE) - Map 8 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1341,30 +1313,30 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: all inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -1374,14 +1346,14 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: alltypesorc_int - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean) + filterExpr: cstring is not null (type: boolean) Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean) + predicate: cstring is not null (type: boolean) Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cstring (type: string) @@ -1410,19 +1382,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2200) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1457,23 +1416,11 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2200) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 7 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -1565,25 +1512,25 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -1612,7 +1559,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -1645,7 +1592,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +176 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) @@ -1702,25 +1649,25 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -1749,7 +1696,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE @@ -1782,7 +1729,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +176 PREHOOK: query: drop table srcpart_date PREHOOK: type: DROPTABLE PREHOOK: Input: default@srcpart_date diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out index 2dcfd6b..94e371b 100644 --- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -284,10 +284,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -308,29 +306,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: b - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_6_a_key_min) AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_6_a_key_min) AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) @@ -377,18 +362,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -1401,10 +1374,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1421,38 +1392,22 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key BETWEEN DynamicValue(RS_4_a_key_min) AND DynamicValue(RS_4_a_key_max) and in_bloom_filter(key, DynamicValue(RS_4_a_key_bloom_filter))) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1489,18 +1444,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -1542,10 +1485,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1553,18 +1494,15 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key BETWEEN DynamicValue(RS_5_b_key_min) AND DynamicValue(RS_5_b_key_max) and in_bloom_filter(key, DynamicValue(RS_5_b_key_bloom_filter))) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 @@ -1581,19 +1519,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=500) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1630,18 +1555,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=500) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -1790,13 +1703,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 8 (BROADCAST_EDGE) - Map 6 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 1 <- Reducer 7 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1818,29 +1729,16 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: b - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_9_a_key_min) AND DynamicValue(RS_9_a_key_max) and in_bloom_filter(key, DynamicValue(RS_9_a_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_9_a_key_min) AND DynamicValue(RS_9_a_key_max) and in_bloom_filter(key, DynamicValue(RS_9_a_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) @@ -1853,7 +1751,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: c @@ -1936,19 +1834,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 8 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1997,10 +1883,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2021,29 +1905,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: b - filterExpr: (value is not null and (value BETWEEN DynamicValue(RS_6_a_value_min) AND DynamicValue(RS_6_a_value_max) and in_bloom_filter(value, DynamicValue(RS_6_a_value_bloom_filter)))) (type: boolean) + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (value BETWEEN DynamicValue(RS_6_a_value_min) AND DynamicValue(RS_6_a_value_max) and in_bloom_filter(value, DynamicValue(RS_6_a_value_bloom_filter)))) (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) @@ -2090,18 +1961,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -2145,12 +2004,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 8 <- Union 3 (CONTAINS) - Map 9 <- Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 9 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) + Map 7 <- Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Map 8 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Union 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2173,7 +2030,7 @@ STAGE PLANS: Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -2193,7 +2050,7 @@ STAGE PLANS: Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: s2 @@ -2211,29 +2068,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=508) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 9 + Map 8 Map Operator Tree: TableScan alias: b - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_18_s1_key_min) AND DynamicValue(RS_18_s1_key_max) and in_bloom_filter(key, DynamicValue(RS_18_s1_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_18_s1_key_min) AND DynamicValue(RS_18_s1_key_max) and in_bloom_filter(key, DynamicValue(RS_18_s1_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) @@ -2262,19 +2106,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=508) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2309,18 +2140,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=508) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Union 3 Vertex: Union 3 @@ -2343,10 +2162,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2367,29 +2184,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: b - filterExpr: (value is not null and (value BETWEEN DynamicValue(RS_6_a_value_min) AND DynamicValue(RS_6_a_value_max) and in_bloom_filter(value, DynamicValue(RS_6_a_value_bloom_filter)))) (type: boolean) + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (value BETWEEN DynamicValue(RS_6_a_value_min) AND DynamicValue(RS_6_a_value_max) and in_bloom_filter(value, DynamicValue(RS_6_a_value_bloom_filter)))) (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) @@ -2436,18 +2240,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -2483,13 +2275,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 8 (BROADCAST_EDGE) - Map 6 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 1 <- Reducer 7 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2511,29 +2301,16 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: b - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_9_a_key_min) AND DynamicValue(RS_9_a_key_max) and in_bloom_filter(key, DynamicValue(RS_9_a_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_9_a_key_min) AND DynamicValue(RS_9_a_key_max) and in_bloom_filter(key, DynamicValue(RS_9_a_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) @@ -2546,7 +2323,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: c @@ -2629,19 +2406,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 8 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2698,12 +2463,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 8 <- Union 3 (CONTAINS) - Map 9 <- Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 9 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) + Map 7 <- Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Map 8 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Union 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2726,7 +2489,7 @@ STAGE PLANS: Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -2746,7 +2509,7 @@ STAGE PLANS: Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: s2 @@ -2764,29 +2527,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=508) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 9 + Map 8 Map Operator Tree: TableScan alias: b - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_18_s1_key_min) AND DynamicValue(RS_18_s1_key_max) and in_bloom_filter(key, DynamicValue(RS_18_s1_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_18_s1_key_min) AND DynamicValue(RS_18_s1_key_max) and in_bloom_filter(key, DynamicValue(RS_18_s1_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) @@ -2815,19 +2565,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=508) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2862,18 +2599,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=508) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Union 3 Vertex: Union 3 @@ -2910,12 +2635,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Reducer 5 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2937,14 +2660,14 @@ STAGE PLANS: Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: t2 - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_12_t1_key_min) AND DynamicValue(RS_12_t1_key_max) and in_bloom_filter(key, DynamicValue(RS_12_t1_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_12_t1_key_min) AND DynamicValue(RS_12_t1_key_max) and in_bloom_filter(key, DynamicValue(RS_12_t1_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) @@ -2968,19 +2691,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3015,19 +2725,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 7 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 3eb655d..230052c 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -3398,11 +3398,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 7 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3432,7 +3430,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: true vectorized: true - Map 6 + Map 5 Map Operator Tree: TableScan alias: srcpart_date @@ -3460,14 +3458,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 7 + Map 6 Map Operator Tree: TableScan alias: srcpart_hour - filterExpr: ((UDFToDouble(hr) = 13.0) and (hr BETWEEN DynamicValue(RS_12_srcpart_hr_min) AND DynamicValue(RS_12_srcpart_hr_max) and in_bloom_filter(hr, DynamicValue(RS_12_srcpart_hr_bloom_filter)))) (type: boolean) + filterExpr: (UDFToDouble(hr) = 13.0) (type: boolean) Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(hr) = 13.0) and (hr BETWEEN DynamicValue(RS_12_srcpart_hr_min) AND DynamicValue(RS_12_srcpart_hr_max) and in_bloom_filter(hr, DynamicValue(RS_12_srcpart_hr_bloom_filter)))) (type: boolean) + predicate: (UDFToDouble(hr) = 13.0) (type: boolean) Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: string) @@ -3504,19 +3502,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3558,25 +3543,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index 32609eb..087f916 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -824,101 +824,34 @@ POSTHOOK: query: explain analyze select a.key, a.value, b.value from tab a join tab_part b on a.key = b.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 3 <- Map 1 (CUSTOM_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1/3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1/3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Map 3 - Map Operator Tree: - TableScan - alias: b - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_6_a_key_min) AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 500/500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_6_a_key_min) AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 500/244 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500/244 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 550/480 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550/480 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550/480 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1/1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1/1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +Plan optimized by CBO. + +Vertex dependency in root stage +Map 2 <- Map 1 (CUSTOM_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 2 + File Output Operator [FS_10] + Select Operator [SEL_9] (rows=550/480 width=18) + Output:["_col0","_col1","_col2"] + Map Join Operator [MAPJOIN_25] (rows=550/480 width=18) + BucketMapJoin:true,Conds:RS_6._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] + <-Map 1 [CUSTOM_EDGE] + MULTICAST [RS_6] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=242/242 width=18) + Output:["_col0","_col1"] + Filter Operator [FIL_13] (rows=242/242 width=18) + predicate:key is not null + TableScan [TS_0] (rows=242/242 width=18) + default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_5] (rows=500/500 width=18) + Output:["_col0","_col1"] + Filter Operator [FIL_14] (rows=500/500 width=18) + predicate:key is not null + TableScan [TS_3] (rows=500/500 width=18) + default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out index da52b0a..3af4dbc 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -652,102 +652,34 @@ POSTHOOK: query: explain select a.key, a.value, b.value from tab a join tab_part b on a.key = b.key POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +Plan optimized by CBO. -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 3 <- Map 1 (CUSTOM_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Map 3 - Map Operator Tree: - TableScan - alias: b - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_6_a_key_min) AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_6_a_key_min) AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=242) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +Vertex dependency in root stage +Map 2 <- Map 1 (CUSTOM_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 2 + File Output Operator [FS_10] + Select Operator [SEL_9] (rows=550 width=18) + Output:["_col0","_col1","_col2"] + Map Join Operator [MAPJOIN_25] (rows=550 width=18) + BucketMapJoin:true,Conds:RS_6._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] + <-Map 1 [CUSTOM_EDGE] + MULTICAST [RS_6] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=242 width=18) + Output:["_col0","_col1"] + Filter Operator [FIL_13] (rows=242 width=18) + predicate:key is not null + TableScan [TS_0] (rows=242 width=18) + default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_5] (rows=500 width=18) + Output:["_col0","_col1"] + Filter Operator [FIL_14] (rows=500 width=18) + predicate:key is not null + TableScan [TS_3] (rows=500 width=18) + default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"]