diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 1bc3a6e..6e16200 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2857,6 +2857,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Bloom filter should be a multiple of this factor with nDV"), TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION("hive.tez.bigtable.minsize.semijoin.reduction", 1000000L, "Big table for runtime filteting should be of atleast this size"), + TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD("hive.tez.dynamic.semijoin.reduction.threshold", (float) 0.50, + "Only perform semijoin optimization if the estimated benefit at or above this fraction of the target table"), TEZ_SMB_NUMBER_WAVES( "hive.tez.smb.number.waves", (float) 0.5, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index 727f7bc..b9f5912 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -663,6 +663,7 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex runtimeValuesInfo.setTableDesc(rsFinalTableDesc); runtimeValuesInfo.setDynamicValueIDs(dynamicValueIDs); runtimeValuesInfo.setColExprs(rsValueCols); + runtimeValuesInfo.setTsColExpr(ctx.parent.getChildren().get(0)); parseContext.getRsToRuntimeValuesInfoMap().put(rsOpFinal, runtimeValuesInfo); return true; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RuntimeValuesInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/RuntimeValuesInfo.java index 5865f1a..0fe8a27 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RuntimeValuesInfo.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/RuntimeValuesInfo.java @@ -33,6 +33,8 @@ private TableDesc tableDesc; private List dynamicValueIDs; private List colExprs; + // Column expression of the table being filtered by the semijoin optimization. + private ExprNodeDesc tsColExpr; // get-set methods public TableDesc getTableDesc() { @@ -58,5 +60,13 @@ public void setDynamicValueIDs(List dynamicValueIDs) { public void setColExprs(List colExprs) { this.colExprs = colExprs; } + + public ExprNodeDesc getTsColExpr() { + return tsColExpr; + } + + public void setTsColExpr(ExprNodeDesc tsColExpr) { + this.tsColExpr = tsColExpr; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 468e18e..62bd652 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.parse; +import com.google.common.base.Preconditions; import java.io.Serializable; import java.util.*; import java.util.concurrent.atomic.AtomicInteger; @@ -58,6 +59,8 @@ import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.stats.StatsUtils; /** * TezCompiler translates the operator plan into TezTasks. 
@@ -101,6 +104,9 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, runStatsDependentOptimizations(procCtx, inputs, outputs); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization"); + // Removing semijoin optimization when it may not be beneficial + removeSemijoinOptimizationByBenefit(procCtx); + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); // Remove any parallel edge between semijoin and mapjoin. removeSemijoinsParallelToMapJoin(procCtx); @@ -932,4 +938,216 @@ private void removeSemijoinsParallelToMapJoin(OptimizeTezProcContext procCtx) } } + private static boolean canUseNDV(ColStatistics colStats) { + return (colStats != null) && (colStats.getCountDistint() >= 0); + } + + private static double getBloomFilterCost( + SelectOperator sel, + FilterOperator fil) { + double cost = -1; + Statistics selStats = sel.getStatistics(); + if (selStats != null) { + cost = selStats.getNumRows(); + + // Some other things that could be added here to model cost: + // Cost of computing/sending partial BloomFilter results? BloomFilterSize * # mappers + // For reduce-side join, add the cost of the semijoin table scan/dependent tablescans? + } + return cost; + } + + private static long getCombinedKeyDomainCardinality( + ColStatistics selColStat, + ColStatistics selColSourceStat, + ColStatistics tsColStat) { + long keyDomainCardinality = -1; + if (!canUseNDV(selColStat) || !canUseNDV(tsColStat)) { + return -1; + } + + long selColSourceNdv = canUseNDV(selColSourceStat) ? selColSourceStat.getCountDistint() : -1; + boolean semiJoinKeyIsPK = StatsUtils.inferForeignKey(selColStat, tsColStat); + if (semiJoinKeyIsPK) { + // PK/FK relationship: NDV of selColSourceStat is a superset of what is in tsColStat + if (selColSourceNdv >= 0) { + // Most accurate domain cardinality would be source column NDV if available.
+ keyDomainCardinality = selColSourceNdv; + } + } else { + if (selColSourceNdv >= 0) { + // If semijoin keys and ts keys are completely unrelated, the cardinality of both sets + // could be obtained by adding both cardinalities. Would there be an average case? + keyDomainCardinality = selColSourceNdv + tsColStat.getCountDistint(); + + // Don't exceed the range if we have one. + if (StatsUtils.hasDiscreteRange(selColStat) + && StatsUtils.hasDiscreteRange(tsColStat)) { + long range = 0; + // Try using the cardinality from the value range. + ColStatistics.Range combinedRange = StatsUtils.combineRange(selColStat.getRange(), tsColStat.getRange()); + if (combinedRange != null) { + range = StatsUtils.getRangeDelta(combinedRange); + } else { + range = StatsUtils.getRangeDelta(selColStat.getRange()) + + StatsUtils.getRangeDelta(tsColStat.getRange()); + } + keyDomainCardinality = Math.min(keyDomainCardinality, range); + } + } + // Otherwise there is no source NDV to estimate from; the result stays -1. + } + if (LOG.isDebugEnabled()) { + LOG.debug("Computing key domain cardinality, keyDomainCardinality=" + keyDomainCardinality + + ", semiJoinKeyIsPK=" + semiJoinKeyIsPK + + ", selColStat=" + selColStat + + ", selColSourceStat=" + selColSourceStat + + ", tsColStat=" + tsColStat); + } + + return keyDomainCardinality; + } + + private static double getBloomFilterBenefit( + SelectOperator sel, ExprNodeDesc selExpr, + FilterOperator fil, ExprNodeDesc tsExpr) { + double benefit = -1; + Statistics selStats = sel.getStatistics(); + Statistics filStats = fil.getStatistics(); + if (selStats == null || filStats == null) { + LOG.debug("No stats available to compute BloomFilter benefit"); + return benefit; + } + + // For cardinality values use numRows as default, try to use ColStats if available + long selKeyCardinality = selStats.getNumRows(); + long tsKeyCardinality = filStats.getNumRows(); + long tsRows = filStats.getNumRows(); + long tsRowSize = filStats.getAvgRowSize(); + long keyDomainCardinality = selKeyCardinality +
tsKeyCardinality; + + ExprNodeColumnDesc selCol = ExprNodeDescUtils.getColumnExpr(selExpr); + ExprNodeColumnDesc tsCol = ExprNodeDescUtils.getColumnExpr(tsExpr); + if (selCol != null && tsCol != null) { + // Check if there are column stats available for these columns + ColStatistics selColStat = selStats.getColumnStatisticsFromColName(selCol.getColumn()); + ColStatistics filColStat = filStats.getColumnStatisticsFromColName(tsCol.getColumn()); + if (canUseNDV(selColStat)) { + selKeyCardinality = selColStat.getCountDistint(); + } + if (canUseNDV(filColStat)) { + tsKeyCardinality = filColStat.getCountDistint(); + } + // Get colstats for the original table column for selCol if possible, this would have + // more accurate information about the original NDV of the column before any filtering. + ColStatistics selColSourceStat = null; + if (selColStat != null) { + ExprNodeDescUtils.ColumnOrigin selColSource = ExprNodeDescUtils.findColumnOrigin(selCol, sel); + if (selColSource != null && selColSource.op.getStatistics() != null) { + selColSourceStat = selColSource.op.getStatistics().getColumnStatisticsFromColName( + selColSource.col.getColumn()); + } + } + long domainCardinalityFromColStats = getCombinedKeyDomainCardinality( + selColStat, selColSourceStat, filColStat); + if (domainCardinalityFromColStats >= 0) { + keyDomainCardinality = domainCardinalityFromColStats; + } + } + + // Selectivity: key cardinality of semijoin / domain cardinality + // Benefit (rows filtered from ts): (1 - selectivity) * # ts rows + double selectivity = selKeyCardinality / (double) keyDomainCardinality; + selectivity = Math.min(selectivity, 1); + benefit = tsRows * (1 - selectivity); + + if (LOG.isDebugEnabled()) { + LOG.debug("BloomFilter benefit for " + selCol + " to " + tsCol + + ", selKeyCardinality=" + selKeyCardinality + + ", tsKeyCardinality=" + tsKeyCardinality + + ", tsRows=" + tsRows + + ", keyDomainCardinality=" + keyDomainCardinality); + LOG.debug("SemiJoin key selectivity=" + 
selectivity + + ", benefit=" + benefit); + } + + return benefit; + } + + private static double computeBloomFilterNetBenefit( + SelectOperator sel, ExprNodeDesc selExpr, + FilterOperator fil, ExprNodeDesc tsExpr) { + double netBenefit = -1; + double benefit = getBloomFilterBenefit(sel, selExpr, fil, tsExpr); + Statistics filStats = fil.getStatistics(); + if (benefit > 0 && filStats != null) { + double cost = getBloomFilterCost(sel, fil); + if (cost > 0) { + long filDataSize = filStats.getNumRows(); + netBenefit = (benefit - cost) / filDataSize; + LOG.debug("BloomFilter benefit=" + benefit + + ", cost=" + cost + + ", tsDataSize=" + filDataSize + + ", netBenefit=" + (benefit - cost)); + } + } + LOG.debug("netBenefit=" + netBenefit); + return netBenefit; + } + + private void removeSemijoinOptimizationByBenefit(OptimizeTezProcContext procCtx) + throws SemanticException { + if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) { + // Not needed without semi-join reduction + return; + } + + List semijoinRsToRemove = new ArrayList(); + Map map = procCtx.parseContext.getRsOpToTsOpMap(); + double semijoinReductionThreshold = procCtx.conf.getFloatVar( + HiveConf.ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD); + for (ReduceSinkOperator rs : map.keySet()) { + // rs is semijoin optimization branch, which should look like -SEL-GB1-RS1-GB2-RS2 + // Get to the SelectOperator ancestor + SelectOperator sel = null; + for (Operator currOp = rs; currOp.getParentOperators().size() > 0; currOp = currOp.getParentOperators().get(0)) { + if (currOp instanceof SelectOperator) { + sel = (SelectOperator) currOp; + break; + } + } + if (sel == null) { + throw new SemanticException("Unexpected error - could not find SEL ancestor from semijoin branch of " + rs); + } + + // Check the ndv/rows from the SEL vs the destination tablescan the semijoin opt is going to. 
+ TableScanOperator ts = map.get(rs); + RuntimeValuesInfo rti = procCtx.parseContext.getRsToRuntimeValuesInfoMap().get(rs); + ExprNodeDesc tsExpr = rti.getTsColExpr(); + // In the SEL operator of the semijoin branch, there should be only one column in the operator + ExprNodeDesc selExpr = sel.getConf().getColList().get(0); + + if (LOG.isDebugEnabled()) { + LOG.debug("Computing BloomFilter cost/benefit for " + OperatorUtils.getOpNamePretty(rs) + + " - " + OperatorUtils.getOpNamePretty(ts) + " (" + tsExpr + ")"); + } + + double reductionFactor = computeBloomFilterNetBenefit(sel, selExpr, + (FilterOperator)ts.getChildOperators().get(0), tsExpr); + if (reductionFactor < semijoinReductionThreshold) { + // This semijoin optimization should be removed. Do it after we're done iterating + semijoinRsToRemove.add(rs); + } + } + + for (ReduceSinkOperator rs : semijoinRsToRemove) { + TableScanOperator ts = map.get(rs); + if (LOG.isDebugEnabled()) { + LOG.debug("Reduction factor not satisfied for " + OperatorUtils.getOpNamePretty(rs) + + "-" + OperatorUtils.getOpNamePretty(ts) + ". 
Removing semijoin optimization."); + } + GenTezUtils.removeBranch(rs); + GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, rs, ts); + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index fac60c1..bfc1eca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -786,4 +787,74 @@ private static boolean checkPrefixKeys(List childKeys, List op; + + public ColumnOrigin(ExprNodeColumnDesc col, Operator op) { + super(); + this.col = col; + this.op = op; + } + } + + private static ExprNodeDesc findParentExpr(ExprNodeColumnDesc col, Operator op) { + if (op instanceof ReduceSinkOperator) { + return col; + } + + ExprNodeDesc parentExpr = col; + Map mapping = op.getColumnExprMap(); + if (mapping != null) { + parentExpr = mapping.get(col.getColumn()); + } + return parentExpr; + } + + public static ColumnOrigin findColumnOrigin(ExprNodeDesc expr, Operator op) { + if (expr == null || op == null) { + // bad input + return null; + } + + ExprNodeColumnDesc col = ExprNodeDescUtils.getColumnExpr(expr); + if (col == null) { + // not a column + return null; + } + + Operator parentOp = null; + int numParents = op.getNumParent(); + if (numParents == 0) { + return new ColumnOrigin(col, op); + } + + ExprNodeDesc parentExpr = findParentExpr(col, op); + if (parentExpr == null) { + // couldn't find proper parent column expr + return null; + } + + if (numParents == 1) { + 
parentOp = op.getParentOperators().get(0); + } else { + // Multiple parents - find the right one based on the table alias in the parentExpr + ExprNodeColumnDesc parentCol = ExprNodeDescUtils.getColumnExpr(parentExpr); + if (parentCol != null) { + for (Operator currParent : op.getParentOperators()) { + if (currParent.getSchema().getTableNames().contains(parentCol.getTabAlias())) { + parentOp = currParent; + break; + } + } + } + } + + if (parentOp == null) { + return null; + } + + return findColumnOrigin(parentExpr, parentOp); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 413aacf..bda2050 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -75,6 +75,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector; @@ -100,6 +101,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.BytesWritable; @@ -393,7 +396,7 @@ public static 
float getScaledSelectivity(ColStatistics csPK, ColStatistics csFK) return scaledSelectivity; } - private static long getRangeDelta(ColStatistics.Range range) { + public static long getRangeDelta(ColStatistics.Range range) { if (range.minValue != null && range.maxValue != null) { return (range.maxValue.longValue() - range.minValue.longValue()); } @@ -1684,4 +1687,46 @@ public static int getNumBitVectorsForNDVEstimation(HiveConf conf) throws Semanti } return numBitVectors; } + + public static boolean hasDiscreteRange(ColStatistics colStat) { + if (colStat.getRange() != null) { + TypeInfo colType = TypeInfoUtils.getTypeInfoFromTypeString(colStat.getColumnType()); + if (colType.getCategory() == Category.PRIMITIVE) { + PrimitiveTypeInfo pti = (PrimitiveTypeInfo) colType; + switch (pti.getPrimitiveCategory()) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + return true; + default: + break; + } + } + } + return false; + } + + public static Range combineRange(Range range1, Range range2) { + if ( range1.minValue != null && range1.maxValue != null + && range2.minValue != null && range2.maxValue != null) { + long min1 = range1.minValue.longValue(); + long max1 = range1.maxValue.longValue(); + long min2 = range2.minValue.longValue(); + long max2 = range2.maxValue.longValue(); + + if ( (max1 < min2) + || (max2 < min1)) { + // Disjoint: one range ends before the other begins, so there is nothing to combine + return null; + } else { + // There is an overlap of ranges - create combined range.
+ return new ColStatistics.Range( + Math.min(min1, min2), + Math.max(max1, max2)); + } + } + return null; + } } diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q index f04a923..6338ac3 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q @@ -10,6 +10,7 @@ set hive.optimize.index.filter=true; set hive.stats.autogather=true; set hive.tez.bigtable.minsize.semijoin.reduction=1; set hive.tez.min.bloom.filter.entries=1; +set hive.stats.fetch.column.stats=true; -- Create Tables create table alltypesorc_int ( cint int, cstring string ) stored as ORC; @@ -27,7 +28,8 @@ alter table srcpart_small add partition (ds = "2008-04-09"); insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc; insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"; insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; -insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; +insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20; + set hive.tez.dynamic.semijoin.reduction=false; analyze table alltypesorc_int compute statistics for columns; diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q index 88386a6..55f6e8a 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q @@ -9,6 +9,7 @@ set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; set hive.tez.bigtable.minsize.semijoin.reduction=1; set
hive.tez.min.bloom.filter.entries=1; +set hive.tez.dynamic.semijoin.reduction.threshold=-999999999999; CREATE TABLE `table_1`( `bigint_col_7` bigint, diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q index d5fe136..18408e4 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_3.q @@ -14,6 +14,8 @@ set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; set hive.tez.bigtable.minsize.semijoin.reduction=1; set hive.tez.min.bloom.filter.entries=1; +set hive.tez.dynamic.semijoin.reduction.threshold=-999999999999; + -- Try with merge statements create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q index 4bdff42..c4784bd 100644 --- a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q +++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q @@ -12,6 +12,8 @@ set hive.tez.min.bloom.filter.entries=1; set hive.vectorized.adaptor.usage.mode=none; set hive.vectorized.execution.enabled=true; +set hive.stats.fetch.column.stats=true; +set hive.tez.dynamic.semijoin.reduction.threshold=-999999999999; -- Create Tables create table dsrv2_big stored as orc as diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index e514e2e..35dde96 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -3135,11 +3135,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 7 <- Reducer 5 
(BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3163,7 +3161,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: unknown - Map 6 + Map 5 Map Operator Tree: TableScan alias: srcpart_date @@ -3183,14 +3181,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: srcpart_hour - filterExpr: ((UDFToDouble(hr) = 13.0) and (hr BETWEEN DynamicValue(RS_12_srcpart_hr_min) AND DynamicValue(RS_12_srcpart_hr_max) and in_bloom_filter(hr, DynamicValue(RS_12_srcpart_hr_bloom_filter)))) (type: boolean) + filterExpr: (UDFToDouble(hr) = 13.0) (type: boolean) Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(hr) = 13.0) and (hr BETWEEN DynamicValue(RS_12_srcpart_hr_min) AND DynamicValue(RS_12_srcpart_hr_max) and in_bloom_filter(hr, DynamicValue(RS_12_srcpart_hr_bloom_filter)))) (type: boolean) + predicate: (UDFToDouble(hr) = 13.0) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: string) @@ -3219,19 +3217,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) - 
mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3266,18 +3251,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index 235fed0..c1dd8c0 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -88,13 +88,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@srcpart_date@ds=2008-04-09 POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table srcpart_small partition 
(ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +PREHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@srcpart_small@ds=2008-04-09 -POSTHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +POSTHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 @@ -190,19 +190,19 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -214,7 +214,7 @@ STAGE PLANS: keys: 
0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -264,7 +264,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +176 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) @@ -309,25 +309,25 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + 
Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -346,7 +346,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -375,7 +375,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -408,7 +408,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +176 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds) @@ -466,16 +466,16 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 360000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 7200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 
Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -487,7 +487,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -580,19 +580,19 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Map 5 @@ -626,7 +626,7 @@ STAGE PLANS: 0 _col0 (type: 
string) 1 _col0 (type: string) 2 _col0 (type: string) - Statistics: Num rows: 16008 Data size: 128064 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 320 Data size: 2560 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -678,7 +678,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -48 +0 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring) @@ -726,25 +726,25 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator 
expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -755,9 +755,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -811,7 +811,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Statistics: Num rows: 16008 Data size: 128064 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 320 Data size: 2560 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -840,7 +840,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -852,7 +852,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - 
aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -899,7 +899,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -48 +0 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) @@ -942,19 +942,19 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string), value1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column 
stats: PARTIAL + Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -966,7 +966,7 @@ STAGE PLANS: keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 45 Data size: 360 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1016,7 +1016,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +176 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) @@ -1062,25 +1062,25 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string), value1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 
(type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1091,9 +1091,9 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=36) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1112,7 +1112,7 @@ STAGE PLANS: keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 45 Data size: 360 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1141,7 +1141,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) + 
aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1153,7 +1153,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=36) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1186,7 +1186,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +176 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) @@ -1208,26 +1208,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: alltypesorc_int - filterExpr: cstring is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cstring is not null (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num 
rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan alias: srcpart_date filterExpr: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE @@ -1239,31 +1219,51 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: all inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column 
stats: PARTIAL + Execution mode: llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -1274,14 +1274,14 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1291,7 +1291,7 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 319 Data size: 2552 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1357,93 +1357,60 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- 
Reducer 6 (BROADCAST_EDGE) - Map 5 <- Reducer 8 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 1 <- Reducer 7 (BROADCAST_EDGE) + Map 8 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: alltypesorc_int - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_cstring_min) AND DynamicValue(RS_10_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_cstring_bloom_filter)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_cstring_min) AND DynamicValue(RS_10_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_cstring_bloom_filter)))) (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan alias: srcpart_date - filterExpr: (key is not null and value is not null and (key BETWEEN 
DynamicValue(RS_13_srcpart_small_key_min) AND DynamicValue(RS_13_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_key_bloom_filter)))) (type: boolean) + filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_13_srcpart_small_key_min) AND DynamicValue(RS_13_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_key_bloom_filter)))) (type: boolean) + predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428) - mode: hash - outputColumnNames: _col0, 
_col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: all inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic 
stats: COMPLETE Column stats: PARTIAL @@ -1453,6 +1420,26 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs + Map 8 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1461,14 +1448,27 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 3281 Data size: 285447 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=42) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1478,7 +1478,7 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 319 Data size: 2552 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1503,23 +1503,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=42) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data 
size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 8 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL @@ -1593,7 +1593,7 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1610,19 +1610,19 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Reducer 2 @@ 
-1665,7 +1665,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +176 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) @@ -1704,7 +1704,7 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1721,19 +1721,19 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -1776,7 +1776,7 @@ POSTHOOK: Input: default@srcpart_small POSTHOOK: Input: 
default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### -8224 +176 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) @@ -1790,56 +1790,14 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: alltypesorc_int - filterExpr: cstring is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cstring is not null (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: bigint) - Execution mode: llap - LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan alias: srcpart_date filterExpr: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE @@ -1850,35 +1808,77 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic 
stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs - Reducer 2 + Map 3 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + input vertices: + 0 Map 1 + Statistics: Num rows: 319 Data size: 2552 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1934,110 +1934,95 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 
(BROADCAST_EDGE) - Map 3 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: alltypesorc_int - filterExpr: cstring is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + alias: srcpart_date + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: cstring is not null (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cstring (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 3 - Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 4 - Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: bigint) + 1 Map 2 + Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 195 Data size: 17745 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: srcpart_date - filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_13_srcpart_small_key_min) AND DynamicValue(RS_13_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_13_srcpart_small_key_min) AND DynamicValue(RS_13_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Execution mode: llap - LLAP IO: all inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 
Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + input vertices: + 0 Map 1 + Statistics: Num rows: 319 Data size: 2552 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, 
expectedEntries=410) + aggregations: count() mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs - Reducer 2 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2052,18 +2037,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -2138,10 +2111,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Union 3 - Statistics: Num rows: 4922 Data size: 910570 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3314 Data size: 613090 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 4922 Data size: 910570 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3314 Data size: 613090 
Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2165,7 +2138,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2020 Data size: 175740 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Map 4 @@ -2173,19 +2146,19 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2020 Data size: 175740 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Union 3 diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out index c1fde0b..64929cc 100644 --- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -284,10 +284,8 @@ STAGE 
PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -308,29 +306,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: b - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_6_a_key_min) AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_6_a_key_min) AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) @@ -377,18 +362,6 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -1401,10 +1374,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1421,38 +1392,22 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: b 
Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key BETWEEN DynamicValue(RS_4_a_key_min) AND DynamicValue(RS_4_a_key_max) and in_bloom_filter(key, DynamicValue(RS_4_a_key_bloom_filter))) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1489,18 +1444,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -1542,10 +1485,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 
(CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1553,18 +1494,15 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key BETWEEN DynamicValue(RS_5_b_key_min) AND DynamicValue(RS_5_b_key_max) and in_bloom_filter(key, DynamicValue(RS_5_b_key_bloom_filter))) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 @@ -1581,19 +1519,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs 
Reducer 2 @@ -1630,18 +1555,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -1790,13 +1703,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 8 (BROADCAST_EDGE) - Map 6 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 1 <- Reducer 7 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1818,29 +1729,16 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num 
rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: b - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_9_a_key_min) AND DynamicValue(RS_9_a_key_max) and in_bloom_filter(key, DynamicValue(RS_9_a_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_9_a_key_min) AND DynamicValue(RS_9_a_key_max) and in_bloom_filter(key, DynamicValue(RS_9_a_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) @@ -1853,7 +1751,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: c @@ -1936,19 +1834,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: 
_col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 8 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1997,10 +1883,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2021,29 +1905,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: b - filterExpr: (value is not null and (value BETWEEN DynamicValue(RS_6_a_value_min) AND DynamicValue(RS_6_a_value_max) and in_bloom_filter(value, DynamicValue(RS_6_a_value_bloom_filter)))) (type: boolean) + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (value BETWEEN DynamicValue(RS_6_a_value_min) AND DynamicValue(RS_6_a_value_max) and in_bloom_filter(value, DynamicValue(RS_6_a_value_bloom_filter)))) (type: boolean) + predicate: 
value is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) @@ -2090,18 +1961,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -2145,12 +2004,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 8 <- Union 3 (CONTAINS) - Map 9 <- Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 9 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) + Map 7 <- Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Map 8 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Union 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2173,7 +2030,7 @@ STAGE PLANS: Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -2193,7 +2050,7 @@ STAGE PLANS: Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 8 + Map 7 Map Operator Tree: TableScan 
alias: s2 @@ -2211,29 +2068,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1016) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 9 + Map 8 Map Operator Tree: TableScan alias: b - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_18_s1_key_min) AND DynamicValue(RS_18_s1_key_max) and in_bloom_filter(key, DynamicValue(RS_18_s1_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_18_s1_key_min) AND DynamicValue(RS_18_s1_key_max) and in_bloom_filter(key, DynamicValue(RS_18_s1_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) @@ -2262,19 +2106,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: 
min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1016) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2309,18 +2140,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1016) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Union 3 Vertex: Union 3 @@ -2343,10 +2162,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2367,29 +2184,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: 
min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: b - filterExpr: (value is not null and (value BETWEEN DynamicValue(RS_6_a_value_min) AND DynamicValue(RS_6_a_value_max) and in_bloom_filter(value, DynamicValue(RS_6_a_value_bloom_filter)))) (type: boolean) + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (value BETWEEN DynamicValue(RS_6_a_value_min) AND DynamicValue(RS_6_a_value_max) and in_bloom_filter(value, DynamicValue(RS_6_a_value_bloom_filter)))) (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) @@ -2436,18 +2240,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 
(type: binary) Stage: Stage-0 Fetch Operator @@ -2483,13 +2275,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 8 (BROADCAST_EDGE) - Map 6 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 1 <- Reducer 7 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2511,29 +2301,16 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: b - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_9_a_key_min) AND DynamicValue(RS_9_a_key_max) and in_bloom_filter(key, DynamicValue(RS_9_a_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key BETWEEN 
DynamicValue(RS_9_a_key_min) AND DynamicValue(RS_9_a_key_max) and in_bloom_filter(key, DynamicValue(RS_9_a_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) @@ -2546,7 +2323,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: c @@ -2629,19 +2406,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 8 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2698,12 +2463,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 8 <- Union 3 (CONTAINS) - Map 9 <- Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 9 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) + Map 7 <- Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Map 8 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Union 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2726,7 
+2489,7 @@ STAGE PLANS: Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -2746,7 +2509,7 @@ STAGE PLANS: Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: s2 @@ -2764,29 +2527,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1016) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 9 + Map 8 Map Operator Tree: TableScan alias: b - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_18_s1_key_min) AND DynamicValue(RS_18_s1_key_max) and in_bloom_filter(key, DynamicValue(RS_18_s1_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_18_s1_key_min) AND DynamicValue(RS_18_s1_key_max) and in_bloom_filter(key, DynamicValue(RS_18_s1_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: 
NONE Select Operator expressions: key (type: int) @@ -2815,19 +2565,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1016) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2862,18 +2599,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1016) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Union 3 Vertex: Union 3 @@ -2910,12 +2635,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Reducer 5 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - 
Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2937,14 +2660,14 @@ STAGE PLANS: Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: t2 - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_12_t1_key_min) AND DynamicValue(RS_12_t1_key_max) and in_bloom_filter(key, DynamicValue(RS_12_t1_key_bloom_filter)))) (type: boolean) + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_12_t1_key_min) AND DynamicValue(RS_12_t1_key_max) and in_bloom_filter(key, DynamicValue(RS_12_t1_key_bloom_filter)))) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) @@ -2968,19 +2691,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=484) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3015,19 +2725,7 @@ STAGE 
PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=484) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 7 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 6a9a76c..3967d11 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -3486,11 +3486,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 7 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3520,7 +3518,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: true vectorized: true - Map 6 + Map 5 Map Operator Tree: TableScan alias: srcpart_date @@ -3548,14 +3546,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 7 + Map 6 Map Operator Tree: 
TableScan alias: srcpart_hour - filterExpr: ((UDFToDouble(hr) = 13.0) and (hr BETWEEN DynamicValue(RS_12_srcpart_hr_min) AND DynamicValue(RS_12_srcpart_hr_max) and in_bloom_filter(hr, DynamicValue(RS_12_srcpart_hr_bloom_filter)))) (type: boolean) + filterExpr: (UDFToDouble(hr) = 13.0) (type: boolean) Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(hr) = 13.0) and (hr BETWEEN DynamicValue(RS_12_srcpart_hr_min) AND DynamicValue(RS_12_srcpart_hr_max) and in_bloom_filter(hr, DynamicValue(RS_12_srcpart_hr_bloom_filter)))) (type: boolean) + predicate: (UDFToDouble(hr) = 13.0) (type: boolean) Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: string) @@ -3592,19 +3590,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3646,25 +3631,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator