diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 3672c7afef..4f01bcab45 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -485,6 +485,7 @@ minillaplocal.query.files=\ colstats_date_min_max.q,\ compare_double_bigint_2.q,\ constprog_dpp.q,\ + constraints_optimization.q,\ current_date_timestamp.q,\ correlationoptimizer1.q,\ correlationoptimizer2.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java index 42e60de6a8..99bbad5439 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -167,6 +167,8 @@ public Expression getExpression(Class clazz) { throw new UnsupportedOperationException(); } + public List getKeys() { return keys; } + @Override public RelOptTable extend(List extendedFields) { throw new UnsupportedOperationException(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java index 5857f730a8..8f49620306 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java @@ -17,22 +17,21 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; -import java.util.ArrayList; -import java.util.Collections; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import org.apache.calcite.adapter.druid.DruidQuery; import org.apache.calcite.linq4j.Ord; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; @@ -309,9 +308,52 @@ else if(rexNode instanceof RexCall return false; } } + + private ImmutableBitSet generateNewGroupset(Aggregate aggregate, ImmutableBitSet fieldsUsed) { + + ImmutableBitSet originalGroupSet = aggregate.getGroupSet(); + + if (aggregate.getGroupSets().size() > 1 || aggregate.getIndicatorCount() > 0 + || fieldsUsed.contains(originalGroupSet)) { + // if there is grouping sets, indicator or all the group keys are being used we do no need to proceed further + return originalGroupSet; + } + + final RelNode input = aggregate.getInput(); + RelMetadataQuery mq = aggregate.getCluster().getMetadataQuery(); + + // there is at least one gb key not being used, we need to find all unique keys to get to the column + // which isn't unique key + final Set uniqueKeys = mq.getUniqueKeys(input, false); + if (uniqueKeys == null || uniqueKeys.isEmpty()) { + return originalGroupSet; + } + + // we have set of unique key, get to the key which is same as group by key + 
ImmutableBitSet groupByUniqueKey = null; + + for (ImmutableBitSet key : uniqueKeys) { + if (aggregate.getGroupSet().contains(key)) { + groupByUniqueKey = key; + break; + } + } + + if (groupByUniqueKey == null) { + // group by keys do not represent unique keys + return originalGroupSet; + } + + // we know group by key contains primary key and there is at least one column in group by which is not being used + // if that column is not part of key it should be removed + ImmutableBitSet nonKeyColumns = aggregate.getGroupSet().except(groupByUniqueKey); + ImmutableBitSet columnsToRemove = nonKeyColumns.except(fieldsUsed); + ImmutableBitSet newGroupSet = aggregate.getGroupSet().except(columnsToRemove); + + return newGroupSet; + } + /** - * Variant of {@link #trimFields(Aggregate, ImmutableBitSet, Set)} for - * {@link org.apache.calcite.rel.logical.LogicalAggregate}. * This method replaces group by 'constant key' with group by true (boolean) * if and only if * group by doesn't have grouping sets @@ -323,49 +365,193 @@ else if(rexNode instanceof RexCall * * This is mainly done so that hive is able to push down queries with * group by 'constant key with type not supported by druid' into druid + * */ - public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, - Set extraFields) { - - Aggregate newAggregate = aggregate; - if (!(aggregate.getIndicatorCount() > 0) - && !(aggregate.getGroupSet().isEmpty()) - && !fieldsUsed.contains(aggregate.getGroupSet())) { - final RelNode input = aggregate.getInput(); - final RelDataType rowType = input.getRowType(); - RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder(); - final List newProjects = new ArrayList<>(); - - final List inputExprs = input.getChildExps(); - if(inputExprs == null || inputExprs.isEmpty()) { - return super.trimFields(newAggregate, fieldsUsed, extraFields); + + private Aggregate rewriteGBConstantKeys(Aggregate aggregate, ImmutableBitSet fieldsUsed, + Set extraFields) { + if ((aggregate.getIndicatorCount() > 0) + || (aggregate.getGroupSet().isEmpty()) + || fieldsUsed.contains(aggregate.getGroupSet())) { + return aggregate; + } + + final RelNode input = aggregate.getInput(); + + + final RelDataType rowType = input.getRowType(); + RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder(); + final List newProjects = new ArrayList<>(); + + final List inputExprs = input.getChildExps(); + if (inputExprs == null || inputExprs.isEmpty()) { + return aggregate; + } + + boolean allConstants = true; + for (int key : aggregate.getGroupSet()) { + // getChildExprs on Join could return less number of expressions than there are coming out of join + if (inputExprs.size() <= key || !isRexLiteral(inputExprs.get(key))) { + allConstants = false; + break; } + } - boolean allConstants = true; - for(int key : aggregate.getGroupSet()) { - // getChildExprs on Join could return less number of expressions than there are coming out of join - if(inputExprs.size() <= key || !isRexLiteral(inputExprs.get(key))){ - allConstants = false; - break; + if (allConstants) { + for (int i = 0; i < rowType.getFieldCount(); i++) { + if (aggregate.getGroupSet().get(i)) { + newProjects.add(rexBuilder.makeLiteral(true)); + } else { + newProjects.add(rexBuilder.makeInputRef(input, i)); } } + relBuilder.push(input); + relBuilder.project(newProjects); + Aggregate newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(), + aggregate.getGroupSet(), null, aggregate.getAggCallList()); + return newAggregate; + } + return 
aggregate; + } + + @Override + public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Set extraFields) { + // Fields: + // + // | sys fields | group fields | indicator fields | agg functions | + // + // Two kinds of trimming: + // + // 1. If agg rel has system fields but none of these are used, create an + // agg rel with no system fields. + // + // 2. If aggregate functions are not used, remove them. + // + // But group and indicator fields stay, even if they are not used. + + aggregate = rewriteGBConstantKeys(aggregate, fieldsUsed, extraFields); + + final RelDataType rowType = aggregate.getRowType(); + + // Compute which input fields are used. + // 1. group fields are always used + final ImmutableBitSet.Builder inputFieldsUsed = + aggregate.getGroupSet().rebuild(); + // 2. agg functions + for (AggregateCall aggCall : aggregate.getAggCallList()) { + for (int i : aggCall.getArgList()) { + inputFieldsUsed.set(i); + } + if (aggCall.filterArg >= 0) { + inputFieldsUsed.set(aggCall.filterArg); + } + } - if (allConstants) { - for (int i = 0; i < rowType.getFieldCount(); i++) { - if (aggregate.getGroupSet().get(i)) { - newProjects.add(rexBuilder.makeLiteral(true)); - } else { - newProjects.add(rexBuilder.makeInputRef(input, i)); - } - } - relBuilder.push(input); - relBuilder.project(newProjects); - newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(), - aggregate.getGroupSet(), null, aggregate.getAggCallList()); + // Create input with trimmed columns. + final RelNode input = aggregate.getInput(); + final Set inputExtraFields = Collections.emptySet(); + final TrimResult trimResult = + trimChild(aggregate, input, inputFieldsUsed.build(), inputExtraFields); + final RelNode newInput = trimResult.left; + final Mapping inputMapping = trimResult.right; + + ImmutableBitSet originalGroupSet = aggregate.getGroupSet(); + ImmutableBitSet updatedGroupSet = generateNewGroupset(aggregate, fieldsUsed); + ImmutableBitSet gbKeysDeleted = originalGroupSet.except(updatedGroupSet); + ImmutableBitSet updatedGroupFields = ImmutableBitSet.range(originalGroupSet.cardinality()); + final int updatedGroupCount = updatedGroupSet.cardinality(); + + // we need to clear the bits corresponding to deleted gb keys + int setIdx = 0; + while(setIdx != -1) { + setIdx = gbKeysDeleted.nextSetBit(setIdx); + if(setIdx != -1) { + updatedGroupFields = updatedGroupFields.clear(setIdx); + setIdx++; + } + } + fieldsUsed = + fieldsUsed.union(updatedGroupFields); + + // If the input is unchanged, and we need to project all columns, + // there's nothing to do. + if (input == newInput + && fieldsUsed.equals(ImmutableBitSet.range(rowType.getFieldCount()))) { + return result(aggregate, + Mappings.createIdentity(rowType.getFieldCount())); + } + + // update the group by keys based on inputMapping + ImmutableBitSet newGroupSet = + Mappings.apply(inputMapping, updatedGroupSet); + + // Which agg calls are used by our consumer? + int originalGroupCount = aggregate.getGroupSet().cardinality(); + int j = originalGroupCount; + int usedAggCallCount = 0; + for (int i = 0; i < aggregate.getAggCallList().size(); i++) { + if (fieldsUsed.get(j++)) { + ++usedAggCallCount; + } + } + + // Offset due to the number of system fields having changed. 
+ Mapping mapping = + Mappings.create( + MappingType.INVERSE_SURJECTION, + rowType.getFieldCount(), + updatedGroupCount + usedAggCallCount); + + + // if group keys were reduced, it means we didn't have grouping therefore + // we don't need to transform group sets + ImmutableList newGroupSets = null; + if(!updatedGroupSet.equals(aggregate.getGroupSet())) { + newGroupSets = ImmutableList.of(newGroupSet); + } else { + newGroupSets = ImmutableList.copyOf( + Iterables.transform(aggregate.getGroupSets(), + input1 -> Mappings.apply(inputMapping, input1))); + } + + // Populate mapping of where to find the fields. System, group key and + // indicator fields first. + int gbKeyIdx = 0; + for (j = 0; j < originalGroupCount; j++) { + if(fieldsUsed.get(j)) { + mapping.set(j, gbKeyIdx); + gbKeyIdx++; + } + } + + // Now create new agg calls, and populate mapping for them. + relBuilder.push(newInput); + final List newAggCallList = new ArrayList<>(); + j = originalGroupCount; // because lookup in fieldsUsed is done using original group count + for (AggregateCall aggCall : aggregate.getAggCallList()) { + if (fieldsUsed.get(j)) { + final ImmutableList args = + relBuilder.fields( + Mappings.apply2(inputMapping, aggCall.getArgList())); + final RexNode filterArg = aggCall.filterArg < 0 ? null + : relBuilder.field(Mappings.apply(inputMapping, aggCall.filterArg)); + RelBuilder.AggCall newAggCall = + relBuilder.aggregateCall(aggCall.getAggregation(), + aggCall.isDistinct(), aggCall.isApproximate(), + filterArg, aggCall.name, args); + mapping.set(j, updatedGroupCount + newAggCallList.size()); + newAggCallList.add(newAggCall); } + ++j; } - return super.trimFields(newAggregate, fieldsUsed, extraFields); + + final RelBuilder.GroupKey groupKey = + relBuilder.groupKey(newGroupSet, newGroupSets); + relBuilder.aggregate(groupKey, newAggCallList); + + return result(relBuilder.build(), mapping); } + /** * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for * {@link org.apache.calcite.rel.logical.LogicalProject}. 
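Reviewer note: the group-set reduction introduced in generateNewGroupset() above comes down to three ImmutableBitSet operations — subtract the unique key from the group set, subtract the fields the consumer actually reads, and remove whatever is left from the original group set. A minimal, self-contained sketch of that arithmetic using Calcite's org.apache.calcite.util.ImmutableBitSet (the class name and column positions below are illustrative only, not part of the patch):

import org.apache.calcite.util.ImmutableBitSet;

public class GroupKeyReductionSketch {
  public static void main(String[] args) {
    // The aggregate groups by columns {0, 1, 5}; the table's unique (primary) key is {0, 1};
    // the consumer of the aggregate only reads column 0.
    ImmutableBitSet groupSet = ImmutableBitSet.of(0, 1, 5);
    ImmutableBitSet uniqueKey = ImmutableBitSet.of(0, 1);
    ImmutableBitSet fieldsUsed = ImmutableBitSet.of(0);

    // Only safe when the group by columns contain a whole unique key:
    // the key alone already guarantees one row per group.
    if (groupSet.contains(uniqueKey)) {
      ImmutableBitSet nonKeyColumns = groupSet.except(uniqueKey);          // {5}
      ImmutableBitSet columnsToRemove = nonKeyColumns.except(fieldsUsed);  // {5}
      ImmutableBitSet newGroupSet = groupSet.except(columnsToRemove);      // {0, 1}
      System.out.println(newGroupSet); // prints {0, 1}: the unused non-key column is dropped
    }
  }
}

This mirrors the cases in constraints_optimization.q below, where an unused non-key column such as d_sellingseason disappears from the group by once the primary key (d_datekey, d_id) is declared with RELY.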
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index 1ca1937ed9..e2881e733b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -329,7 +329,7 @@ private static double pkSelectivity(Join joinRel, RelMetadataQuery mq, boolean l private static boolean isKey(ImmutableBitSet c, RelNode rel, RelMetadataQuery mq) { boolean isKey = false; - Set keys = mq.getUniqueKeys(rel); + Set keys = mq.getUniqueKeys(rel, true); if (keys != null) { for (ImmutableBitSet key : keys) { if (key.equals(c)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java index 3bf62c535c..c2c2854543 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java @@ -40,6 +40,7 @@ import org.apache.calcite.util.BitSets; import org.apache.calcite.util.BuiltInMethod; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.plan.ColStatistics; @@ -54,76 +55,11 @@ return BuiltInMetadata.UniqueKeys.DEF; } - /* - * Infer Uniquenes if: - rowCount(col) = ndv(col) - TBD for numerics: max(col) - * - min(col) = rowCount(col) - * - * Why are we intercepting Project and not TableScan? Because if we - * have a method for TableScan, it will not know which columns to check for. - * Inferring Uniqueness for all columns is very expensive right now. The flip - * side of doing this is, it only works post Field Trimming. - */ - public Set getUniqueKeys(Project rel, RelMetadataQuery mq, boolean ignoreNulls) { - - HiveTableScan tScan = getTableScan(rel.getInput(), false); - - if (tScan == null) { - // If HiveTableScan is not found, e.g., not sequence of Project and - // Filter operators, execute the original getUniqueKeys method - - // LogicalProject maps a set of rows to a different set; - // Without knowledge of the mapping function(whether it - // preserves uniqueness), it is only safe to derive uniqueness - // info from the child of a project when the mapping is f(a) => a. - // - // Further more, the unique bitset coming from the child needs - // to be mapped to match the output of the project. - final Map mapInToOutPos = new HashMap<>(); - final List projExprs = rel.getProjects(); - final Set projUniqueKeySet = new HashSet<>(); - - // Build an input to output position map. - for (int i = 0; i < projExprs.size(); i++) { - RexNode projExpr = projExprs.get(i); - if (projExpr instanceof RexInputRef) { - mapInToOutPos.put(((RexInputRef) projExpr).getIndex(), i); - } - } - - if (mapInToOutPos.isEmpty()) { - // if there's no RexInputRef in the projected expressions - // return empty set. - return projUniqueKeySet; - } - - Set childUniqueKeySet = - mq.getUniqueKeys(rel.getInput(), ignoreNulls); - - if (childUniqueKeySet != null) { - // Now add to the projUniqueKeySet the child keys that are fully - // projected. 
- for (ImmutableBitSet colMask : childUniqueKeySet) { - ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder(); - boolean completeKeyProjected = true; - for (int bit : colMask) { - if (mapInToOutPos.containsKey(bit)) { - tmpMask.set(mapInToOutPos.get(bit)); - } else { - // Skip the child unique key if part of it is not - // projected. - completeKeyProjected = false; - break; - } - } - if (completeKeyProjected) { - projUniqueKeySet.add(tmpMask.build()); - } - } - } - - return projUniqueKeySet; - } + // Infer Uniqueness if: - rowCount(col) = ndv(col) - TBD for numerics: max(col) + // - min(col) = rowCount(col) + private Set<ImmutableBitSet> generateKeysUsingStatsEstimation(Project rel, RelMetadataQuery mq, + HiveTableScan tScan) { Map<Integer, Integer> posMap = new HashMap<Integer, Integer>(); int projectPos = 0; int colStatsPos = 0; @@ -140,7 +76,7 @@ double numRows = mq.getRowCount(tScan); List<ColStatistics> colStats = tScan.getColStat(BitSets - .toList(projectedCols)); + .toList(projectedCols)); Set<ImmutableBitSet> keys = new HashSet<ImmutableBitSet>(); colStatsPos = 0; @@ -149,14 +85,14 @@ if (cStat.getCountDistint() >= numRows) { isKey = true; } - if ( !isKey && cStat.getRange() != null && - cStat.getRange().maxValue != null && + if (!isKey && cStat.getRange() != null && + cStat.getRange().maxValue != null && cStat.getRange().minValue != null) { - double r = cStat.getRange().maxValue.doubleValue() - + double r = cStat.getRange().maxValue.doubleValue() - cStat.getRange().minValue.doubleValue() + 1; isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON); } - if ( isKey ) { + if (isKey) { ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos)); keys.add(key); } @@ -164,6 +100,91 @@ } return keys; + + } + + /* + * This API is (unfortunately) used for two separate things. The original use is to infer uniqueness based + * on statistics; the second is to infer it from key constraints. The two are distinguished by the ignoreNulls + * parameter: if true we use the statistics-based estimation, and if false we use the constraint-based inference + * (used in the field trimmer to eliminate group by keys). + * + */ + public Set<ImmutableBitSet> getUniqueKeys(Project rel, RelMetadataQuery mq, boolean ignoreNulls) { + + if (ignoreNulls) { // called by isKey of HiveRelMdRowCount + HiveTableScan tScan = getTableScan(rel.getInput(), false); + if (tScan != null) { + return generateKeysUsingStatsEstimation(rel, mq, tScan); + } + } + + // If HiveTableScan is not found, e.g., not a sequence of Project and + // Filter operators, execute the original getUniqueKeys method + + // LogicalProject maps a set of rows to a different set; + // Without knowledge of the mapping function (whether it + // preserves uniqueness), it is only safe to derive uniqueness + // info from the child of a project when the mapping is f(a) => a. + // + // Furthermore, the unique bitset coming from the child needs + // to be mapped to match the output of the project. + final Map<Integer, Integer> mapInToOutPos = new HashMap<>(); + final List<RexNode> projExprs = rel.getProjects(); + final Set<ImmutableBitSet> projUniqueKeySet = new HashSet<>(); + + // Build an input to output position map. + for (int i = 0; i < projExprs.size(); i++) { + RexNode projExpr = projExprs.get(i); + if (projExpr instanceof RexInputRef) { + mapInToOutPos.put(((RexInputRef) projExpr).getIndex(), i); + } + } + + if (mapInToOutPos.isEmpty()) { + // if there's no RexInputRef in the projected expressions + // return empty set.
+ return projUniqueKeySet; + } + + Set childUniqueKeySet = + mq.getUniqueKeys(rel.getInput(), ignoreNulls); + + if (childUniqueKeySet != null) { + // Now add to the projUniqueKeySet the child keys that are fully + // projected. + for (ImmutableBitSet colMask : childUniqueKeySet) { + ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder(); + boolean completeKeyProjected = true; + for (int bit : colMask) { + if (mapInToOutPos.containsKey(bit)) { + tmpMask.set(mapInToOutPos.get(bit)); + } else { + // Skip the child unique key if part of it is not + // projected. + completeKeyProjected = false; + break; + } + } + if (completeKeyProjected) { + projUniqueKeySet.add(tmpMask.build()); + } + } + } + return projUniqueKeySet; + } + + public Set getUniqueKeys(HiveTableScan rel, RelMetadataQuery mq, + boolean ignoreNulls) { + //TODO: ignoreNulls need to be taken into account + RelOptHiveTable tbl = (RelOptHiveTable) rel.getTable(); + List keyList = tbl.getKeys(); + if (keyList != null) { + //TODO: this operation is expensive, RelOptHiveTable should be updated to keep set instead of list + Set keySet = new HashSet<>(keyList); + return keySet; + } + return null; } /* diff --git a/ql/src/test/queries/clientpositive/constraints_optimization.q b/ql/src/test/queries/clientpositive/constraints_optimization.q new file mode 100644 index 0000000000..2be69580ab --- /dev/null +++ b/ql/src/test/queries/clientpositive/constraints_optimization.q @@ -0,0 +1,137 @@ +set hive.strict.checks.cartesian.product=false; + +CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely); + +CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_id` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`, `d_id`) disable rely); + + -- group by key has single primary key + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey; + + -- mix of primary + non-primary keys + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation; + + -- multiple keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id; + + -- multiple keys + non-keys + different order + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason + order by d_datekey limit 10; + + -- multiple keys in different order and mixed with non-keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10; + + -- same as above but with aggregate + EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10; + + -- join + insert into dates_removal_n0(d_datekey, d_id) values(3, 0); + insert into dates_removal_n0(d_datekey, d_id) values(3, 1); + insert into customer_removal_n0 (c_custkey) values(3); + + EXPLAIN 
SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id; + SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id; + + -- group by keys are not primary keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason + order by d_datekey limit 10; + + -- negative + -- with aggregate function + EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA') + group by c_custkey, c_nation; + + DROP TABLE customer_removal_n0; + DROP TABLE dates_removal_n0; + + -- group by reduction optimization + create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely); + insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null); + + -- value1 will removed because it is unused, then whole group by will be removed because key1 is unique + explain select key1 from dest_g21 group by key1, value1; + select key1 from dest_g21 group by key1, value1; + -- same query but with filter + explain select key1 from dest_g21 where value1 > 1 group by key1, value1; + select key1 from dest_g21 where value1 > 1 group by key1, value1; + + -- only value1 will be removed because there is aggregate call + explain select count(key1) from dest_g21 group by key1, value1; + select count(key1) from dest_g21 group by key1, value1; + + explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1; + select count(key1) from dest_g21 where value1 > 1 group by key1, value1; + + -- t1.key is unique even after join therefore group by = group by (t1.key) + explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1; + select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1; + + explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1; + select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1; + + -- both aggregate and one of the key1 should be removed + explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub; + select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub; + + -- one of the aggregate will be removed and one of the key1 will be removed + explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub; + select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub; + + DROP table dest_g21; + +CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string); +INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010'); + +-- explicit NOT NULL filter +explain select i, j from tconst where i is not null group by i,j, d_year; +select i, j from tconst where i is not null group by i,j, d_year; + +-- filter on i should be removed +explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year; +select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year; + +-- where will be removed since i is not null is always true +explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, 
j, d_year; +select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year; + +-- should not have implicit filter on join keys +explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year; +select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year; + +-- both join keys have NOT NULL +explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year; +select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year; + +DROP TABLE tconst; + + + diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out new file mode 100644 index 0000000000..ffac8c858f --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -0,0 +1,1759 @@ +PREHOOK: query: CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer_removal_n0 +POSTHOOK: query: CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer_removal_n0 +PREHOOK: query: CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_id` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`, `d_id`) disable rely) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_id` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`, `d_id`) disable rely) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dates_removal_n0 +PREHOOK: query: -- group by key has single primary key + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey +PREHOOK: type: QUERY +POSTHOOK: query: -- group by key has single primary key + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: customer_removal_n0 + filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Filter Operator + predicate: (c_nation) IN ('USA', 'INDIA') (type: 
boolean) + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: -- mix of primary + non-primary keys + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation +PREHOOK: type: QUERY +POSTHOOK: query: -- mix of primary + non-primary keys + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: customer_removal_n0 + filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Filter Operator + predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: -- multiple keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id +PREHOOK: type: QUERY +POSTHOOK: query: -- multiple keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: -- multiple keys + non-keys + different order + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason + order by d_datekey limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- multiple keys + non-keys + different order + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason + order by d_datekey limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: -- multiple keys in different order and mixed with non-keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- multiple keys in different order and mixed with non-keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: -- same as above but with aggregate + EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- same as above but with aggregate + EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) 
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint), d_id (type: bigint) + outputColumnNames: d_datekey, d_id + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: d_datekey (type: bigint), d_id (type: bigint) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Group By Operator + aggregations: count() + keys: d_datekey (type: bigint), d_id (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: bigint), _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- join + insert into dates_removal_n0(d_datekey, d_id) values(3, 0) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: -- join + insert into dates_removal_n0(d_datekey, d_id) values(3, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dates_removal_n0 +POSTHOOK: Lineage: dates_removal_n0.d_date SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_datekey SCRIPT [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminmonth SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminweek SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_dayofweek SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_holidayfl 
SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_id SCRIPT [] +POSTHOOK: Lineage: dates_removal_n0.d_lastdayinmonthfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_lastdayinweekfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_month SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_monthnuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_sellingseason SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_weekdayfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_weeknuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_year SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_yearmonth SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_yearmonthnum SIMPLE [] +PREHOOK: query: insert into dates_removal_n0(d_datekey, d_id) values(3, 1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: insert into dates_removal_n0(d_datekey, d_id) values(3, 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dates_removal_n0 +POSTHOOK: Lineage: dates_removal_n0.d_date SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_datekey SCRIPT [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminmonth SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminweek SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_dayofweek SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_holidayfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_id SCRIPT [] +POSTHOOK: Lineage: dates_removal_n0.d_lastdayinmonthfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_lastdayinweekfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_month SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_monthnuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_sellingseason SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_weekdayfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_weeknuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_year SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_yearmonth SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_yearmonthnum SIMPLE [] +PREHOOK: query: insert into customer_removal_n0 (c_custkey) values(3) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@customer_removal_n0 +POSTHOOK: query: insert into customer_removal_n0 (c_custkey) values(3) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@customer_removal_n0 +POSTHOOK: Lineage: customer_removal_n0.c_address SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_city SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_custkey SCRIPT [] +POSTHOOK: Lineage: customer_removal_n0.c_mktsegment SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_name SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_nation SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_phone SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_region SIMPLE [] +PREHOOK: query: EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was 
here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: customer_removal_n0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_removal_n0 +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_removal_n0 +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +3 +3 +PREHOOK: query: -- group by keys are not primary keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason + order by d_datekey limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- group by keys are not primary keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason + order by d_datekey limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_datekey (type: bigint), d_sellingseason (type: string) + outputColumnNames: d_datekey, d_sellingseason + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: d_datekey (type: bigint), d_sellingseason (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + top n: 10 + Group By Operator + keys: d_datekey (type: bigint), d_sellingseason (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: -- negative + -- with aggregate function + EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA') + group by c_custkey, c_nation +PREHOOK: type: QUERY +POSTHOOK: query: -- negative + -- with aggregate function + EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA') + group by c_custkey, c_nation +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: customer_removal_n0 + filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c_custkey (type: bigint) + 
outputColumnNames: c_custkey + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: c_custkey (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP TABLE customer_removal_n0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@customer_removal_n0 +PREHOOK: Output: default@customer_removal_n0 +POSTHOOK: query: DROP TABLE customer_removal_n0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@customer_removal_n0 +POSTHOOK: Output: default@customer_removal_n0 +PREHOOK: query: DROP TABLE dates_removal_n0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dates_removal_n0 +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: DROP TABLE dates_removal_n0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dates_removal_n0 +POSTHOOK: Output: default@dates_removal_n0 +PREHOOK: query: -- group by reduction optimization + create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g21 +POSTHOOK: query: -- group by reduction optimization + create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g21 +PREHOOK: query: insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dest_g21 +POSTHOOK: query: insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dest_g21 +POSTHOOK: Lineage: dest_g21.key1 SCRIPT [] +POSTHOOK: Lineage: dest_g21.value1 SCRIPT [] +PREHOOK: query: -- value1 will removed because it is unused, then whole group by will be removed because key1 is unique + explain select key1 from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +POSTHOOK: query: -- value1 will removed because it is unused, then whole group by will be removed because key1 is unique + explain select key1 from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + 
Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest_g21 + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select key1 from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select key1 from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +2 +3 +4 +5 +6 +PREHOOK: query: -- same query but with filter + explain select key1 from dest_g21 where value1 > 1 group by key1, value1 +PREHOOK: type: QUERY +POSTHOOK: query: -- same query but with filter + explain select key1 from dest_g21 where value1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest_g21 + filterExpr: (value1 > 1.0D) (type: boolean) + Filter Operator + predicate: (value1 > 1.0D) (type: boolean) + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select key1 from dest_g21 where value1 > 1 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select key1 from dest_g21 where value1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +2 +4 +PREHOOK: query: -- only value1 will be removed because there is aggregate call + explain select count(key1) from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +POSTHOOK: query: -- only value1 will be removed because there is aggregate call + explain select count(key1) from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dest_g21 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: key1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column 
stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(key1) from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select count(key1) from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +1 +1 +1 +1 +1 +PREHOOK: query: explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dest_g21 + filterExpr: (value1 > 1.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value1 > 1.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: key1 + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(key1) from dest_g21 where value1 > 1 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select count(key1) from dest_g21 where value1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +1 +1 +PREHOOK: query: -- t1.key is unique even after join therefore group by = group by (t1.key) + explain select t1.key1 from dest_g21 t1 join 
dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +PREHOOK: type: QUERY +POSTHOOK: query: -- t1.key is unique even after join therefore group by = group by (t1.key) + explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (value1 > 2.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value1 > 2.0D) (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +4 +PREHOOK: query: explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE 
PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (value1 > 2.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value1 > 2.0D) (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +PREHOOK: query: -- both aggregate and one of the key1 should be removed + explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub +PREHOOK: type: QUERY +POSTHOOK: query: -- both aggregate and one of the key1 should be removed + explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest_g21 + filterExpr: (value1 < 4.5D) (type: boolean) + Filter Operator + predicate: (value1 < 4.5D) (type: boolean) + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +2 +3 +4 +PREHOOK: query: -- one of the aggregate will be removed and one of the key1 will be removed + explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub +PREHOOK: type: QUERY +POSTHOOK: query: -- one of the aggregate will be removed and one of the key1 will be removed + explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dest_g21 + filterExpr: (value1 < 4.5D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value1 < 4.5D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: key1 + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(key1) + keys: key1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 1 +3 3 +4 4 +2 2 +PREHOOK: query: DROP table dest_g21 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest_g21 +PREHOOK: Output: default@dest_g21 +POSTHOOK: query: DROP table dest_g21 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest_g21 +POSTHOOK: Output: default@dest_g21 +PREHOOK: query: CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tconst +POSTHOOK: query: CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tconst +PREHOOK: query: INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tconst +POSTHOOK: query: INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tconst +POSTHOOK: Lineage: tconst.d_year SCRIPT [] +POSTHOOK: Lineage: tconst.i SCRIPT [] +POSTHOOK: Lineage: tconst.j SCRIPT [] +PREHOOK: query: explain select i, j from tconst where i is not null group by i,j, d_year +PREHOOK: type: QUERY +POSTHOOK: query: explain select i, j from tconst where i is not null group by i,j, d_year +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tconst + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), j (type: int), d_year (type: string) + outputColumnNames: i, j, d_year + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: i (type: int), j (type: int), d_year (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select i, j from tconst where i is not null group by i,j, d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: select i, j from tconst where i is not null group by i,j, d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +3 3 +2 NULL +1 1 +PREHOOK: query: explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year +PREHOOK: type: QUERY +POSTHOOK: query: explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tconst + filterExpr: j is not null (type: boolean) + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: j is not null (type: boolean) + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: i (type: int), j (type: int), d_year (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year +PREHOOK: type: 
QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +3 3 +1 1 +PREHOOK: query: explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year +PREHOOK: type: QUERY +POSTHOOK: query: explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tconst + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), j (type: int), d_year (type: string) + outputColumnNames: i, j, d_year + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: i (type: int), j (type: int), d_year (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +3 3 +2 NULL +1 1 +PREHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 
(SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), d_year (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: j is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: j is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: j (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A 
masked pattern was here #### +1 +3 +PREHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), d_year (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
+PREHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +1 +2 +3 +PREHOOK: query: DROP TABLE tconst +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tconst +PREHOOK: Output: default@tconst +POSTHOOK: query: DROP TABLE tconst +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tconst +POSTHOOK: Output: default@tconst diff --git a/ql/src/test/results/clientpositive/perf/tez/query78.q.out b/ql/src/test/results/clientpositive/perf/tez/query78.q.out index b1102603c7..9f827f7539 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query78.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query78.q.out @@ -139,10 +139,10 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_238] - Limit [LIM_237] (rows=100 width=88) + File Output Operator [FS_241] + Limit [LIM_240] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_236] (rows=23425424 width=88) + Select Operator [SEL_239] (rows=23425424 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_73] @@ -150,28 +150,28 @@ Stage-0 Output:["_col0","_col1","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] Filter Operator [FIL_71] (rows=23425424 width=88) predicate:(COALESCE(_col11,0) > 0) - Merge Join Operator [MERGEJOIN_191] (rows=70276272 width=88) - Conds:RS_68._col1=RS_235._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_194] (rows=70276272 width=88) + Conds:RS_68._col1=RS_238._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col11","_col12","_col13"] <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_235] + SHUFFLE [RS_238] PartitionCols:_col0 - Select Operator [SEL_234] (rows=43558464 width=135) + Select Operator [SEL_237] (rows=43558464 width=135) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_233] (rows=43558464 width=135) + Group By Operator [GBY_236] (rows=43558464 width=135) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col0, _col1 Group By Operator [GBY_64] (rows=87116928 width=135) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col3, _col4 - Merge Join Operator [MERGEJOIN_189] (rows=87116928 width=135) - Conds:RS_198._col0=RS_61._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col4, _col3 + Merge Join Operator [MERGEJOIN_192] (rows=87116928 width=135) + Conds:RS_201._col0=RS_61._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + SHUFFLE [RS_201] PartitionCols:_col0 - Select Operator [SEL_193] (rows=36524 width=1119) + Select Operator [SEL_196] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_192] (rows=36524 width=1119) + Filter Operator [FIL_195] (rows=36524 width=1119) predicate:((d_year = 2000) and 
d_date_sk is not null) TableScan [TS_0] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] @@ -182,32 +182,32 @@ Stage-0 Output:["_col0","_col1","_col2","_col4","_col5","_col6"] Filter Operator [FIL_58] (rows=79197206 width=135) predicate:_col8 is null - Merge Join Operator [MERGEJOIN_188] (rows=158394413 width=135) - Conds:RS_230._col2, _col3=RS_232._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] + Merge Join Operator [MERGEJOIN_191] (rows=158394413 width=135) + Conds:RS_233._col2, _col3=RS_235._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] + SHUFFLE [RS_233] PartitionCols:_col2, _col3 - Select Operator [SEL_229] (rows=143994918 width=135) + Select Operator [SEL_232] (rows=143994918 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_228] (rows=143994918 width=135) + Filter Operator [FIL_231] (rows=143994918 width=135) predicate:((cs_item_sk = cs_item_sk) and (cs_sold_date_sk BETWEEN DynamicValue(RS_60_date_dim_d_date_sk_min) AND DynamicValue(RS_60_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_60_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) TableScan [TS_50] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_order_number","cs_quantity","cs_wholesale_cost","cs_sales_price"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_227] - Group By Operator [GBY_226] (rows=1 width=12) + BROADCAST [RS_230] + Group By Operator [GBY_229] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_205] - Group By Operator [GBY_202] (rows=1 width=12) + SHUFFLE [RS_208] + Group By Operator [GBY_205] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_199] (rows=36524 width=1119) + Select Operator [SEL_202] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_196] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_232] + SHUFFLE [RS_235] PartitionCols:_col0, _col1 - Select Operator [SEL_231] (rows=28798881 width=106) + Select Operator [SEL_234] (rows=28798881 width=106) Output:["_col0","_col1"] TableScan [TS_53] (rows=28798881 width=106) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number"] @@ -216,26 +216,26 @@ Stage-0 PartitionCols:_col1 Filter Operator [FIL_45] (rows=63887519 width=88) predicate:(COALESCE(_col7,0) > 0) - Merge Join Operator [MERGEJOIN_190] (rows=191662559 width=88) - Conds:RS_215._col1, _col0=RS_225._col1, _col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_193] (rows=191662559 width=88) + Conds:RS_218._col1, _col0=RS_228._col1, _col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9"] <-Reducer 3 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_215] + FORWARD [RS_218] PartitionCols:_col1, _col0 - Select Operator [SEL_214] (rows=174238687 width=88) + Select Operator [SEL_217] (rows=174238687 width=88) 
Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_213] (rows=174238687 width=88) + Group By Operator [GBY_216] (rows=174238687 width=88) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0, _col1 Group By Operator [GBY_17] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_185] (rows=348477374 width=88) - Conds:RS_194._col0=RS_14._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_188] (rows=348477374 width=88) + Conds:RS_197._col0=RS_14._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_194] + SHUFFLE [RS_197] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_196] <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_14] PartitionCols:_col0 @@ -243,53 +243,53 @@ Stage-0 Output:["_col0","_col1","_col2","_col4","_col5","_col6"] Filter Operator [FIL_11] (rows=316797606 width=88) predicate:_col8 is null - Merge Join Operator [MERGEJOIN_184] (rows=633595212 width=88) - Conds:RS_210._col1, _col3=RS_212._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] + Merge Join Operator [MERGEJOIN_187] (rows=633595212 width=88) + Conds:RS_213._col1, _col3=RS_215._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_210] + SHUFFLE [RS_213] PartitionCols:_col1, _col3 - Select Operator [SEL_209] (rows=575995635 width=88) + Select Operator [SEL_212] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_208] (rows=575995635 width=88) + Filter Operator [FIL_211] (rows=575995635 width=88) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) TableScan [TS_3] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_wholesale_cost","ss_sales_price"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_207] - Group By Operator [GBY_206] (rows=1 width=12) + BROADCAST [RS_210] + Group By Operator [GBY_209] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] - Group By Operator [GBY_200] (rows=1 width=12) + SHUFFLE [RS_206] + Group By Operator [GBY_203] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_195] (rows=36524 width=1119) + Select Operator [SEL_198] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_196] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] + SHUFFLE [RS_215] PartitionCols:_col0, _col1 - Select Operator [SEL_211] (rows=57591150 width=77) + Select Operator [SEL_214] 
(rows=57591150 width=77) Output:["_col0","_col1"] TableScan [TS_6] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] <-Reducer 9 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_225] + FORWARD [RS_228] PartitionCols:_col1, _col0 - Select Operator [SEL_224] (rows=43560808 width=135) + Select Operator [SEL_227] (rows=43560808 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_223] (rows=43560808 width=135) + Group By Operator [GBY_226] (rows=43560808 width=135) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col0, _col1 Group By Operator [GBY_38] (rows=87121617 width=135) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_187] (rows=87121617 width=135) - Conds:RS_196._col0=RS_35._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_190] (rows=87121617 width=135) + Conds:RS_199._col0=RS_35._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_196] + SHUFFLE [RS_199] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_196] <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col0 @@ -297,32 +297,32 @@ Stage-0 Output:["_col0","_col1","_col2","_col4","_col5","_col6"] Filter Operator [FIL_32] (rows=79201469 width=135) predicate:_col8 is null - Merge Join Operator [MERGEJOIN_186] (rows=158402938 width=135) - Conds:RS_220._col1, _col3=RS_222._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] + Merge Join Operator [MERGEJOIN_189] (rows=158402938 width=135) + Conds:RS_223._col1, _col3=RS_225._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] + SHUFFLE [RS_223] PartitionCols:_col1, _col3 - Select Operator [SEL_219] (rows=144002668 width=135) + Select Operator [SEL_222] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_218] (rows=144002668 width=135) + Filter Operator [FIL_221] (rows=144002668 width=135) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_34_date_dim_d_date_sk_min) AND DynamicValue(RS_34_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_34_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) TableScan [TS_24] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_order_number","ws_quantity","ws_wholesale_cost","ws_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_217] - Group By Operator [GBY_216] (rows=1 width=12) + BROADCAST [RS_220] + Group By Operator [GBY_219] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_204] - Group By Operator [GBY_201] (rows=1 width=12) + SHUFFLE [RS_207] + Group By Operator [GBY_204] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select 
Operator [SEL_197] (rows=36524 width=1119) + Select Operator [SEL_200] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_196] <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_222] + SHUFFLE [RS_225] PartitionCols:_col0, _col1 - Select Operator [SEL_221] (rows=14398467 width=92) + Select Operator [SEL_224] (rows=14398467 width=92) Output:["_col0","_col1"] TableScan [TS_27] (rows=14398467 width=92) default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_order_number"]
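
A minimal standalone sketch of the group-by reduction exercised by the dest_g21 cases above, assuming CBO is enabled (e.g. hive.cbo.enable=true) and the RELY primary-key constraint is honored as in those tests; the statements are taken from the golden output and can be run in isolation:

    set hive.cbo.enable=true;

    create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely);
    insert into dest_g21 values (1, 2), (2, 2), (3, 1), (4, 4), (5, null), (6, null);

    -- key1 is a RELY primary key, so the unused value1 grouping column is redundant;
    -- with no aggregate calls left, the Group By collapses to a plain projection and
    -- the plan reduces to a single fetch stage, as in the Stage-0-only plan shown above.
    explain select key1 from dest_g21 group by key1, value1;

    -- With an aggregate call present, only the redundant value1 key is removed and the
    -- aggregation itself is kept, matching the count(key1) plan shown above.
    explain select count(key1) from dest_g21 group by key1, value1;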