diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index fdd8ecc77c..f22db230bb 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -489,6 +489,7 @@ minillaplocal.query.files=\ colstats_date_min_max.q,\ compare_double_bigint_2.q,\ constprog_dpp.q,\ + constraints_optimization.q,\ current_date_timestamp.q,\ correlationoptimizer1.q,\ correlationoptimizer2.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java index 635d27e723..41c2f9e6a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdColumnUniqueness; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistribution; @@ -71,6 +72,7 @@ public RelMetadataProvider getMetadataProvider() { HiveRelMdSelectivity.SOURCE, HiveRelMdRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, + HiveRelMdColumnUniqueness.SOURCE, HiveRelMdSize.SOURCE, HiveRelMdMemory.SOURCE, new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java index 42e60de6a8..bc0ca035f4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -99,7 +99,8 @@ private final ImmutableMap hivePartitionColsMap; private final ImmutableList hiveVirtualCols; private final int noOfNonVirtualCols; - private final List keys; + private List keys; + private List nonNullablekeys; private final List referentialConstraints; final HiveConf hiveConf; @@ -133,8 +134,10 @@ public RelOptHiveTable(RelOptSchema calciteSchema, RelDataTypeFactory typeFactor this.partitionCache = partitionCache; this.colStatsCache = colStatsCache; this.noColsMissingStats = noColsMissingStats; - this.keys = generateKeys(); + this.nonNullablekeys = null; + this.keys = null; this.referentialConstraints = generateReferentialConstraints(); + generateKeys(); } //~ Methods ---------------------------------------------------------------- @@ -167,6 +170,8 @@ public Expression getExpression(Class clazz) { throw new UnsupportedOperationException(); } + public List getNonNullableKeys() { return nonNullablekeys; } + @Override public RelOptTable extend(List extendedFields) { throw new UnsupportedOperationException(); @@ -213,6 +218,16 @@ public RelOptHiveTable copy(RelDataType newRowType) { this.hiveConf, this.partitionCache, this.colStatsCache, this.noColsMissingStats); } + // Given a key this method returns true if all of the columns in the key are not nullable + public boolean isNonNullableKey(ImmutableBitSet columns) { + for 
(ImmutableBitSet key : nonNullablekeys) { + if (key.contains(columns)) { + return true; + } + } + return false; + } + @Override public boolean isKey(ImmutableBitSet columns) { for (ImmutableBitSet key : keys) { @@ -228,8 +243,9 @@ public boolean isKey(ImmutableBitSet columns) { return referentialConstraints; } - private List generateKeys() { + private void generateKeys() { ImmutableList.Builder builder = ImmutableList.builder(); + ImmutableList.Builder nonNullbuilder = ImmutableList.builder(); // First PK final PrimaryKeyInfo pki; try { @@ -250,11 +266,12 @@ public boolean isKey(ImmutableBitSet columns) { } if (pkPos == rowType.getFieldNames().size()) { LOG.error("Column for primary key definition " + pkColName + " not found"); - return ImmutableList.of(); } keys.set(pkPos); } - builder.add(keys.build()); + ImmutableBitSet key = keys.build(); + builder.add(key); + nonNullbuilder.add(key); } // Then UKs final UniqueConstraint uki; @@ -266,23 +283,32 @@ public boolean isKey(ImmutableBitSet columns) { } for (List ukCols : uki.getUniqueConstraints().values()) { ImmutableBitSet.Builder keys = ImmutableBitSet.builder(); + boolean isNonNullable = true; for (UniqueConstraintCol ukCol : ukCols) { int ukPos; for (ukPos = 0; ukPos < rowType.getFieldNames().size(); ukPos++) { String colName = rowType.getFieldNames().get(ukPos); if (ukCol.colName.equals(colName)) { + if(rowType.getFieldList().get(ukPos).getType().isNullable()) { + // they should all be nullable + isNonNullable = false; + } break; } } if (ukPos == rowType.getFieldNames().size()) { LOG.error("Column for unique constraint definition " + ukCol.colName + " not found"); - return ImmutableList.of(); } keys.set(ukPos); } - builder.add(keys.build()); + ImmutableBitSet key = keys.build(); + builder.add(key); + if(isNonNullable) { + nonNullbuilder.add(key); + } } - return builder.build(); + nonNullablekeys = nonNullbuilder.build(); + keys = builder.build(); } private List generateReferentialConstraints() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java index f43ef01293..ed6659c6cc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java @@ -171,7 +171,7 @@ public void onMatch(RelOptRuleCall call) { unique = true; } else { final Boolean unique0 = - mq.areColumnsUnique(joinInput, belowAggregateKey); + mq.areColumnsUnique(joinInput, belowAggregateKey, true); unique = unique0 != null && unique0; } if (unique) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java index 5857f730a8..b99ce7539c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java @@ -24,15 +24,19 @@ import java.util.Map; import java.util.Set; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import org.apache.calcite.adapter.druid.DruidQuery; import org.apache.calcite.linq4j.Ord; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; +import 
org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; @@ -309,9 +313,51 @@ else if(rexNode instanceof RexCall return false; } } + + // if gby keys consist of pk/uk non-pk/non-uk columns are removed if they are not being used + private ImmutableBitSet generateNewGroupset(Aggregate aggregate, ImmutableBitSet fieldsUsed) { + + ImmutableBitSet originalGroupSet = aggregate.getGroupSet(); + + if (aggregate.getGroupSets().size() > 1 || aggregate.getIndicatorCount() > 0 + || fieldsUsed.contains(originalGroupSet)) { + // if there is grouping sets, indicator or all the group keys are being used we do no need to proceed further + return originalGroupSet; + } + + final RelNode input = aggregate.getInput(); + RelMetadataQuery mq = aggregate.getCluster().getMetadataQuery(); + + final Set uniqueKeys = mq.getUniqueKeys(input, false); + if (uniqueKeys == null || uniqueKeys.isEmpty()) { + return originalGroupSet; + } + + // we have set of unique key, get to the key which is same as group by key + ImmutableBitSet groupByUniqueKey = null; + + for (ImmutableBitSet key : uniqueKeys) { + if (aggregate.getGroupSet().contains(key)) { + groupByUniqueKey = key; + break; + } + } + + if (groupByUniqueKey == null) { + // group by keys do not represent unique keys + return originalGroupSet; + } + + // we know group by key contains primary key and there is at least one column in group by which is not being used + // if that column is not part of key it should be removed + ImmutableBitSet nonKeyColumns = aggregate.getGroupSet().except(groupByUniqueKey); + ImmutableBitSet columnsToRemove = nonKeyColumns.except(fieldsUsed); + ImmutableBitSet newGroupSet = aggregate.getGroupSet().except(columnsToRemove); + + return newGroupSet; + } + /** - * Variant of {@link #trimFields(Aggregate, ImmutableBitSet, Set)} for - * {@link org.apache.calcite.rel.logical.LogicalAggregate}. 
* This method replaces group by 'constant key' with group by true (boolean) * if and only if * group by doesn't have grouping sets @@ -323,49 +369,193 @@ else if(rexNode instanceof RexCall * * This is mainly done so that hive is able to push down queries with * group by 'constant key with type not supported by druid' into druid + * */ - public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, - Set extraFields) { - - Aggregate newAggregate = aggregate; - if (!(aggregate.getIndicatorCount() > 0) - && !(aggregate.getGroupSet().isEmpty()) - && !fieldsUsed.contains(aggregate.getGroupSet())) { - final RelNode input = aggregate.getInput(); - final RelDataType rowType = input.getRowType(); - RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder(); - final List newProjects = new ArrayList<>(); - - final List inputExprs = input.getChildExps(); - if(inputExprs == null || inputExprs.isEmpty()) { - return super.trimFields(newAggregate, fieldsUsed, extraFields); + + private Aggregate rewriteGBConstantKeys(Aggregate aggregate, ImmutableBitSet fieldsUsed, + Set extraFields) { + if ((aggregate.getIndicatorCount() > 0) + || (aggregate.getGroupSet().isEmpty()) + || fieldsUsed.contains(aggregate.getGroupSet())) { + return aggregate; + } + + final RelNode input = aggregate.getInput(); + + + final RelDataType rowType = input.getRowType(); + RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder(); + final List newProjects = new ArrayList<>(); + + final List inputExprs = input.getChildExps(); + if (inputExprs == null || inputExprs.isEmpty()) { + return aggregate; + } + + boolean allConstants = true; + for (int key : aggregate.getGroupSet()) { + // getChildExprs on Join could return less number of expressions than there are coming out of join + if (inputExprs.size() <= key || !isRexLiteral(inputExprs.get(key))) { + allConstants = false; + break; } + } - boolean allConstants = true; - for(int key : aggregate.getGroupSet()) { - // getChildExprs on Join could return less number of expressions than there are coming out of join - if(inputExprs.size() <= key || !isRexLiteral(inputExprs.get(key))){ - allConstants = false; - break; + if (allConstants) { + for (int i = 0; i < rowType.getFieldCount(); i++) { + if (aggregate.getGroupSet().get(i)) { + newProjects.add(rexBuilder.makeLiteral(true)); + } else { + newProjects.add(rexBuilder.makeInputRef(input, i)); } } + relBuilder.push(input); + relBuilder.project(newProjects); + Aggregate newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(), + aggregate.getGroupSet(), null, aggregate.getAggCallList()); + return newAggregate; + } + return aggregate; + } + + @Override + public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Set extraFields) { + // Fields: + // + // | sys fields | group fields | indicator fields | agg functions | + // + // Two kinds of trimming: + // + // 1. If agg rel has system fields but none of these are used, create an + // agg rel with no system fields. + // + // 2. If aggregate functions are not used, remove them. + // + // But group and indicator fields stay, even if they are not used. + + aggregate = rewriteGBConstantKeys(aggregate, fieldsUsed, extraFields); + + final RelDataType rowType = aggregate.getRowType(); + + // Compute which input fields are used. + // 1. group fields are always used + final ImmutableBitSet.Builder inputFieldsUsed = + aggregate.getGroupSet().rebuild(); + // 2. 
agg functions + for (AggregateCall aggCall : aggregate.getAggCallList()) { + for (int i : aggCall.getArgList()) { + inputFieldsUsed.set(i); + } + if (aggCall.filterArg >= 0) { + inputFieldsUsed.set(aggCall.filterArg); + } + } - if (allConstants) { - for (int i = 0; i < rowType.getFieldCount(); i++) { - if (aggregate.getGroupSet().get(i)) { - newProjects.add(rexBuilder.makeLiteral(true)); - } else { - newProjects.add(rexBuilder.makeInputRef(input, i)); - } - } - relBuilder.push(input); - relBuilder.project(newProjects); - newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(), - aggregate.getGroupSet(), null, aggregate.getAggCallList()); + // Create input with trimmed columns. + final RelNode input = aggregate.getInput(); + final Set inputExtraFields = Collections.emptySet(); + final TrimResult trimResult = + trimChild(aggregate, input, inputFieldsUsed.build(), inputExtraFields); + final RelNode newInput = trimResult.left; + final Mapping inputMapping = trimResult.right; + + ImmutableBitSet originalGroupSet = aggregate.getGroupSet(); + ImmutableBitSet updatedGroupSet = generateNewGroupset(aggregate, fieldsUsed); + ImmutableBitSet gbKeysDeleted = originalGroupSet.except(updatedGroupSet); + ImmutableBitSet updatedGroupFields = ImmutableBitSet.range(originalGroupSet.cardinality()); + final int updatedGroupCount = updatedGroupSet.cardinality(); + + // we need to clear the bits corresponding to deleted gb keys + int setIdx = 0; + while(setIdx != -1) { + setIdx = gbKeysDeleted.nextSetBit(setIdx); + if(setIdx != -1) { + updatedGroupFields = updatedGroupFields.clear(setIdx); + setIdx++; + } + } + fieldsUsed = + fieldsUsed.union(updatedGroupFields); + + // If the input is unchanged, and we need to project all columns, + // there's nothing to do. + if (input == newInput + && fieldsUsed.equals(ImmutableBitSet.range(rowType.getFieldCount()))) { + return result(aggregate, + Mappings.createIdentity(rowType.getFieldCount())); + } + + // update the group by keys based on inputMapping + ImmutableBitSet newGroupSet = + Mappings.apply(inputMapping, updatedGroupSet); + + // Which agg calls are used by our consumer? + int originalGroupCount = aggregate.getGroupSet().cardinality(); + int j = originalGroupCount; + int usedAggCallCount = 0; + for (int i = 0; i < aggregate.getAggCallList().size(); i++) { + if (fieldsUsed.get(j++)) { + ++usedAggCallCount; + } + } + + // Offset due to the number of system fields having changed. + Mapping mapping = + Mappings.create( + MappingType.INVERSE_SURJECTION, + rowType.getFieldCount(), + updatedGroupCount + usedAggCallCount); + + + // if group keys were reduced, it means we didn't have grouping therefore + // we don't need to transform group sets + ImmutableList newGroupSets = null; + if(!updatedGroupSet.equals(aggregate.getGroupSet())) { + newGroupSets = ImmutableList.of(newGroupSet); + } else { + newGroupSets = ImmutableList.copyOf( + Iterables.transform(aggregate.getGroupSets(), + input1 -> Mappings.apply(inputMapping, input1))); + } + + // Populate mapping of where to find the fields. System, group key and + // indicator fields first. + int gbKeyIdx = 0; + for (j = 0; j < originalGroupCount; j++) { + if(fieldsUsed.get(j)) { + mapping.set(j, gbKeyIdx); + gbKeyIdx++; + } + } + + // Now create new agg calls, and populate mapping for them. 
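      // Each surviving aggregate call is rebuilt against the trimmed input: its
      // argument list and FILTER column are remapped through inputMapping, and its
      // output position is recorded as updatedGroupCount + newAggCallList.size(),
      // i.e. immediately after the retained group-by keys. Calls whose output is
      // not referenced (fieldsUsed.get(j) is false) are dropped altogether.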
+ relBuilder.push(newInput); + final List newAggCallList = new ArrayList<>(); + j = originalGroupCount; // because lookup in fieldsUsed is done using original group count + for (AggregateCall aggCall : aggregate.getAggCallList()) { + if (fieldsUsed.get(j)) { + final ImmutableList args = + relBuilder.fields( + Mappings.apply2(inputMapping, aggCall.getArgList())); + final RexNode filterArg = aggCall.filterArg < 0 ? null + : relBuilder.field(Mappings.apply(inputMapping, aggCall.filterArg)); + RelBuilder.AggCall newAggCall = + relBuilder.aggregateCall(aggCall.getAggregation(), + aggCall.isDistinct(), aggCall.isApproximate(), + filterArg, aggCall.name, args); + mapping.set(j, updatedGroupCount + newAggCallList.size()); + newAggCallList.add(newAggCall); } + ++j; } - return super.trimFields(newAggregate, fieldsUsed, extraFields); + + final RelBuilder.GroupKey groupKey = + relBuilder.groupKey(newGroupSet, newGroupSets); + relBuilder.aggregate(groupKey, newAggCallList); + + return result(relBuilder.build(), mapping); } + /** * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for * {@link org.apache.calcite.rel.logical.LogicalProject}. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java new file mode 100644 index 0000000000..0d08b37abb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Correlate; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.JoinInfo; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SetOp; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.BitSets; +import org.apache.calcite.util.ImmutableBitSet; + +import com.google.common.collect.ImmutableSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.plan.ColStatistics; + +import java.util.BitSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * EstimateUniqueKeys provides an ability to estimate unique keys based on statistics + */ +//TODO: Ideally RelMdUniqueKeys should be modified (on Calcite side) to accept a parameter based on which + // this logic whoud be implemented +public class EstimateUniqueKeys { + //~ Constructors ----------------------------------------------------------- + private EstimateUniqueKeys() {} + + //~ Methods ---------------------------------------------------------------- + + private static Set getUniqueKeys(HiveFilter rel) { + return getUniqueKeys(rel.getInput()); + } + + private static Set getUniqueKeys(HiveSortLimit rel) { + return getUniqueKeys(rel.getInput()); + } + + private static Set getUniqueKeys(Correlate rel) { + return getUniqueKeys(rel.getLeft() ); + } + + + //Infer Uniquenes if: - rowCount(col) = ndv(col) - TBD for numerics: max(col) + // - min(col) = rowCount(col) + private static Set generateKeysUsingStatsEstimation(Project rel, + HiveTableScan tScan) { + Map posMap = new HashMap(); + int projectPos = 0; + int colStatsPos = 0; + + BitSet projectedCols = new BitSet(); + for (RexNode r : rel.getProjects()) { + if (r instanceof RexInputRef) { + projectedCols.set(((RexInputRef) r).getIndex()); + posMap.put(colStatsPos, projectPos); + colStatsPos++; + } + projectPos++; + } + + RelMetadataQuery mq = rel.getCluster().getMetadataQuery(); + double numRows = mq.getRowCount(tScan); + List colStats = tScan.getColStat(BitSets + .toList(projectedCols)); + Set keys = new HashSet(); + + colStatsPos = 0; + for (ColStatistics cStat : colStats) { + boolean isKey = false; + if (cStat.getCountDistint() >= numRows) { + isKey = true; + } + if (!isKey && cStat.getRange() != null && + cStat.getRange().maxValue != null && + cStat.getRange().minValue != null) { + double r = cStat.getRange().maxValue.doubleValue() - + cStat.getRange().minValue.doubleValue() + 1; + isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON); + } + if (isKey) { + ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos)); + keys.add(key); + } + colStatsPos++; + } + + return keys; + + } + + 
private static Set getUniqueKeys(HiveProject rel) { + + HiveTableScan tScan = getTableScan(rel.getInput(), false); + if (tScan != null) { + return generateKeysUsingStatsEstimation(rel, tScan); + } + + // LogicalProject maps a set of rows to a different set; + // Without knowledge of the mapping function(whether it + // preserves uniqueness), it is only safe to derive uniqueness + // info from the child of a project when the mapping is f(a) => a. + // + // Further more, the unique bitset coming from the child needs + // to be mapped to match the output of the project. + final Map mapInToOutPos = new HashMap<>(); + final List projExprs = rel.getProjects(); + final Set projUniqueKeySet = new HashSet<>(); + + // Build an input to output position map. + for (int i = 0; i < projExprs.size(); i++) { + RexNode projExpr = projExprs.get(i); + if (projExpr instanceof RexInputRef) { + mapInToOutPos.put(((RexInputRef) projExpr).getIndex(), i); + } + } + + if (mapInToOutPos.isEmpty()) { + // if there's no RexInputRef in the projected expressions + // return empty set. + return projUniqueKeySet; + } + + Set childUniqueKeySet = + getUniqueKeys(rel.getInput()); + + if (childUniqueKeySet != null) { + // Now add to the projUniqueKeySet the child keys that are fully + // projected. + for (ImmutableBitSet colMask : childUniqueKeySet) { + ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder(); + boolean completeKeyProjected = true; + for (int bit : colMask) { + if (mapInToOutPos.containsKey(bit)) { + tmpMask.set(mapInToOutPos.get(bit)); + } else { + // Skip the child unique key if part of it is not + // projected. + completeKeyProjected = false; + break; + } + } + if (completeKeyProjected) { + projUniqueKeySet.add(tmpMask.build()); + } + } + } + + return projUniqueKeySet; + + } + + private static Set getUniqueKeys(HiveJoin rel) { + RelNode left = rel.getLeft(); + if(left != null && left instanceof HepRelVertex) { + left = ((HepRelVertex) left).getCurrentRel(); + } + RelNode right = rel.getRight(); + if(right != null && right instanceof HepRelVertex) { + right = ((HepRelVertex) right).getCurrentRel(); + } + + // first add the different combinations of concatenated unique keys + // from the left and the right, adjusting the right hand side keys to + // reflect the addition of the left hand side + // + // NOTE zfong 12/18/06 - If the number of tables in a join is large, + // the number of combinations of unique key sets will explode. If + // that is undesirable, use RelMetadataQuery.areColumnsUnique() as + // an alternative way of getting unique key information. 
+ + final Set retSet = new HashSet<>(); + final Set leftSet = getUniqueKeys(left); + Set rightSet = null; + + final Set tmpRightSet = getUniqueKeys(right); + int nFieldsOnLeft = left.getRowType().getFieldCount(); + + if (tmpRightSet != null) { + rightSet = new HashSet<>(); + for (ImmutableBitSet colMask : tmpRightSet) { + ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder(); + for (int bit : colMask) { + tmpMask.set(bit + nFieldsOnLeft); + } + rightSet.add(tmpMask.build()); + } + + if (leftSet != null) { + for (ImmutableBitSet colMaskRight : rightSet) { + for (ImmutableBitSet colMaskLeft : leftSet) { + retSet.add(colMaskLeft.union(colMaskRight)); + } + } + } + } + + // locate the columns that participate in equijoins + final JoinInfo joinInfo = rel.analyzeCondition(); + RelMetadataQuery mq = rel.getCluster().getMetadataQuery(); + + // determine if either or both the LHS and RHS are unique on the + // equijoin columns + final Boolean leftUnique = + mq.areColumnsUnique(left, joinInfo.leftSet()); + final Boolean rightUnique = + mq.areColumnsUnique(right, joinInfo.rightSet()); + + // if the right hand side is unique on its equijoin columns, then we can + // add the unique keys from left if the left hand side is not null + // generating + if ((rightUnique != null) + && rightUnique + && (leftSet != null) + && !(rel.getJoinType().generatesNullsOnLeft())) { + retSet.addAll(leftSet); + } + + // same as above except left and right are reversed + if ((leftUnique != null) + && leftUnique + && (rightSet != null) + && !(rel.getJoinType().generatesNullsOnRight())) { + retSet.addAll(rightSet); + } + + return retSet; + } + + private static Set getUniqueKeys(HiveSemiJoin rel) { + // only return the unique keys from the LHS since a semijoin only + // returns the LHS + return getUniqueKeys(rel.getLeft()); + } + + private static Set getUniqueKeys(HiveAggregate rel) { + // group by keys form a unique key + return ImmutableSet.of(rel.getGroupSet()); + } + + private static Set getUniqueKeys(SetOp rel) { + if (!rel.all) { + return ImmutableSet.of( + ImmutableBitSet.range(rel.getRowType().getFieldCount())); + } + return ImmutableSet.of(); + } + + // Catch-all rule when none of the others apply. + public static Set getUniqueKeys(RelNode rel) { + if(rel != null && rel instanceof HepRelVertex) { + rel = ((HepRelVertex) rel).getCurrentRel(); + } + if (rel instanceof HiveFilter) { + return getUniqueKeys((HiveFilter) rel); + } else if (rel instanceof HiveSortLimit) { + return getUniqueKeys((HiveSortLimit) rel); + } else if (rel instanceof Correlate) { + return getUniqueKeys((Correlate) rel); + } else if (rel instanceof HiveProject) { + return getUniqueKeys((HiveProject) rel); + } else if (rel instanceof HiveJoin) { + return getUniqueKeys((HiveJoin) rel); + } else if (rel instanceof HiveSemiJoin) { + return getUniqueKeys((HiveSemiJoin) rel); + } else if (rel instanceof HiveAggregate) { + return getUniqueKeys((HiveAggregate) rel); + } else if (rel instanceof SetOp) { + return getUniqueKeys((SetOp) rel); + } else { + return null; + } + } + + /* + * traverse a path of Filter, Projects to get to the TableScan. + * In case of Unique keys, stop if you reach a Project, it will be handled + * by the invocation on the Project. + * In case of getting the base rowCount of a Path, keep going past a Project. 
+ */ + static HiveTableScan getTableScan(RelNode r, boolean traverseProject) { + + while (r != null && !(r instanceof HiveTableScan)) { + if (r instanceof HepRelVertex) { + r = ((HepRelVertex) r).getCurrentRel(); + } else if (r instanceof Filter) { + r = ((Filter) r).getInput(); + } else if (traverseProject && r instanceof Project) { + r = ((Project) r).getInput(); + } else { + r = null; + } + } + return r == null ? null : (HiveTableScan) r; + } +} + +// End EstimateUniqueKeys.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java new file mode 100644 index 0000000000..8a9cfe49dd --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + + +import org.apache.calcite.rel.metadata.BuiltInMetadata; +import org.apache.calcite.rel.metadata.MetadataDef; +import org.apache.calcite.rel.metadata.MetadataHandler; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; + +public class HiveRelMdColumnUniqueness + implements MetadataHandler { + public static final RelMetadataProvider SOURCE = + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.COLUMN_UNIQUENESS.method, new HiveRelMdColumnUniqueness()); + + //~ Constructors ----------------------------------------------------------- + + private HiveRelMdColumnUniqueness() {} + + //~ Methods ---------------------------------------------------------------- + + public MetadataDef getDef() { + return BuiltInMetadata.ColumnUniqueness.DEF; + } + + public Boolean areColumnsUnique(HiveTableScan rel, RelMetadataQuery mq, + ImmutableBitSet columns, boolean ignoreNulls) { + if(ignoreNulls) { + return rel.getTable().isKey(columns); + } else { + RelOptHiveTable tbl = (RelOptHiveTable)rel.getTable(); + return tbl.isNonNullableKey(columns); + } + } +} + +// End HiveRelMdColumnUniqueness.java + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index 1ca1937ed9..576ed34bf3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -317,7 +317,7 @@ private static double pkSelectivity(Join joinRel, RelMetadataQuery mq, boolean l (!leftChild && joinRel.getJoinType().generatesNullsOnLeft())) { return 1.0; } else { - HiveTableScan tScan = HiveRelMdUniqueKeys.getTableScan(child, true); + HiveTableScan tScan = EstimateUniqueKeys.getTableScan(child, true); if (tScan != null) { double tRowCount = mq.getRowCount(tScan); return childRowCount / tRowCount; @@ -329,7 +329,10 @@ private static double pkSelectivity(Join joinRel, RelMetadataQuery mq, boolean l private static boolean isKey(ImmutableBitSet c, RelNode rel, RelMetadataQuery mq) { boolean isKey = false; - Set keys = mq.getUniqueKeys(rel); + //EstimateUniqueKeys doesn't go through metadata providers anymore, which means we will not be + // taking advantage of metadata caching anymore. This could potential increase query compile time + // Leaving a note here in case we see increase in timings + Set keys = EstimateUniqueKeys.getUniqueKeys(rel); if (keys != null) { for (ImmutableBitSet key : keys) { if (key.equals(c)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java index 3bf62c535c..7772335ac5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java @@ -17,31 +17,20 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.stats; -import java.util.BitSet; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; -import org.apache.calcite.plan.RelOptUtil; -import org.apache.calcite.plan.hep.HepRelVertex; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.metadata.BuiltInMetadata; import org.apache.calcite.rel.metadata.MetadataDef; import org.apache.calcite.rel.metadata.MetadataHandler; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataQuery; -import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.util.BitSets; import org.apache.calcite.util.BuiltInMethod; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; -import org.apache.hadoop.hive.ql.plan.ColStatistics; public class HiveRelMdUniqueKeys implements MetadataHandler { @@ -54,138 +43,15 @@ return BuiltInMetadata.UniqueKeys.DEF; } - /* - * Infer Uniquenes if: - rowCount(col) = ndv(col) - TBD for numerics: max(col) - * - min(col) = rowCount(col) - * - * Why are we intercepting Project and not TableScan? Because if we - * have a method for TableScan, it will not know which columns to check for. - * Inferring Uniqueness for all columns is very expensive right now. The flip - * side of doing this is, it only works post Field Trimming. 
- */ - public Set getUniqueKeys(Project rel, RelMetadataQuery mq, boolean ignoreNulls) { - HiveTableScan tScan = getTableScan(rel.getInput(), false); - - if (tScan == null) { - // If HiveTableScan is not found, e.g., not sequence of Project and - // Filter operators, execute the original getUniqueKeys method - - // LogicalProject maps a set of rows to a different set; - // Without knowledge of the mapping function(whether it - // preserves uniqueness), it is only safe to derive uniqueness - // info from the child of a project when the mapping is f(a) => a. - // - // Further more, the unique bitset coming from the child needs - // to be mapped to match the output of the project. - final Map mapInToOutPos = new HashMap<>(); - final List projExprs = rel.getProjects(); - final Set projUniqueKeySet = new HashSet<>(); - - // Build an input to output position map. - for (int i = 0; i < projExprs.size(); i++) { - RexNode projExpr = projExprs.get(i); - if (projExpr instanceof RexInputRef) { - mapInToOutPos.put(((RexInputRef) projExpr).getIndex(), i); - } - } - - if (mapInToOutPos.isEmpty()) { - // if there's no RexInputRef in the projected expressions - // return empty set. - return projUniqueKeySet; - } - - Set childUniqueKeySet = - mq.getUniqueKeys(rel.getInput(), ignoreNulls); - - if (childUniqueKeySet != null) { - // Now add to the projUniqueKeySet the child keys that are fully - // projected. - for (ImmutableBitSet colMask : childUniqueKeySet) { - ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder(); - boolean completeKeyProjected = true; - for (int bit : colMask) { - if (mapInToOutPos.containsKey(bit)) { - tmpMask.set(mapInToOutPos.get(bit)); - } else { - // Skip the child unique key if part of it is not - // projected. - completeKeyProjected = false; - break; - } - } - if (completeKeyProjected) { - projUniqueKeySet.add(tmpMask.build()); - } - } - } - - return projUniqueKeySet; - } - - Map posMap = new HashMap(); - int projectPos = 0; - int colStatsPos = 0; - - BitSet projectedCols = new BitSet(); - for (RexNode r : rel.getProjects()) { - if (r instanceof RexInputRef) { - projectedCols.set(((RexInputRef) r).getIndex()); - posMap.put(colStatsPos, projectPos); - colStatsPos++; - } - projectPos++; + public Set getUniqueKeys(HiveTableScan rel, RelMetadataQuery mq, + boolean ignoreNulls) { + RelOptHiveTable tbl = (RelOptHiveTable) rel.getTable(); + List keyList = tbl.getNonNullableKeys(); + if (keyList != null) { + Set keySet = new HashSet<>(keyList); + return keySet; } - - double numRows = mq.getRowCount(tScan); - List colStats = tScan.getColStat(BitSets - .toList(projectedCols)); - Set keys = new HashSet(); - - colStatsPos = 0; - for (ColStatistics cStat : colStats) { - boolean isKey = false; - if (cStat.getCountDistint() >= numRows) { - isKey = true; - } - if ( !isKey && cStat.getRange() != null && - cStat.getRange().maxValue != null && - cStat.getRange().minValue != null) { - double r = cStat.getRange().maxValue.doubleValue() - - cStat.getRange().minValue.doubleValue() + 1; - isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON); - } - if ( isKey ) { - ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos)); - keys.add(key); - } - colStatsPos++; - } - - return keys; + return null; } - - /* - * traverse a path of Filter, Projects to get to the TableScan. - * In case of Unique keys, stop if you reach a Project, it will be handled - * by the invocation on the Project. - * In case of getting the base rowCount of a Path, keep going past a Project. 
- */ - static HiveTableScan getTableScan(RelNode r, boolean traverseProject) { - - while (r != null && !(r instanceof HiveTableScan)) { - if (r instanceof HepRelVertex) { - r = ((HepRelVertex) r).getCurrentRel(); - } else if (r instanceof Filter) { - r = ((Filter) r).getInput(); - } else if (traverseProject && r instanceof Project) { - r = ((Project) r).getInput(); - } else { - r = null; - } - } - return r == null ? null : (HiveTableScan) r; - } - } diff --git a/ql/src/test/queries/clientpositive/constraints_optimization.q b/ql/src/test/queries/clientpositive/constraints_optimization.q new file mode 100644 index 0000000000..49864655ad --- /dev/null +++ b/ql/src/test/queries/clientpositive/constraints_optimization.q @@ -0,0 +1,146 @@ +set hive.strict.checks.cartesian.product=false; + +CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely); + +CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_id` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`, `d_id`) disable rely); + + -- group by key has single primary key + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey; + + -- mix of primary + non-primary keys + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation; + + -- multiple keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id; + + -- multiple keys + non-keys + different order + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason + order by d_datekey limit 10; + + -- multiple keys in different order and mixed with non-keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10; + + -- same as above but with aggregate + EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10; + + -- join + insert into dates_removal_n0(d_datekey, d_id) values(3, 0); + insert into dates_removal_n0(d_datekey, d_id) values(3, 1); + insert into customer_removal_n0 (c_custkey) values(3); + + EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id; + SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id; + + -- group by keys are not primary keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason + order by d_datekey limit 10; + + -- negative + -- with aggregate function + EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA') + group by c_custkey, c_nation; + + DROP TABLE customer_removal_n0; + DROP TABLE dates_removal_n0; + + -- group by reduction optimization + create table dest_g21 
(key1 int, value1 double, primary key(key1) disable rely); + insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null); + + -- value1 will removed because it is unused, then whole group by will be removed because key1 is unique + explain select key1 from dest_g21 group by key1, value1; + select key1 from dest_g21 group by key1, value1; + -- same query but with filter + explain select key1 from dest_g21 where value1 > 1 group by key1, value1; + select key1 from dest_g21 where value1 > 1 group by key1, value1; + + -- only value1 will be removed because there is aggregate call + explain select count(key1) from dest_g21 group by key1, value1; + select count(key1) from dest_g21 group by key1, value1; + + explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1; + select count(key1) from dest_g21 where value1 > 1 group by key1, value1; + + -- t1.key is unique even after join therefore group by = group by (t1.key) + explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1; + select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1; + + explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1; + select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1; + + -- both aggregate and one of the key1 should be removed + explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub; + select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub; + + -- one of the aggregate will be removed and one of the key1 will be removed + explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub; + select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub; + + DROP table dest_g21; + +CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string); +INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010'); + +-- explicit NOT NULL filter +explain select i, j from tconst where i is not null group by i,j, d_year; +select i, j from tconst where i is not null group by i,j, d_year; + +-- filter on i should be removed +explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year; +select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year; + +-- where will be removed since i is not null is always true +explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year; +select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year; + +-- should not have implicit filter on join keys +explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year; +select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year; + +-- both join keys have NOT NULL +explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year; +select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year; + +DROP TABLE tconst; + + +-- UNIQUE + NOT NULL (same as primary key) +create table dest_g21 (key1 int NOT NULL disable rely, value1 double, UNIQUE(key1) disable rely); 
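-- Illustrative companion query (not part of this patch's test file): because key1
-- carries both NOT NULL and UNIQUE with RELY, it behaves like a primary key, so the
-- optimizer may likewise drop the unused value1 from the grouping columns while
-- keeping the aggregate call, as in the dest_g21 primary-key cases above.
explain select key1, count(*) from dest_g21 group by key1, value1;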
+explain select key1 from dest_g21 group by key1, value1; + +-- UNIQUE with nullabiity +create table dest_g24 (key1 int , value1 double, UNIQUE(key1) disable rely); +explain select key1 from dest_g24 group by key1, value1; + +DROP TABLE dest_g21; +DROP TABLE dest_g24; diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out new file mode 100644 index 0000000000..18aa708b90 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -0,0 +1,1971 @@ +PREHOOK: query: CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer_removal_n0 +POSTHOOK: query: CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer_removal_n0 +PREHOOK: query: CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_id` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`, `d_id`) disable rely) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_id` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`, `d_id`) disable rely) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dates_removal_n0 +PREHOOK: query: -- group by key has single primary key + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- group by key has single primary key + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: customer_removal_n0 + filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Filter Operator + predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: -- mix of 
primary + non-primary keys + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- mix of primary + non-primary keys + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: customer_removal_n0 + filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Filter Operator + predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: -- multiple keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id +PREHOOK: type: QUERY +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- multiple keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: -- multiple keys + non-keys + different order + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason + order by d_datekey limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- multiple keys + non-keys + different order + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason + order by d_datekey limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + 
expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: -- multiple keys in different order and mixed with non-keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- multiple keys in different order and mixed with non-keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: -- same as above but with aggregate + EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- same as above but with aggregate + EXPLAIN 
SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint), d_id (type: bigint) + outputColumnNames: d_datekey, d_id + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: d_datekey (type: bigint), d_id (type: bigint) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Group By Operator + aggregations: count() + keys: d_datekey (type: bigint), d_id (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: bigint), _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- join + insert into dates_removal_n0(d_datekey, d_id) values(3, 0) +PREHOOK: type: QUERY +PREHOOK: 
Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: -- join + insert into dates_removal_n0(d_datekey, d_id) values(3, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dates_removal_n0 +POSTHOOK: Lineage: dates_removal_n0.d_date SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_datekey SCRIPT [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminmonth SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminweek SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_dayofweek SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_holidayfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_id SCRIPT [] +POSTHOOK: Lineage: dates_removal_n0.d_lastdayinmonthfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_lastdayinweekfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_month SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_monthnuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_sellingseason SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_weekdayfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_weeknuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_year SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_yearmonth SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_yearmonthnum SIMPLE [] +PREHOOK: query: insert into dates_removal_n0(d_datekey, d_id) values(3, 1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: insert into dates_removal_n0(d_datekey, d_id) values(3, 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dates_removal_n0 +POSTHOOK: Lineage: dates_removal_n0.d_date SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_datekey SCRIPT [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminmonth SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminweek SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_dayofweek SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_holidayfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_id SCRIPT [] +POSTHOOK: Lineage: dates_removal_n0.d_lastdayinmonthfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_lastdayinweekfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_month SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_monthnuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_sellingseason SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_weekdayfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_weeknuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_year SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_yearmonth SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_yearmonthnum SIMPLE [] +PREHOOK: query: insert into customer_removal_n0 (c_custkey) values(3) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@customer_removal_n0 +POSTHOOK: query: insert into customer_removal_n0 (c_custkey) values(3) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@customer_removal_n0 +POSTHOOK: Lineage: customer_removal_n0.c_address SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_city SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_custkey SCRIPT [] +POSTHOOK: Lineage: customer_removal_n0.c_mktsegment SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_name SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_nation SIMPLE [] +POSTHOOK: 
Lineage: customer_removal_n0.c_phone SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_region SIMPLE [] +PREHOOK: query: EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_removal_n0 +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_removal_n0 +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: customer_removal_n0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_removal_n0 +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_removal_n0 +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +3 +3 +PREHOOK: query: -- group by keys are not primary keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group 
by d_datekey, d_sellingseason + order by d_datekey limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- group by keys are not primary keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason + order by d_datekey limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_datekey (type: bigint), d_sellingseason (type: string) + outputColumnNames: d_datekey, d_sellingseason + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: d_datekey (type: bigint), d_sellingseason (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + top n: 10 + Group By Operator + keys: d_datekey (type: bigint), d_sellingseason (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: -- 
negative + -- with aggregate function + EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA') + group by c_custkey, c_nation +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- negative + -- with aggregate function + EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA') + group by c_custkey, c_nation +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: customer_removal_n0 + filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: c_custkey + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: c_custkey (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP TABLE customer_removal_n0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@customer_removal_n0 +PREHOOK: Output: default@customer_removal_n0 +POSTHOOK: query: DROP TABLE customer_removal_n0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@customer_removal_n0 +POSTHOOK: Output: default@customer_removal_n0 +PREHOOK: query: DROP TABLE dates_removal_n0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dates_removal_n0 +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: DROP TABLE dates_removal_n0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dates_removal_n0 +POSTHOOK: Output: default@dates_removal_n0 +PREHOOK: query: -- group by reduction optimization + create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely) 
+PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g21 +POSTHOOK: query: -- group by reduction optimization + create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g21 +PREHOOK: query: insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dest_g21 +POSTHOOK: query: insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dest_g21 +POSTHOOK: Lineage: dest_g21.key1 SCRIPT [] +POSTHOOK: Lineage: dest_g21.value1 SCRIPT [] +PREHOOK: query: -- value1 will removed because it is unused, then whole group by will be removed because key1 is unique + explain select key1 from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: -- value1 will removed because it is unused, then whole group by will be removed because key1 is unique + explain select key1 from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest_g21 + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select key1 from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select key1 from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +2 +3 +4 +5 +6 +PREHOOK: query: -- same query but with filter + explain select key1 from dest_g21 where value1 > 1 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: -- same query but with filter + explain select key1 from dest_g21 where value1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest_g21 + filterExpr: (value1 > 1.0D) (type: boolean) + Filter Operator + predicate: (value1 > 1.0D) (type: boolean) + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select key1 from dest_g21 where value1 > 1 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select key1 from dest_g21 where value1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +2 +4 +PREHOOK: query: -- only value1 will be removed because there is aggregate call + explain select count(key1) from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: -- only value1 will be removed because there is aggregate call + explain select count(key1) from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### 
A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dest_g21 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: key1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(key1) from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select count(key1) from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +1 +1 +1 +1 +1 +PREHOOK: query: explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dest_g21 + filterExpr: (value1 > 1.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value1 > 1.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: key1 + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key1 (type: int) + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(key1) from dest_g21 where value1 > 1 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select count(key1) from dest_g21 where value1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +1 +1 +PREHOOK: query: -- t1.key is unique even after join therefore group by = group by (t1.key) + explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: -- t1.key is unique even after join therefore group by = group by (t1.key) + explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (value1 > 2.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value1 > 2.0D) (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + 
outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +4 +PREHOOK: query: explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (value1 > 2.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value1 > 2.0D) (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 
2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +PREHOOK: query: -- both aggregate and one of the key1 should be removed + explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: -- both aggregate and one of the key1 should be removed + explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest_g21 + filterExpr: (value1 < 4.5D) (type: boolean) + Filter Operator + predicate: (value1 < 4.5D) (type: boolean) + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### 
+1 +2 +3 +4 +PREHOOK: query: -- one of the aggregate will be removed and one of the key1 will be removed + explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: -- one of the aggregate will be removed and one of the key1 will be removed + explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dest_g21 + filterExpr: (value1 < 4.5D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value1 < 4.5D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: key1 + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(key1) + keys: key1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 1 +3 3 +4 4 +2 2 +PREHOOK: query: DROP table dest_g21 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest_g21 +PREHOOK: Output: default@dest_g21 +POSTHOOK: query: DROP table dest_g21 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest_g21 +POSTHOOK: Output: default@dest_g21 +PREHOOK: query: CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@tconst +POSTHOOK: query: CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tconst +PREHOOK: query: INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tconst +POSTHOOK: query: INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tconst +POSTHOOK: Lineage: tconst.d_year SCRIPT [] +POSTHOOK: Lineage: tconst.i SCRIPT [] +POSTHOOK: Lineage: tconst.j SCRIPT [] +PREHOOK: query: explain select i, j from tconst where i is not null group by i,j, d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: explain select i, j from tconst where i is not null group by i,j, d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tconst + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), j (type: int), d_year (type: string) + outputColumnNames: i, j, d_year + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: i (type: int), j (type: int), d_year (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select i, j from tconst where i is not null group by i,j, d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: select i, j from tconst where i is not null group by i,j, d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A 
masked pattern was here #### +3 3 +2 NULL +1 1 +PREHOOK: query: explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tconst + filterExpr: j is not null (type: boolean) + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: j is not null (type: boolean) + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: i (type: int), j (type: int), d_year (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +3 3 +1 1 +PREHOOK: query: explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was 
here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tconst + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), j (type: int), d_year (type: string) + outputColumnNames: i, j, d_year + Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: i (type: int), j (type: int), d_year (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +3 3 +2 NULL +1 1 +PREHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), d_year (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 
(type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: j is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: j is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: j (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +1 +3 +PREHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 
(SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), d_year (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year +PREHOOK: type: QUERY +PREHOOK: Input: default@tconst +#### A masked pattern was here #### +POSTHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tconst +#### A masked pattern was here #### +1 +2 +3 +PREHOOK: query: DROP TABLE tconst +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tconst 
+PREHOOK: Output: default@tconst +POSTHOOK: query: DROP TABLE tconst +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tconst +POSTHOOK: Output: default@tconst +PREHOOK: query: create table dest_g21 (key1 int NOT NULL disable rely, value1 double, UNIQUE(key1) disable rely) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g21 +POSTHOOK: query: create table dest_g21 (key1 int NOT NULL disable rely, value1 double, UNIQUE(key1) disable rely) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g21 +PREHOOK: query: explain select key1 from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: explain select key1 from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest_g21 + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: create table dest_g24 (key1 int , value1 double, UNIQUE(key1) disable rely) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g24 +POSTHOOK: query: create table dest_g24 (key1 int , value1 double, UNIQUE(key1) disable rely) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g24 +PREHOOK: query: explain select key1 from dest_g24 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g24 +#### A masked pattern was here #### +POSTHOOK: query: explain select key1 from dest_g24 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g24 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dest_g24 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key1 (type: int), value1 (type: double) + outputColumnNames: key1, value1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key1 (type: int), value1 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: double) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP TABLE dest_g21 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest_g21 +PREHOOK: Output: default@dest_g21 +POSTHOOK: query: DROP TABLE dest_g21 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest_g21 +POSTHOOK: Output: default@dest_g21 +PREHOOK: query: DROP TABLE dest_g24 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest_g24 +PREHOOK: Output: default@dest_g24 +POSTHOOK: query: DROP TABLE dest_g24 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest_g24 +POSTHOOK: Output: default@dest_g24
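The golden output above exercises constraint-based group-by simplification: when the grouping columns contain a PRIMARY KEY, or a UNIQUE key whose columns are also non-nullable, declared with RELY, the optimizer treats the remaining grouping columns as functionally dependent and prunes them; if no aggregate call is left, the Group By operator disappears entirely. A minimal HiveQL sketch of the pattern, reusing the dest_g21 and dest_g24 fixtures from the test above (the exact plan shapes depend on the enabled optimizer rules and on statistics):

-- key1 is a RELY primary key, so every other column is functionally
-- dependent on it and grouping on value1 adds nothing.
CREATE TABLE dest_g21 (key1 int, value1 double,
                       PRIMARY KEY (key1) DISABLE RELY);

-- value1 is unused in the select list ...
SELECT key1 FROM dest_g21 GROUP BY key1, value1;
-- ... so the query collapses to a plain projection, matching the
-- Fetch-only plan (TableScan + Select, no Group By) seen above.
SELECT key1 FROM dest_g21;

-- With an aggregate call the Group By survives, but the redundant
-- grouping column is still pruned down to "group by key1".
SELECT count(key1) FROM dest_g21 GROUP BY key1, value1;

-- A UNIQUE key only qualifies when it is also non-nullable: dest_g24
-- declares UNIQUE(key1) without NOT NULL, so its plan above keeps the
-- full Group By on (key1, value1).
CREATE TABLE dest_g24 (key1 int, value1 double,
                       UNIQUE (key1) DISABLE RELY);
SELECT key1 FROM dest_g24 GROUP BY key1, value1;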