diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 9984ce5eed..cb64e49614 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -489,6 +489,7 @@ minillaplocal.query.files=\ colstats_date_min_max.q,\ compare_double_bigint_2.q,\ constprog_dpp.q,\ + constraints_optimization.q,\ current_date_timestamp.q,\ correlationoptimizer1.q,\ correlationoptimizer2.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java index 66280b2da1..0cefeeea37 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java @@ -228,8 +228,6 @@ private VectorizedOrcAcidRowBatchReader(JobConf conf, OrcSplit orcSplit, Reporte LOG.info("Read ValidWriteIdList: " + this.validWriteIdList.toString() + ":" + orcSplit); - this.syntheticProps = orcSplit.getSyntheticAcidProps(); - // Clone readerOptions for deleteEvents. Reader.Options deleteEventReaderOptions = readerOptions.clone(); // Set the range on the deleteEventReaderOptions to 0 to INTEGER_MAX because @@ -259,6 +257,7 @@ private VectorizedOrcAcidRowBatchReader(JobConf conf, OrcSplit orcSplit, Reporte } rowIdProjected = areRowIdsProjected(rbCtx); rootPath = orcSplit.getRootDir(); + syntheticProps = orcSplit.getSyntheticAcidProps(); /** * This could be optimized by moving dir type/write id based checks are @@ -394,13 +393,6 @@ public void setBaseAndInnerReader( LOG.debug("findMinMaxKeys() " + ConfVars.FILTER_DELETE_EVENTS + "=false"); return new OrcRawRecordMerger.KeyInterval(null, null); } - - //todo: since we already have OrcSplit.orcTail, should somehow use it to - // get the acid.index, stats, etc rather than fetching the footer again - // though it seems that orcTail is mostly null.... - Reader reader = OrcFile.createReader(orcSplit.getPath(), - OrcFile.readerOptions(conf)); - if(orcSplit.isOriginal()) { /** * Among originals we may have files with _copy_N suffix. To properly @@ -411,11 +403,14 @@ public void setBaseAndInnerReader( * Kind of chicken-and-egg - deal with this later. * See {@link OrcRawRecordMerger#discoverOriginalKeyBounds(Reader, int, * Reader.Options, Configuration, OrcRawRecordMerger.Options)}*/ - LOG.debug("findMinMaxKeys(original split)"); - - return findOriginalMinMaxKeys(orcSplit, reader, deleteEventReaderOptions); + LOG.debug("findMinMaxKeys(original split) - ignoring"); + return new OrcRawRecordMerger.KeyInterval(null, null); } - + //todo: since we already have OrcSplit.orcTail, should somehow use it to + // get the acid.index, stats, etc rather than fetching the footer again + // though it seems that orcTail is mostly null.... 
+ Reader reader = OrcFile.createReader(orcSplit.getPath(), + OrcFile.readerOptions(conf)); List stripes = reader.getStripes(); final long splitStart = orcSplit.getStart(); final long splitEnd = splitStart + orcSplit.getLength(); @@ -583,68 +578,6 @@ dropped by the Reader (I guess because of orc.impl.SchemaEvolution) return keyInterval; } - private OrcRawRecordMerger.KeyInterval findOriginalMinMaxKeys(OrcSplit orcSplit, Reader reader, - Reader.Options deleteEventReaderOptions) { - - // This method returns the minimum and maximum synthetic row ids that are present in this split - // because min and max keys are both inclusive when filtering out the delete delta records. - - if (syntheticProps == null) { - // syntheticProps containing the synthetic rowid offset is computed if there are delete delta files. - // If there aren't any delete delta files, then we don't need this anyway. - return new OrcRawRecordMerger.KeyInterval(null, null); - } - - long splitStart = orcSplit.getStart(); - long splitEnd = orcSplit.getStart() + orcSplit.getLength(); - - long minRowId = syntheticProps.getRowIdOffset(); - long maxRowId = syntheticProps.getRowIdOffset(); - - for(StripeInformation stripe: reader.getStripes()) { - if (splitStart > stripe.getOffset()) { - // This stripe starts before the current split starts. This stripe is not included in this split. - minRowId += stripe.getNumberOfRows(); - } - - if (splitEnd > stripe.getOffset()) { - // This stripe starts before the current split ends. - maxRowId += stripe.getNumberOfRows(); - } else { - // The split ends before (or exactly where) this stripe starts. - // Remaining stripes are not included in this split. - break; - } - } - - RecordIdentifier minKey = new RecordIdentifier(syntheticProps.getSyntheticWriteId(), - syntheticProps.getBucketProperty(), minRowId); - - RecordIdentifier maxKey = new RecordIdentifier(syntheticProps.getSyntheticWriteId(), - syntheticProps.getBucketProperty(), maxRowId > 0? maxRowId - 1: 0); - - OrcRawRecordMerger.KeyInterval keyIntervalTmp = new OrcRawRecordMerger.KeyInterval(minKey, maxKey); - - if (minRowId >= maxRowId) { - /** - * The split lies entirely within a single stripe. In this case, the reader for this split will not read any data. - * See {@link org.apache.orc.impl.RecordReaderImpl#RecordReaderImpl - * We can return the min max key interval as is (it will not read any of the delete delta records into mem) - */ - - LOG.info("findOriginalMinMaxKeys(): This split starts and ends in the same stripe."); - } - - LOG.info("findOriginalMinMaxKeys(): " + keyIntervalTmp); - - // Using min/max ROW__ID from original will work for ppd to the delete deltas because the writeid is the same in - // the min and the max ROW__ID - setSARG(keyIntervalTmp, deleteEventReaderOptions, minKey.getBucketProperty(), maxKey.getBucketProperty(), - minKey.getRowId(), maxKey.getRowId()); - - return keyIntervalTmp; - } - /** * See {@link #next(NullWritable, VectorizedRowBatch)} first and * {@link OrcRawRecordMerger.OriginalReaderPair}. 
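Note on the hunk above: findOriginalMinMaxKeys() is removed and "original" splits now fall back to KeyInterval(null, null), i.e. delete-delta filtering is no longer attempted for original files (the matching testDeleteEventOriginalFiltering* tests are dropped further down). For reference, here is a minimal, self-contained sketch of the stripe arithmetic the removed method used to derive the synthetic ROW__ID window of a split. This is illustrative only, not part of the patch: StripeInfo stands in for org.apache.orc.StripeInformation and all numbers are made up.

import java.util.Arrays;
import java.util.List;

/** Illustrative only -- mirrors the removed findOriginalMinMaxKeys() arithmetic. */
public class SyntheticRowIdRangeSketch {

  static final class StripeInfo {
    final long offset;        // byte offset where the stripe starts
    final long numberOfRows;  // rows stored in the stripe
    StripeInfo(long offset, long numberOfRows) {
      this.offset = offset;
      this.numberOfRows = numberOfRows;
    }
  }

  /** Returns {minRowId, maxRowId} (inclusive) for a split covering [splitStart, splitEnd). */
  static long[] rowIdRange(List<StripeInfo> stripes, long rowIdOffset, long splitStart, long splitEnd) {
    long minRowId = rowIdOffset;
    long maxRowId = rowIdOffset;
    for (StripeInfo stripe : stripes) {
      if (splitStart > stripe.offset) {
        // stripe starts before the split starts: its rows precede the split's first row
        minRowId += stripe.numberOfRows;
      }
      if (splitEnd > stripe.offset) {
        // stripe starts before the split ends: its rows are covered by the split
        maxRowId += stripe.numberOfRows;
      } else {
        break; // remaining stripes start at or after splitEnd
      }
    }
    // the max key is inclusive, hence the -1 (guarded for an empty range)
    return new long[] {minRowId, maxRowId > 0 ? maxRowId - 1 : 0};
  }

  public static void main(String[] args) {
    // three stripes of 3 rows each, starting at byte offsets 0, 100, 200
    List<StripeInfo> stripes = Arrays.asList(
        new StripeInfo(0, 3), new StripeInfo(100, 3), new StripeInfo(200, 3));
    // a split covering only the second stripe maps to synthetic row ids [3, 5]
    System.out.println(Arrays.toString(rowIdRange(stripes, 0L, 100, 200)));
  }
}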
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java index 635d27e723..41c2f9e6a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdColumnUniqueness; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistribution; @@ -71,6 +72,7 @@ public RelMetadataProvider getMetadataProvider() { HiveRelMdSelectivity.SOURCE, HiveRelMdRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, + HiveRelMdColumnUniqueness.SOURCE, HiveRelMdSize.SOURCE, HiveRelMdMemory.SOURCE, new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java index 42e60de6a8..eb6e746b64 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.optimizer.calcite; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -35,7 +34,6 @@ import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelDistribution; -import org.apache.calcite.rel.RelDistributions; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelFieldCollation.Direction; import org.apache.calcite.rel.RelFieldCollation.NullDirection; @@ -49,6 +47,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.schema.ColumnStrategy; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; import org.apache.calcite.util.mapping.IntPair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; @@ -100,6 +99,7 @@ private final ImmutableList hiveVirtualCols; private final int noOfNonVirtualCols; private final List keys; + private final List nonNullablekeys; private final List referentialConstraints; final HiveConf hiveConf; @@ -133,8 +133,11 @@ public RelOptHiveTable(RelOptSchema calciteSchema, RelDataTypeFactory typeFactor this.partitionCache = partitionCache; this.colStatsCache = colStatsCache; this.noColsMissingStats = noColsMissingStats; - this.keys = generateKeys(); + Pair, List> constraintKeys = generateKeys(); + this.keys = constraintKeys.left; + this.nonNullablekeys = constraintKeys.right; this.referentialConstraints = generateReferentialConstraints(); + generateKeys(); } //~ Methods ---------------------------------------------------------------- @@ -167,6 +170,10 @@ public Expression getExpression(Class clazz) { throw new UnsupportedOperationException(); } + public List getNonNullableKeys() { + return nonNullablekeys; + } + 
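The new key bookkeeping above (and isNonNullableKey() just below) boils down to subset checks on Calcite's ImmutableBitSet, where a.contains(b) is true when every bit of b is also set in a. A hedged, standalone sketch of the lookup as written in this patch; the two-column key on field positions {0, 1} (think d_datekey, d_id) is made up for illustration.

import java.util.Arrays;
import java.util.List;
import org.apache.calcite.util.ImmutableBitSet;

/** Illustrative only -- mirrors the isNonNullableKey() check added in this patch. */
public class NonNullableKeyCheckSketch {

  // true when some stored non-nullable key covers every queried column position
  static boolean isNonNullableKey(List<ImmutableBitSet> nonNullableKeys, ImmutableBitSet columns) {
    for (ImmutableBitSet key : nonNullableKeys) {
      if (key.contains(columns)) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) {
    // hypothetical table with a two-column primary key on field positions 0 and 1
    List<ImmutableBitSet> nonNullableKeys = Arrays.asList(ImmutableBitSet.of(0, 1));
    System.out.println(isNonNullableKey(nonNullableKeys, ImmutableBitSet.of(0, 1))); // true
    System.out.println(isNonNullableKey(nonNullableKeys, ImmutableBitSet.of(2)));    // false: column 2 is not part of any key
  }
}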
@Override public RelOptTable extend(List extendedFields) { throw new UnsupportedOperationException(); @@ -213,6 +220,16 @@ public RelOptHiveTable copy(RelDataType newRowType) { this.hiveConf, this.partitionCache, this.colStatsCache, this.noColsMissingStats); } + // Given a key this method returns true if all of the columns in the key are not nullable + public boolean isNonNullableKey(ImmutableBitSet columns) { + for (ImmutableBitSet key : nonNullablekeys) { + if (key.contains(columns)) { + return true; + } + } + return false; + } + @Override public boolean isKey(ImmutableBitSet columns) { for (ImmutableBitSet key : keys) { @@ -228,8 +245,9 @@ public boolean isKey(ImmutableBitSet columns) { return referentialConstraints; } - private List generateKeys() { + private Pair, List> generateKeys() { ImmutableList.Builder builder = ImmutableList.builder(); + ImmutableList.Builder nonNullbuilder = ImmutableList.builder(); // First PK final PrimaryKeyInfo pki; try { @@ -250,11 +268,12 @@ public boolean isKey(ImmutableBitSet columns) { } if (pkPos == rowType.getFieldNames().size()) { LOG.error("Column for primary key definition " + pkColName + " not found"); - return ImmutableList.of(); } keys.set(pkPos); } - builder.add(keys.build()); + ImmutableBitSet key = keys.build(); + builder.add(key); + nonNullbuilder.add(key); } // Then UKs final UniqueConstraint uki; @@ -266,23 +285,31 @@ public boolean isKey(ImmutableBitSet columns) { } for (List ukCols : uki.getUniqueConstraints().values()) { ImmutableBitSet.Builder keys = ImmutableBitSet.builder(); + boolean isNonNullable = true; for (UniqueConstraintCol ukCol : ukCols) { int ukPos; for (ukPos = 0; ukPos < rowType.getFieldNames().size(); ukPos++) { String colName = rowType.getFieldNames().get(ukPos); if (ukCol.colName.equals(colName)) { + if(rowType.getFieldList().get(ukPos).getType().isNullable()) { + // they should all be nullable + isNonNullable = false; + } break; } } if (ukPos == rowType.getFieldNames().size()) { LOG.error("Column for unique constraint definition " + ukCol.colName + " not found"); - return ImmutableList.of(); } keys.set(ukPos); } - builder.add(keys.build()); + ImmutableBitSet key = keys.build(); + builder.add(key); + if(isNonNullable) { + nonNullbuilder.add(key); + } } - return builder.build(); + return new Pair<>(builder.build(), nonNullbuilder.build()); } private List generateReferentialConstraints() { @@ -368,12 +395,11 @@ public RelNode toRel(ToRelContext context) { if (sortColumn.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC) { direction = Direction.ASCENDING; nullDirection = NullDirection.FIRST; - } - else { + } else { direction = Direction.DESCENDING; nullDirection = NullDirection.LAST; } - collationList.add(new RelFieldCollation(i,direction,nullDirection)); + collationList.add(new RelFieldCollation(i, direction, nullDirection)); break; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java index f43ef01293..ed6659c6cc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java @@ -171,7 +171,7 @@ public void onMatch(RelOptRuleCall call) { unique = true; } else { final Boolean unique0 = - mq.areColumnsUnique(joinInput, belowAggregateKey); + mq.areColumnsUnique(joinInput, belowAggregateKey, true); unique = 
unique0 != null && unique0; } if (unique) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java index 5857f730a8..b7c31bdfca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java @@ -24,15 +24,19 @@ import java.util.Map; import java.util.Set; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import org.apache.calcite.adapter.druid.DruidQuery; import org.apache.calcite.linq4j.Ord; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; @@ -300,18 +304,58 @@ private static RelNode project(DruidQuery dq, ImmutableBitSet fieldsUsed, private boolean isRexLiteral(final RexNode rexNode) { if(rexNode instanceof RexLiteral) { return true; - } - else if(rexNode instanceof RexCall + } else if(rexNode instanceof RexCall && ((RexCall)rexNode).getOperator().getKind() == SqlKind.CAST){ return isRexLiteral(((RexCall)(rexNode)).getOperands().get(0)); - } - else { + } else { return false; } } + + // if gby keys consist of pk/uk non-pk/non-uk columns are removed if they are not being used + private ImmutableBitSet generateNewGroupset(Aggregate aggregate, ImmutableBitSet fieldsUsed) { + + ImmutableBitSet originalGroupSet = aggregate.getGroupSet(); + + if (aggregate.getGroupSets().size() > 1 || aggregate.getIndicatorCount() > 0 + || fieldsUsed.contains(originalGroupSet)) { + // if there is grouping sets, indicator or all the group keys are being used we do no need to proceed further + return originalGroupSet; + } + + final RelNode input = aggregate.getInput(); + RelMetadataQuery mq = aggregate.getCluster().getMetadataQuery(); + + final Set uniqueKeys = mq.getUniqueKeys(input, false); + if (uniqueKeys == null || uniqueKeys.isEmpty()) { + return originalGroupSet; + } + + // we have set of unique key, get to the key which is same as group by key + ImmutableBitSet groupByUniqueKey = null; + + for (ImmutableBitSet key : uniqueKeys) { + if (aggregate.getGroupSet().contains(key)) { + groupByUniqueKey = key; + break; + } + } + + if (groupByUniqueKey == null) { + // group by keys do not represent unique keys + return originalGroupSet; + } + + // we know group by key contains primary key and there is at least one column in group by which is not being used + // if that column is not part of key it should be removed + ImmutableBitSet nonKeyColumns = aggregate.getGroupSet().except(groupByUniqueKey); + ImmutableBitSet columnsToRemove = nonKeyColumns.except(fieldsUsed); + ImmutableBitSet newGroupSet = aggregate.getGroupSet().except(columnsToRemove); + + return newGroupSet; + } + /** - * Variant of {@link #trimFields(Aggregate, ImmutableBitSet, Set)} for - * {@link org.apache.calcite.rel.logical.LogicalAggregate}. 
* This method replaces group by 'constant key' with group by true (boolean) * if and only if * group by doesn't have grouping sets @@ -322,50 +366,193 @@ else if(rexNode instanceof RexCall * underneath aggregate * * This is mainly done so that hive is able to push down queries with - * group by 'constant key with type not supported by druid' into druid + * group by 'constant key with type not supported by druid' into druid. + * */ - public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, - Set extraFields) { - - Aggregate newAggregate = aggregate; - if (!(aggregate.getIndicatorCount() > 0) - && !(aggregate.getGroupSet().isEmpty()) - && !fieldsUsed.contains(aggregate.getGroupSet())) { - final RelNode input = aggregate.getInput(); - final RelDataType rowType = input.getRowType(); - RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder(); - final List newProjects = new ArrayList<>(); - - final List inputExprs = input.getChildExps(); - if(inputExprs == null || inputExprs.isEmpty()) { - return super.trimFields(newAggregate, fieldsUsed, extraFields); + private Aggregate rewriteGBConstantKeys(Aggregate aggregate, ImmutableBitSet fieldsUsed, + Set extraFields) { + if ((aggregate.getIndicatorCount() > 0) + || (aggregate.getGroupSet().isEmpty()) + || fieldsUsed.contains(aggregate.getGroupSet())) { + return aggregate; + } + + final RelNode input = aggregate.getInput(); + + + final RelDataType rowType = input.getRowType(); + RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder(); + final List newProjects = new ArrayList<>(); + + final List inputExprs = input.getChildExps(); + if (inputExprs == null || inputExprs.isEmpty()) { + return aggregate; + } + + boolean allConstants = true; + for (int key : aggregate.getGroupSet()) { + // getChildExprs on Join could return less number of expressions than there are coming out of join + if (inputExprs.size() <= key || !isRexLiteral(inputExprs.get(key))) { + allConstants = false; + break; } + } - boolean allConstants = true; - for(int key : aggregate.getGroupSet()) { - // getChildExprs on Join could return less number of expressions than there are coming out of join - if(inputExprs.size() <= key || !isRexLiteral(inputExprs.get(key))){ - allConstants = false; - break; + if (allConstants) { + for (int i = 0; i < rowType.getFieldCount(); i++) { + if (aggregate.getGroupSet().get(i)) { + newProjects.add(rexBuilder.makeLiteral(true)); + } else { + newProjects.add(rexBuilder.makeInputRef(input, i)); } } + relBuilder.push(input); + relBuilder.project(newProjects); + Aggregate newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(), + aggregate.getGroupSet(), null, aggregate.getAggCallList()); + return newAggregate; + } + return aggregate; + } - if (allConstants) { - for (int i = 0; i < rowType.getFieldCount(); i++) { - if (aggregate.getGroupSet().get(i)) { - newProjects.add(rexBuilder.makeLiteral(true)); - } else { - newProjects.add(rexBuilder.makeInputRef(input, i)); - } - } - relBuilder.push(input); - relBuilder.project(newProjects); - newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(), - aggregate.getGroupSet(), null, aggregate.getAggCallList()); + @Override + public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Set extraFields) { + // Fields: + // + // | sys fields | group fields | indicator fields | agg functions | + // + // Two kinds of trimming: + // + // 1. 
If agg rel has system fields but none of these are used, create an + // agg rel with no system fields. + // + // 2. If aggregate functions are not used, remove them. + // + // But group and indicator fields stay, even if they are not used. + + aggregate = rewriteGBConstantKeys(aggregate, fieldsUsed, extraFields); + + final RelDataType rowType = aggregate.getRowType(); + + // Compute which input fields are used. + // 1. group fields are always used + final ImmutableBitSet.Builder inputFieldsUsed = + aggregate.getGroupSet().rebuild(); + // 2. agg functions + for (AggregateCall aggCall : aggregate.getAggCallList()) { + for (int i : aggCall.getArgList()) { + inputFieldsUsed.set(i); + } + if (aggCall.filterArg >= 0) { + inputFieldsUsed.set(aggCall.filterArg); + } + } + + // Create input with trimmed columns. + final RelNode input = aggregate.getInput(); + final Set inputExtraFields = Collections.emptySet(); + final TrimResult trimResult = + trimChild(aggregate, input, inputFieldsUsed.build(), inputExtraFields); + final RelNode newInput = trimResult.left; + final Mapping inputMapping = trimResult.right; + + ImmutableBitSet originalGroupSet = aggregate.getGroupSet(); + ImmutableBitSet updatedGroupSet = generateNewGroupset(aggregate, fieldsUsed); + ImmutableBitSet gbKeysDeleted = originalGroupSet.except(updatedGroupSet); + ImmutableBitSet updatedGroupFields = ImmutableBitSet.range(originalGroupSet.cardinality()); + final int updatedGroupCount = updatedGroupSet.cardinality(); + + // we need to clear the bits corresponding to deleted gb keys + int setIdx = 0; + while(setIdx != -1) { + setIdx = gbKeysDeleted.nextSetBit(setIdx); + if(setIdx != -1) { + updatedGroupFields = updatedGroupFields.clear(setIdx); + setIdx++; } } - return super.trimFields(newAggregate, fieldsUsed, extraFields); + fieldsUsed = + fieldsUsed.union(updatedGroupFields); + + // If the input is unchanged, and we need to project all columns, + // there's nothing to do. + if (input == newInput + && fieldsUsed.equals(ImmutableBitSet.range(rowType.getFieldCount()))) { + return result(aggregate, + Mappings.createIdentity(rowType.getFieldCount())); + } + + // update the group by keys based on inputMapping + ImmutableBitSet newGroupSet = + Mappings.apply(inputMapping, updatedGroupSet); + + // Which agg calls are used by our consumer? + int originalGroupCount = aggregate.getGroupSet().cardinality(); + int j = originalGroupCount; + int usedAggCallCount = 0; + for (int i = 0; i < aggregate.getAggCallList().size(); i++) { + if (fieldsUsed.get(j++)) { + ++usedAggCallCount; + } + } + + // Offset due to the number of system fields having changed. + Mapping mapping = + Mappings.create( + MappingType.INVERSE_SURJECTION, + rowType.getFieldCount(), + updatedGroupCount + usedAggCallCount); + + + // if group keys were reduced, it means we didn't have grouping therefore + // we don't need to transform group sets + ImmutableList newGroupSets = null; + if(!updatedGroupSet.equals(aggregate.getGroupSet())) { + newGroupSets = ImmutableList.of(newGroupSet); + } else { + newGroupSets = ImmutableList.copyOf( + Iterables.transform(aggregate.getGroupSets(), + input1 -> Mappings.apply(inputMapping, input1))); + } + + // Populate mapping of where to find the fields. System, group key and + // indicator fields first. + int gbKeyIdx = 0; + for (j = 0; j < originalGroupCount; j++) { + if(fieldsUsed.get(j)) { + mapping.set(j, gbKeyIdx); + gbKeyIdx++; + } + } + + // Now create new agg calls, and populate mapping for them. 
+ relBuilder.push(newInput); + final List newAggCallList = new ArrayList<>(); + j = originalGroupCount; // because lookup in fieldsUsed is done using original group count + for (AggregateCall aggCall : aggregate.getAggCallList()) { + if (fieldsUsed.get(j)) { + final ImmutableList args = + relBuilder.fields( + Mappings.apply2(inputMapping, aggCall.getArgList())); + final RexNode filterArg = aggCall.filterArg < 0 ? null + : relBuilder.field(Mappings.apply(inputMapping, aggCall.filterArg)); + RelBuilder.AggCall newAggCall = + relBuilder.aggregateCall(aggCall.getAggregation(), + aggCall.isDistinct(), aggCall.isApproximate(), + filterArg, aggCall.name, args); + mapping.set(j, updatedGroupCount + newAggCallList.size()); + newAggCallList.add(newAggCall); + } + ++j; + } + + final RelBuilder.GroupKey groupKey = + relBuilder.groupKey(newGroupSet, newGroupSets); + relBuilder.aggregate(groupKey, newAggCallList); + + return result(relBuilder.build(), mapping); } + /** * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for * {@link org.apache.calcite.rel.logical.LogicalProject}. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java new file mode 100644 index 0000000000..5ef945c3a1 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java @@ -0,0 +1,333 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Correlate; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.JoinInfo; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SetOp; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.BitSets; +import org.apache.calcite.util.ImmutableBitSet; + +import com.google.common.collect.ImmutableSet; +import org.apache.calcite.util.Util; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.plan.ColStatistics; + +import java.util.BitSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * EstimateUniqueKeys provides an ability to estimate unique keys based on statistics. + */ +//TODO: Ideally RelMdUniqueKeys should be modified (on Calcite side) to accept a parameter based on which +// this logic whoud be implemented +public final class EstimateUniqueKeys { + //~ Constructors ----------------------------------------------------------- + private EstimateUniqueKeys() { + } + + //~ Methods ---------------------------------------------------------------- + + private static Set getUniqueKeys(HiveFilter rel) { + return getUniqueKeys(rel.getInput()); + } + + private static Set getUniqueKeys(HiveSortLimit rel) { + return getUniqueKeys(rel.getInput()); + } + + private static Set getUniqueKeys(Correlate rel) { + return getUniqueKeys(rel.getLeft()); + } + + + //Infer Uniquenes if: - rowCount(col) = ndv(col) - TBD for numerics: max(col) + // - min(col) = rowCount(col) + private static Set generateKeysUsingStatsEstimation(Project rel, + HiveTableScan tScan) { + Map posMap = new HashMap(); + int projectPos = 0; + int colStatsPos = 0; + + BitSet projectedCols = new BitSet(); + for (RexNode r : rel.getProjects()) { + if (r instanceof RexInputRef) { + projectedCols.set(((RexInputRef) r).getIndex()); + posMap.put(colStatsPos, projectPos); + colStatsPos++; + } + projectPos++; + } + + RelMetadataQuery mq = rel.getCluster().getMetadataQuery(); + double numRows = mq.getRowCount(tScan); + List colStats = tScan.getColStat(BitSets + .toList(projectedCols)); + Set keys = new HashSet(); + + colStatsPos = 0; + for (ColStatistics cStat : colStats) { + boolean isKey = false; + if (cStat.getCountDistint() >= numRows) { + isKey = true; + } + if (!isKey && cStat.getRange() != null && + cStat.getRange().maxValue != null && + cStat.getRange().minValue != null) { + double r = cStat.getRange().maxValue.doubleValue() - + cStat.getRange().minValue.doubleValue() + 1; + isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON); + } + if (isKey) { + ImmutableBitSet key = 
ImmutableBitSet.of(posMap.get(colStatsPos)); + keys.add(key); + } + colStatsPos++; + } + + return keys; + + } + + private static Set getUniqueKeys(HiveProject rel) { + + HiveTableScan tScan = getTableScan(rel.getInput(), false); + if (tScan != null) { + return generateKeysUsingStatsEstimation(rel, tScan); + } + + // LogicalProject maps a set of rows to a different set; + // Without knowledge of the mapping function(whether it + // preserves uniqueness), it is only safe to derive uniqueness + // info from the child of a project when the mapping is f(a) => a. + // + // Further more, the unique bitset coming from the child needs + // to be mapped to match the output of the project. + final Map mapInToOutPos = new HashMap<>(); + final List projExprs = rel.getProjects(); + final Set projUniqueKeySet = new HashSet<>(); + + // Build an input to output position map. + for (int i = 0; i < projExprs.size(); i++) { + RexNode projExpr = projExprs.get(i); + if (projExpr instanceof RexInputRef) { + mapInToOutPos.put(((RexInputRef) projExpr).getIndex(), i); + } + } + + if (mapInToOutPos.isEmpty()) { + // if there's no RexInputRef in the projected expressions + // return empty set. + return projUniqueKeySet; + } + + Set childUniqueKeySet = + getUniqueKeys(rel.getInput()); + + if (childUniqueKeySet != null) { + // Now add to the projUniqueKeySet the child keys that are fully + // projected. + for (ImmutableBitSet colMask : childUniqueKeySet) { + ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder(); + boolean completeKeyProjected = true; + for (int bit : colMask) { + if (mapInToOutPos.containsKey(bit)) { + tmpMask.set(mapInToOutPos.get(bit)); + } else { + // Skip the child unique key if part of it is not + // projected. + completeKeyProjected = false; + break; + } + } + if (completeKeyProjected) { + projUniqueKeySet.add(tmpMask.build()); + } + } + } + + return projUniqueKeySet; + + } + + private static RelNode getRelNode(RelNode rel) { + if (rel != null && rel instanceof HepRelVertex) { + rel = ((HepRelVertex) rel).getCurrentRel(); + } else if (rel != null && rel instanceof RelSubset) { + rel = Util.first(((RelSubset)rel).getBest(), ((RelSubset) rel).getOriginal()); + } + return rel; + } + + private static Set getUniqueKeys(HiveJoin rel) { + RelNode left = getRelNode(rel.getLeft()); + RelNode right = getRelNode(rel.getRight()); + + // first add the different combinations of concatenated unique keys + // from the left and the right, adjusting the right hand side keys to + // reflect the addition of the left hand side + // + // NOTE zfong 12/18/06 - If the number of tables in a join is large, + // the number of combinations of unique key sets will explode. If + // that is undesirable, use RelMetadataQuery.areColumnsUnique() as + // an alternative way of getting unique key information. 
+ + final Set retSet = new HashSet<>(); + final Set leftSet = getUniqueKeys(left); + Set rightSet = null; + + final Set tmpRightSet = getUniqueKeys(right); + int nFieldsOnLeft = left.getRowType().getFieldCount(); + + if (tmpRightSet != null) { + rightSet = new HashSet<>(); + for (ImmutableBitSet colMask : tmpRightSet) { + ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder(); + for (int bit : colMask) { + tmpMask.set(bit + nFieldsOnLeft); + } + rightSet.add(tmpMask.build()); + } + + if (leftSet != null) { + for (ImmutableBitSet colMaskRight : rightSet) { + for (ImmutableBitSet colMaskLeft : leftSet) { + retSet.add(colMaskLeft.union(colMaskRight)); + } + } + } + } + + // locate the columns that participate in equijoins + final JoinInfo joinInfo = rel.analyzeCondition(); + RelMetadataQuery mq = rel.getCluster().getMetadataQuery(); + + // determine if either or both the LHS and RHS are unique on the + // equijoin columns + final Boolean leftUnique = + mq.areColumnsUnique(left, joinInfo.leftSet()); + final Boolean rightUnique = + mq.areColumnsUnique(right, joinInfo.rightSet()); + + // if the right hand side is unique on its equijoin columns, then we can + // add the unique keys from left if the left hand side is not null + // generating + if ((rightUnique != null) + && rightUnique + && (leftSet != null) + && !(rel.getJoinType().generatesNullsOnLeft())) { + retSet.addAll(leftSet); + } + + // same as above except left and right are reversed + if ((leftUnique != null) + && leftUnique + && (rightSet != null) + && !(rel.getJoinType().generatesNullsOnRight())) { + retSet.addAll(rightSet); + } + + return retSet; + } + + private static Set getUniqueKeys(HiveSemiJoin rel) { + // only return the unique keys from the LHS since a semijoin only + // returns the LHS + return getUniqueKeys(rel.getLeft()); + } + + private static Set getUniqueKeys(HiveAggregate rel) { + // group by keys form a unique key + return ImmutableSet.of(rel.getGroupSet()); + } + + private static Set getUniqueKeys(SetOp rel) { + if (!rel.all) { + return ImmutableSet.of( + ImmutableBitSet.range(rel.getRowType().getFieldCount())); + } + return ImmutableSet.of(); + } + + // Catch-all rule when none of the others apply. + public static Set getUniqueKeys(RelNode rel) { + rel = getRelNode(rel); + if (rel instanceof HiveFilter) { + return getUniqueKeys((HiveFilter) rel); + } else if (rel instanceof HiveSortLimit) { + return getUniqueKeys((HiveSortLimit) rel); + } else if (rel instanceof Correlate) { + return getUniqueKeys((Correlate) rel); + } else if (rel instanceof HiveProject) { + return getUniqueKeys((HiveProject) rel); + } else if (rel instanceof HiveJoin) { + return getUniqueKeys((HiveJoin) rel); + } else if (rel instanceof HiveSemiJoin) { + return getUniqueKeys((HiveSemiJoin) rel); + } else if (rel instanceof HiveAggregate) { + return getUniqueKeys((HiveAggregate) rel); + } else if (rel instanceof SetOp) { + return getUniqueKeys((SetOp) rel); + } else { + return null; + } + } + + /* + * traverse a path of Filter, Projects to get to the TableScan. + * In case of Unique keys, stop if you reach a Project, it will be handled + * by the invocation on the Project. + * In case of getting the base rowCount of a Path, keep going past a Project. 
+ */ + static HiveTableScan getTableScan(RelNode r, boolean traverseProject) { + + while (r != null && !(r instanceof HiveTableScan)) { + if (r instanceof HepRelVertex) { + r = ((HepRelVertex) r).getCurrentRel(); + } else if (r instanceof Filter) { + r = ((Filter) r).getInput(); + } else if (traverseProject && r instanceof Project) { + r = ((Project) r).getInput(); + } else { + r = null; + } + } + return r == null ? null : (HiveTableScan) r; + } +} + +// End EstimateUniqueKeys.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java new file mode 100644 index 0000000000..2dede97723 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + + +import org.apache.calcite.rel.metadata.BuiltInMetadata; +import org.apache.calcite.rel.metadata.MetadataDef; +import org.apache.calcite.rel.metadata.MetadataHandler; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; + +/** + * HiveRelMdColumnUniqueness implements the ability to get unique keys for table scan op. 
+ */ +public final class HiveRelMdColumnUniqueness + implements MetadataHandler { + public static final RelMetadataProvider SOURCE = + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.COLUMN_UNIQUENESS.method, new HiveRelMdColumnUniqueness()); + + //~ Constructors ----------------------------------------------------------- + + private HiveRelMdColumnUniqueness() {} + + //~ Methods ---------------------------------------------------------------- + + public MetadataDef getDef() { + return BuiltInMetadata.ColumnUniqueness.DEF; + } + + public Boolean areColumnsUnique(HiveTableScan rel, RelMetadataQuery mq, + ImmutableBitSet columns, boolean ignoreNulls) { + if(ignoreNulls) { + return rel.getTable().isKey(columns); + } else { + RelOptHiveTable tbl = (RelOptHiveTable)rel.getTable(); + return tbl.isNonNullableKey(columns); + } + } +} + +// End HiveRelMdColumnUniqueness.java + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index 1ca1937ed9..576ed34bf3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -317,7 +317,7 @@ private static double pkSelectivity(Join joinRel, RelMetadataQuery mq, boolean l (!leftChild && joinRel.getJoinType().generatesNullsOnLeft())) { return 1.0; } else { - HiveTableScan tScan = HiveRelMdUniqueKeys.getTableScan(child, true); + HiveTableScan tScan = EstimateUniqueKeys.getTableScan(child, true); if (tScan != null) { double tRowCount = mq.getRowCount(tScan); return childRowCount / tRowCount; @@ -329,7 +329,10 @@ private static double pkSelectivity(Join joinRel, RelMetadataQuery mq, boolean l private static boolean isKey(ImmutableBitSet c, RelNode rel, RelMetadataQuery mq) { boolean isKey = false; - Set keys = mq.getUniqueKeys(rel); + //EstimateUniqueKeys doesn't go through metadata providers anymore, which means we will not be + // taking advantage of metadata caching anymore. 
This could potential increase query compile time + // Leaving a note here in case we see increase in timings + Set keys = EstimateUniqueKeys.getUniqueKeys(rel); if (keys != null) { for (ImmutableBitSet key : keys) { if (key.equals(c)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java index 3bf62c535c..7772335ac5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java @@ -17,31 +17,20 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.stats; -import java.util.BitSet; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; -import org.apache.calcite.plan.RelOptUtil; -import org.apache.calcite.plan.hep.HepRelVertex; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.metadata.BuiltInMetadata; import org.apache.calcite.rel.metadata.MetadataDef; import org.apache.calcite.rel.metadata.MetadataHandler; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataQuery; -import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.util.BitSets; import org.apache.calcite.util.BuiltInMethod; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; -import org.apache.hadoop.hive.ql.plan.ColStatistics; public class HiveRelMdUniqueKeys implements MetadataHandler { @@ -54,138 +43,15 @@ return BuiltInMetadata.UniqueKeys.DEF; } - /* - * Infer Uniquenes if: - rowCount(col) = ndv(col) - TBD for numerics: max(col) - * - min(col) = rowCount(col) - * - * Why are we intercepting Project and not TableScan? Because if we - * have a method for TableScan, it will not know which columns to check for. - * Inferring Uniqueness for all columns is very expensive right now. The flip - * side of doing this is, it only works post Field Trimming. - */ - public Set getUniqueKeys(Project rel, RelMetadataQuery mq, boolean ignoreNulls) { - HiveTableScan tScan = getTableScan(rel.getInput(), false); - - if (tScan == null) { - // If HiveTableScan is not found, e.g., not sequence of Project and - // Filter operators, execute the original getUniqueKeys method - - // LogicalProject maps a set of rows to a different set; - // Without knowledge of the mapping function(whether it - // preserves uniqueness), it is only safe to derive uniqueness - // info from the child of a project when the mapping is f(a) => a. - // - // Further more, the unique bitset coming from the child needs - // to be mapped to match the output of the project. - final Map mapInToOutPos = new HashMap<>(); - final List projExprs = rel.getProjects(); - final Set projUniqueKeySet = new HashSet<>(); - - // Build an input to output position map. 
- for (int i = 0; i < projExprs.size(); i++) { - RexNode projExpr = projExprs.get(i); - if (projExpr instanceof RexInputRef) { - mapInToOutPos.put(((RexInputRef) projExpr).getIndex(), i); - } - } - - if (mapInToOutPos.isEmpty()) { - // if there's no RexInputRef in the projected expressions - // return empty set. - return projUniqueKeySet; - } - - Set childUniqueKeySet = - mq.getUniqueKeys(rel.getInput(), ignoreNulls); - - if (childUniqueKeySet != null) { - // Now add to the projUniqueKeySet the child keys that are fully - // projected. - for (ImmutableBitSet colMask : childUniqueKeySet) { - ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder(); - boolean completeKeyProjected = true; - for (int bit : colMask) { - if (mapInToOutPos.containsKey(bit)) { - tmpMask.set(mapInToOutPos.get(bit)); - } else { - // Skip the child unique key if part of it is not - // projected. - completeKeyProjected = false; - break; - } - } - if (completeKeyProjected) { - projUniqueKeySet.add(tmpMask.build()); - } - } - } - - return projUniqueKeySet; - } - - Map posMap = new HashMap(); - int projectPos = 0; - int colStatsPos = 0; - - BitSet projectedCols = new BitSet(); - for (RexNode r : rel.getProjects()) { - if (r instanceof RexInputRef) { - projectedCols.set(((RexInputRef) r).getIndex()); - posMap.put(colStatsPos, projectPos); - colStatsPos++; - } - projectPos++; + public Set getUniqueKeys(HiveTableScan rel, RelMetadataQuery mq, + boolean ignoreNulls) { + RelOptHiveTable tbl = (RelOptHiveTable) rel.getTable(); + List keyList = tbl.getNonNullableKeys(); + if (keyList != null) { + Set keySet = new HashSet<>(keyList); + return keySet; } - - double numRows = mq.getRowCount(tScan); - List colStats = tScan.getColStat(BitSets - .toList(projectedCols)); - Set keys = new HashSet(); - - colStatsPos = 0; - for (ColStatistics cStat : colStats) { - boolean isKey = false; - if (cStat.getCountDistint() >= numRows) { - isKey = true; - } - if ( !isKey && cStat.getRange() != null && - cStat.getRange().maxValue != null && - cStat.getRange().minValue != null) { - double r = cStat.getRange().maxValue.doubleValue() - - cStat.getRange().minValue.doubleValue() + 1; - isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON); - } - if ( isKey ) { - ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos)); - keys.add(key); - } - colStatsPos++; - } - - return keys; + return null; } - - /* - * traverse a path of Filter, Projects to get to the TableScan. - * In case of Unique keys, stop if you reach a Project, it will be handled - * by the invocation on the Project. - * In case of getting the base rowCount of a Path, keep going past a Project. - */ - static HiveTableScan getTableScan(RelNode r, boolean traverseProject) { - - while (r != null && !(r instanceof HiveTableScan)) { - if (r instanceof HepRelVertex) { - r = ((HepRelVertex) r).getCurrentRel(); - } else if (r instanceof Filter) { - r = ((Filter) r).getInput(); - } else if (traverseProject && r instanceof Project) { - r = ((Project) r).getInput(); - } else { - r = null; - } - } - return r == null ? 
null : (HiveTableScan) r; - } - } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java index 0a499b1a1b..8f477f4700 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java @@ -19,14 +19,12 @@ import java.io.File; import java.util.List; -import java.util.Properties; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; @@ -40,7 +38,6 @@ import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcAcidRowBatchReader.SortMergedDeleteEventRegistry; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; -import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.io.LongWritable; @@ -66,7 +63,6 @@ private FileSystem fs; private Path root; private ObjectInspector inspector; - private ObjectInspector originalInspector; public static class DummyRow { LongWritable field; @@ -92,24 +88,6 @@ static String getColumnTypesProperty() { } - /** - * Dummy row for original files. - */ - public static class DummyOriginalRow { - LongWritable field; - - DummyOriginalRow(long val) { - field = new LongWritable(val); - } - - static String getColumnNamesProperty() { - return "field"; - } - static String getColumnTypesProperty() { - return "bigint"; - } - } - @Before public void setup() throws Exception { conf = new JobConf(); @@ -132,9 +110,6 @@ public void setup() throws Exception { synchronized (TestOrcFile.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector (DummyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); - - originalInspector = ObjectInspectorFactory.getReflectionObjectInspector(DummyOriginalRow.class, - ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } } @Test @@ -395,158 +370,6 @@ private void testDeleteEventFiltering2() throws Exception { } } - - @Test - public void testDeleteEventOriginalFilteringOn() throws Exception { - HiveConf.setBoolVar(conf, HiveConf.ConfVars.FILTER_DELETE_EVENTS, true); - testDeleteEventOriginalFiltering(); - } - - @Test - public void testDeleteEventOriginalFilteringOff() throws Exception { - HiveConf.setBoolVar(conf, HiveConf.ConfVars.FILTER_DELETE_EVENTS, false); - testDeleteEventOriginalFiltering(); - } - - public void testDeleteEventOriginalFiltering() throws Exception { - boolean filterOn = - HiveConf.getBoolVar(conf, HiveConf.ConfVars.FILTER_DELETE_EVENTS); - - conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, false); - - // Create 3 original files with 3 rows each - Properties properties = new Properties(); - properties.setProperty("columns", DummyOriginalRow.getColumnNamesProperty()); - properties.setProperty("columns.types", DummyOriginalRow.getColumnTypesProperty()); - - OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(properties, conf); - 
writerOptions.inspector(originalInspector); - - Path testFilePath = new Path(root, "000000_0"); - Writer writer = OrcFile.createWriter(testFilePath, writerOptions); - - writer.addRow(new DummyOriginalRow(0)); - writer.addRow(new DummyOriginalRow(0)); - writer.addRow(new DummyOriginalRow(0)); - writer.close(); - - testFilePath = new Path(root, "000000_0_copy_1"); - - writer = OrcFile.createWriter(testFilePath, writerOptions); - writer.addRow(new DummyOriginalRow(0)); - writer.addRow(new DummyOriginalRow(0)); - writer.addRow(new DummyOriginalRow(0)); - writer.close(); - - testFilePath = new Path(root, "000000_0_copy_2"); - - writer = OrcFile.createWriter(testFilePath, writerOptions); - writer.addRow(new DummyOriginalRow(0)); - writer.addRow(new DummyOriginalRow(0)); - writer.addRow(new DummyOriginalRow(0)); - writer.close(); - - conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true); - - int bucket = 0; - - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf) - .filesystem(fs) - .bucket(bucket) - .writingBase(false) - .minimumWriteId(1) - .maximumWriteId(1) - .inspector(inspector) - .reporter(Reporter.NULL) - .recordIdColumn(1) - .finalDestination(root); - - int bucketProperty = BucketCodec.V1.encode(options); - - RecordUpdater updater = new OrcRecordUpdater(root, options); - - //delete 1 row from each of the original files - // Delete the last record in this split to test boundary conditions. It should not be present in the delete event - // registry for the next split - updater.delete(options.getMinimumWriteId(), new DummyRow(-1, 2, 0, bucket)); - // Delete the first record in this split to test boundary conditions. It should not be present in the delete event - // registry for the previous split - updater.delete(options.getMinimumWriteId(), new DummyRow(-1, 3, 0, bucket)); - updater.delete(options.getMinimumWriteId(), new DummyRow(-1, 7, 0, bucket)); - updater.close(false); - - //HWM is not important - just make sure deltas created above are read as if committed - conf.set(ValidWriteIdList.VALID_WRITEIDS_KEY, "tbl:2:" + Long.MAX_VALUE + "::"); - - // Set vector mode to true int the map work so that we recognize this as a vector mode execution during the split - // generation. Without this we will not compute the offset for the synthetic row ids. 
- MapWork mapWork = new MapWork(); - mapWork.setVectorMode(true); - VectorizedRowBatchCtx vrbContext = new VectorizedRowBatchCtx(); - mapWork.setVectorizedRowBatchCtx(vrbContext); - HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "//tmp"); - Utilities.setMapWork(conf, mapWork); - - // now we have 3 delete events total, but for each split we should only - // load 1 into DeleteRegistry (if filtering is on) - List> splitStrategies = getSplitStrategies(); - assertEquals(1, splitStrategies.size()); - List splits = ((OrcInputFormat.ACIDSplitStrategy)splitStrategies.get(0)).getSplits(); - - assertEquals(3, splits.size()); - assertEquals(root.toUri().toString() + File.separator + "000000_0", - splits.get(0).getPath().toUri().toString()); - assertTrue(splits.get(0).isOriginal()); - - assertEquals(root.toUri().toString() + File.separator + "000000_0_copy_1", - splits.get(1).getPath().toUri().toString()); - assertTrue(splits.get(1).isOriginal()); - - assertEquals(root.toUri().toString() + File.separator + "000000_0_copy_2", - splits.get(2).getPath().toUri().toString()); - assertTrue(splits.get(2).isOriginal()); - - VectorizedOrcAcidRowBatchReader vectorizedReader = - new VectorizedOrcAcidRowBatchReader(splits.get(0), conf, Reporter.NULL, vrbContext); - ColumnizedDeleteEventRegistry deleteEventRegistry = - (ColumnizedDeleteEventRegistry) vectorizedReader.getDeleteEventRegistry(); - assertEquals("number of delete events for stripe 1", filterOn ? 1 : 3, deleteEventRegistry.size()); - OrcRawRecordMerger.KeyInterval keyInterval = vectorizedReader.getKeyInterval(); - if(filterOn) { - assertEquals(new OrcRawRecordMerger.KeyInterval( - new RecordIdentifier(0, bucketProperty, 0), - new RecordIdentifier(0, bucketProperty, 2)), - keyInterval); - } else { - assertEquals(new OrcRawRecordMerger.KeyInterval(null, null), keyInterval); - } - - vectorizedReader = new VectorizedOrcAcidRowBatchReader(splits.get(1), conf, Reporter.NULL, vrbContext); - deleteEventRegistry = (ColumnizedDeleteEventRegistry) vectorizedReader.getDeleteEventRegistry(); - assertEquals("number of delete events for stripe 2", filterOn ? 1 : 3, deleteEventRegistry.size()); - keyInterval = vectorizedReader.getKeyInterval(); - if(filterOn) { - assertEquals(new OrcRawRecordMerger.KeyInterval( - new RecordIdentifier(0, bucketProperty, 3), - new RecordIdentifier(0, bucketProperty, 5)), - keyInterval); - } else { - assertEquals(new OrcRawRecordMerger.KeyInterval(null, null), keyInterval); - } - - vectorizedReader = new VectorizedOrcAcidRowBatchReader(splits.get(2), conf, Reporter.NULL, vrbContext); - deleteEventRegistry = (ColumnizedDeleteEventRegistry) vectorizedReader.getDeleteEventRegistry(); - assertEquals("number of delete events for stripe 3", filterOn ? 
1 : 3, deleteEventRegistry.size());
-    keyInterval = vectorizedReader.getKeyInterval();
-    if(filterOn) {
-      assertEquals(new OrcRawRecordMerger.KeyInterval(
-          new RecordIdentifier(0, bucketProperty, 6),
-          new RecordIdentifier(0, bucketProperty, 8)), keyInterval);
-    } else {
-      assertEquals(new OrcRawRecordMerger.KeyInterval(null, null), keyInterval);
-    }
-  }
-
   @Test
   public void testVectorizedOrcAcidRowBatchReader() throws Exception {
     conf.set("bucket_count", "1");
diff --git a/ql/src/test/queries/clientpositive/constraints_optimization.q b/ql/src/test/queries/clientpositive/constraints_optimization.q
new file mode 100644
index 0000000000..70ab8509c5
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constraints_optimization.q
@@ -0,0 +1,149 @@
+set hive.strict.checks.cartesian.product=false;
+
+CREATE TABLE `customer_removal_n0`(
+ `c_custkey` bigint,
+ `c_name` string,
+ `c_address` string,
+ `c_city` string,
+ `c_nation` string,
+ `c_region` string,
+ `c_phone` string,
+ `c_mktsegment` string,
+ primary key (`c_custkey`) disable rely);
+
+CREATE TABLE `dates_removal_n0`(
+ `d_datekey` bigint,
+ `d_id` bigint,
+ `d_date` string,
+ `d_dayofweek` string,
+ `d_month` string,
+ `d_year` int,
+ `d_yearmonthnum` int,
+ `d_yearmonth` string,
+ `d_daynuminweek` int,
+ `d_daynuminmonth` int,
+ `d_daynuminyear` int,
+ `d_monthnuminyear` int,
+ `d_weeknuminyear` int,
+ `d_sellingseason` string,
+ `d_lastdayinweekfl` int,
+ `d_lastdayinmonthfl` int,
+ `d_holidayfl` int ,
+ `d_weekdayfl`int,
+ primary key (`d_datekey`, `d_id`) disable rely);
+
+ -- group by key has single primary key
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey;
+
+ -- mix of primary + non-primary keys
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation;
+
+ -- multiple keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id;
+
+ -- multiple keys + non-keys + different order
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason
+ order by d_datekey limit 10;
+
+ -- multiple keys in different order and mixed with non-keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10;
+
+ -- same as above but with aggregate
+ EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10;
+
+ -- join
+ insert into dates_removal_n0(d_datekey, d_id) values(3, 0);
+ insert into dates_removal_n0(d_datekey, d_id) values(3, 1);
+ insert into customer_removal_n0 (c_custkey) values(3);
+
+ EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id;
+ SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id;
+
+ -- group by keys are not primary keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason
+ order by d_datekey limit 10;
+
+ -- negative
+ -- with aggregate function
+ EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA')
+ group by c_custkey, c_nation;
+
+ DROP TABLE customer_removal_n0;
+ DROP TABLE dates_removal_n0;
+
+ -- group by reduction optimization
+ create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely);
+ insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null);
+
+ -- value1 will removed because it is unused, then whole group by will be removed because key1 is unique
+ explain select key1 from dest_g21 group by key1, value1;
+ select key1 from dest_g21 group by key1, value1;
+ -- same query but with filter
+ explain select key1 from dest_g21 where value1 > 1 group by key1, value1;
+ select key1 from dest_g21 where value1 > 1 group by key1, value1;
+
+ explain select key1 from dest_g21 where key1 > 1 group by key1, value1;
+ select key1 from dest_g21 where key1 > 1 group by key1, value1;
+
+ -- only value1 will be removed because there is aggregate call
+ explain select count(key1) from dest_g21 group by key1, value1;
+ select count(key1) from dest_g21 group by key1, value1;
+
+ explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1;
+ select count(key1) from dest_g21 where value1 > 1 group by key1, value1;
+
+ -- t1.key is unique even after join therefore group by = group by (t1.key)
+ explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1;
+ select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1;
+
+ explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1;
+ select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1;
+
+ -- both aggregate and one of the key1 should be removed
+ explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub;
+ select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub;
+
+ -- one of the aggregate will be removed and one of the key1 will be removed
+ explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub;
+ select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub;
+
+ DROP table dest_g21;
+
+CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string);
+INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010');
+
+-- explicit NOT NULL filter
+explain select i, j from tconst where i is not null group by i,j, d_year;
+select i, j from tconst where i is not null group by i,j, d_year;
+
+-- filter on i should be removed
+explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year;
+select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year;
+
+-- where will be removed since i is not null is always true
+explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year;
+select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year;
+
+-- should not have implicit filter on join keys
+explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year;
+select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year;
+
+-- both join keys have NOT NULL
+explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year;
+select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year;
+
+DROP TABLE tconst;
+
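Note on the tconst cases above: the NOT NULL DISABLE RELY constraint on i lets the planner fold an explicit i IS NOT NULL predicate to true, while the DISABLE NORELY constraint on j is not trusted, so j IS NOT NULL must stay in the plan (as the expected output below shows). A minimal stand-alone sketch of the same behaviour; demo_nn and its columns are illustrative names, not part of this patch:

-- sketch only: assumes a metastore that records RELY/NORELY NOT NULL constraints, as the patch's tests do
CREATE TABLE demo_nn (a int NOT NULL DISABLE RELY, b int NOT NULL DISABLE NORELY, tag string);
-- expected: the Filter keeps only "b is not null"; "a is not null" is folded away via the RELY constraint
EXPLAIN SELECT a, b FROM demo_nn WHERE a IS NOT NULL AND b IS NOT NULL GROUP BY a, b, tag;
DROP TABLE demo_nn;
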
+ +-- UNIQUE + NOT NULL (same as primary key) +create table dest_g21 (key1 int NOT NULL disable rely, value1 double, UNIQUE(key1) disable rely); +explain select key1 from dest_g21 group by key1, value1; + +-- UNIQUE with nullabiity +create table dest_g24 (key1 int , value1 double, UNIQUE(key1) disable rely); +explain select key1 from dest_g24 group by key1, value1; + +DROP TABLE dest_g21; +DROP TABLE dest_g24; diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out new file mode 100644 index 0000000000..b45b7c409f --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -0,0 +1,2010 @@ +PREHOOK: query: CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer_removal_n0 +POSTHOOK: query: CREATE TABLE `customer_removal_n0`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer_removal_n0 +PREHOOK: query: CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_id` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`, `d_id`) disable rely) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: CREATE TABLE `dates_removal_n0`( + `d_datekey` bigint, + `d_id` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`, `d_id`) disable rely) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dates_removal_n0 +PREHOOK: query: -- group by key has single primary key + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- group by key has single primary key + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: customer_removal_n0 + filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Filter Operator + predicate: (c_nation) IN ('USA', 'INDIA') 
(type: boolean) + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: -- mix of primary + non-primary keys + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- mix of primary + non-primary keys + EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: customer_removal_n0 + filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Filter Operator + predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: -- multiple keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id +PREHOOK: type: QUERY +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- multiple keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: -- multiple keys + non-keys + different order + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason + order by d_datekey limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- multiple keys + non-keys + different order + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason + order by d_datekey limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: 
vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: -- multiple keys in different order and mixed with non-keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- multiple keys in different order and mixed with non-keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: -- same as above but with aggregate + EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10 +PREHOOK: type: QUERY +PREHOOK: 
Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- same as above but with aggregate + EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey, + d_sellingseason order by d_datekey limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_datekey (type: bigint), d_id (type: bigint) + outputColumnNames: d_datekey, d_id + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: d_datekey (type: bigint), d_id (type: bigint) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Group By Operator + aggregations: count() + keys: d_datekey (type: bigint), d_id (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: bigint), _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
ListSink + +PREHOOK: query: -- join + insert into dates_removal_n0(d_datekey, d_id) values(3, 0) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: -- join + insert into dates_removal_n0(d_datekey, d_id) values(3, 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dates_removal_n0 +POSTHOOK: Lineage: dates_removal_n0.d_date SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_datekey SCRIPT [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminmonth SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminweek SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_dayofweek SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_holidayfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_id SCRIPT [] +POSTHOOK: Lineage: dates_removal_n0.d_lastdayinmonthfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_lastdayinweekfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_month SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_monthnuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_sellingseason SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_weekdayfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_weeknuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_year SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_yearmonth SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_yearmonthnum SIMPLE [] +PREHOOK: query: insert into dates_removal_n0(d_datekey, d_id) values(3, 1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: insert into dates_removal_n0(d_datekey, d_id) values(3, 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dates_removal_n0 +POSTHOOK: Lineage: dates_removal_n0.d_date SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_datekey SCRIPT [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminmonth SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminweek SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_daynuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_dayofweek SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_holidayfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_id SCRIPT [] +POSTHOOK: Lineage: dates_removal_n0.d_lastdayinmonthfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_lastdayinweekfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_month SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_monthnuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_sellingseason SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_weekdayfl SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_weeknuminyear SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_year SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_yearmonth SIMPLE [] +POSTHOOK: Lineage: dates_removal_n0.d_yearmonthnum SIMPLE [] +PREHOOK: query: insert into customer_removal_n0 (c_custkey) values(3) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@customer_removal_n0 +POSTHOOK: query: insert into customer_removal_n0 (c_custkey) values(3) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@customer_removal_n0 +POSTHOOK: Lineage: customer_removal_n0.c_address SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_city SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_custkey SCRIPT [] +POSTHOOK: Lineage: customer_removal_n0.c_mktsegment SIMPLE 
[] +POSTHOOK: Lineage: customer_removal_n0.c_name SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_nation SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_phone SIMPLE [] +POSTHOOK: Lineage: customer_removal_n0.c_region SIMPLE [] +PREHOOK: query: EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_removal_n0 +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_removal_n0 +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_datekey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: customer_removal_n0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_removal_n0 +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_removal_n0 +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +3 +3 
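The join plan and result above show why the Group By Operator disappears: (d_datekey, d_id) is declared as a RELY primary key, and the inner join on d_datekey = c_custkey cannot duplicate rows because c_custkey is itself a RELY primary key, so grouping on the key columns is redundant; the two result rows are simply the two (d_datekey, d_id) pairs inserted for key 3. A hedged sketch of the same shape; fact_demo and dim_demo are illustrative names, not part of this patch:

-- sketch only: a group by on a RELY composite primary key should survive a key-to-key join without a Group By Operator
CREATE TABLE fact_demo (fk bigint, id bigint, PRIMARY KEY (fk, id) DISABLE RELY);
CREATE TABLE dim_demo (k bigint, PRIMARY KEY (k) DISABLE RELY);
-- expected: the plan contains the Merge Join but no Group By Operator, mirroring the explain output above
EXPLAIN SELECT fk FROM fact_demo JOIN dim_demo ON fk = k GROUP BY fk, id;
DROP TABLE fact_demo;
DROP TABLE dim_demo;
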
+PREHOOK: query: -- group by keys are not primary keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason + order by d_datekey limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- group by keys are not primary keys + EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason + order by d_datekey limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dates_removal_n0 + filterExpr: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_year) IN (1985, 2004) (type: boolean) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_datekey (type: bigint), d_sellingseason (type: string) + outputColumnNames: d_datekey, d_sellingseason + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: d_datekey (type: bigint), d_sellingseason (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + top n: 10 + Group By Operator + keys: d_datekey (type: bigint), d_sellingseason (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: -- negative + -- with aggregate function + EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA') + group by c_custkey, c_nation +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_removal_n0 +#### A masked pattern was here #### +POSTHOOK: query: -- negative + -- with aggregate function + EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA') + group by c_custkey, c_nation +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_removal_n0 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: customer_removal_n0 + filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean) + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c_custkey (type: bigint) + outputColumnNames: c_custkey + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: c_custkey (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP TABLE customer_removal_n0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@customer_removal_n0 +PREHOOK: Output: default@customer_removal_n0 +POSTHOOK: query: DROP TABLE customer_removal_n0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@customer_removal_n0 +POSTHOOK: Output: default@customer_removal_n0 +PREHOOK: query: DROP TABLE dates_removal_n0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dates_removal_n0 +PREHOOK: Output: default@dates_removal_n0 +POSTHOOK: query: DROP TABLE dates_removal_n0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dates_removal_n0 +POSTHOOK: Output: 
default@dates_removal_n0 +PREHOOK: query: -- group by reduction optimization + create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g21 +POSTHOOK: query: -- group by reduction optimization + create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g21 +PREHOOK: query: insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dest_g21 +POSTHOOK: query: insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dest_g21 +POSTHOOK: Lineage: dest_g21.key1 SCRIPT [] +POSTHOOK: Lineage: dest_g21.value1 SCRIPT [] +PREHOOK: query: -- value1 will removed because it is unused, then whole group by will be removed because key1 is unique + explain select key1 from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: -- value1 will removed because it is unused, then whole group by will be removed because key1 is unique + explain select key1 from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest_g21 + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select key1 from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select key1 from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +2 +3 +4 +5 +6 +PREHOOK: query: -- same query but with filter + explain select key1 from dest_g21 where value1 > 1 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: -- same query but with filter + explain select key1 from dest_g21 where value1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest_g21 + filterExpr: (value1 > 1.0D) (type: boolean) + Filter Operator + predicate: (value1 > 1.0D) (type: boolean) + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select key1 from dest_g21 where value1 > 1 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select key1 from dest_g21 where value1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +2 +4 +PREHOOK: query: explain select key1 from dest_g21 where key1 > 1 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: explain select key1 from dest_g21 where key1 > 1 group by key1, value1 +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest_g21 + filterExpr: (key1 > 1) (type: boolean) + Filter Operator + predicate: (key1 > 1) (type: boolean) + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select key1 from dest_g21 where key1 > 1 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select key1 from dest_g21 where key1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +2 +3 +4 +5 +6 +PREHOOK: query: -- only value1 will be removed because there is aggregate call + explain select count(key1) from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: -- only value1 will be removed because there is aggregate call + explain select count(key1) from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dest_g21 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: key1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(key1) from dest_g21 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select count(key1) from dest_g21 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +1 
+1 +1 +1 +1 +PREHOOK: query: explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dest_g21 + filterExpr: (value1 > 1.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value1 > 1.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: key1 + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(key1) from dest_g21 where value1 > 1 group by key1, value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select count(key1) from dest_g21 where value1 > 1 group by key1, value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +1 +1 +PREHOOK: query: -- t1.key is unique even after join therefore group by = group by (t1.key) + explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: -- t1.key is unique even after join therefore group by = group by (t1.key) + explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (value1 > 2.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value1 > 2.0D) (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +4 +PREHOOK: query: explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + 
Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (value1 > 2.0D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value1 > 2.0D) (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +PREHOOK: query: -- both aggregate and one of the key1 should be removed + explain select key1 from (select key1, 
count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: -- both aggregate and one of the key1 should be removed + explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest_g21 + filterExpr: (value1 < 4.5D) (type: boolean) + Filter Operator + predicate: (value1 < 4.5D) (type: boolean) + Select Operator + expressions: key1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +1 +2 +3 +4 +PREHOOK: query: -- one of the aggregate will be removed and one of the key1 will be removed + explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +POSTHOOK: query: -- one of the aggregate will be removed and one of the key1 will be removed + explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g21 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dest_g21 + filterExpr: (value1 < 4.5D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (value1 < 4.5D) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: int) + outputColumnNames: key1 + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(key1) + keys: key1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1 1
+3 3
+4 4
+2 2
+PREHOOK: query: DROP table dest_g21
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g21
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: DROP table dest_g21
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g21
+POSTHOOK: Output: default@dest_g21
+PREHOOK: query: CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tconst
+POSTHOOK: query: CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tconst
+PREHOOK: query: INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tconst
+POSTHOOK: query: INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tconst
+POSTHOOK: Lineage: tconst.d_year SCRIPT []
+POSTHOOK: Lineage: tconst.i SCRIPT []
+POSTHOOK: Lineage: tconst.j SCRIPT []
+PREHOOK: query: explain select i, j from tconst where i is not null group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select i, j from tconst where i is not null group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: tconst
+                  Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: i (type: int), j (type: int), d_year (type: string)
+                    outputColumnNames: i, j, d_year
+                    Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: i (type: int), j (type: int), d_year (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+                        Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select i, j from tconst where i is not null group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select i, j from tconst where i is not null group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+3 3
+2 NULL
+1 1
+PREHOOK: query: explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: tconst
+                  filterExpr: j is not null (type: boolean)
+                  Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: j is not null (type: boolean)
+                    Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: i (type: int), j (type: int), d_year (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+                        Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+3 3
+1 1
+PREHOOK: query: explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: tconst
+                  Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: i (type: int), j (type: int), d_year (type: string)
+                    outputColumnNames: i, j, d_year
+                    Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: i (type: int), j (type: int), d_year (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+                        Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+3 3
+2 NULL
+1 1
+PREHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: i (type: int), d_year (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 4
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  filterExpr: j is not null (type: boolean)
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: j is not null (type: boolean)
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: j (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: sum(_col0)
+                  keys: _col0 (type: int), _col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+                    Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col2 (type: bigint)
+        Reducer 3
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: bigint)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+1
+3
+PREHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: i (type: int), d_year (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 4
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: i (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: sum(_col0)
+                  keys: _col0 (type: int), _col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+                    Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col2 (type: bigint)
+        Reducer 3
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: bigint)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+1
+2
+3
+PREHOOK: query: DROP TABLE tconst
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tconst
+PREHOOK: Output: default@tconst
+POSTHOOK: query: DROP TABLE tconst
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tconst
+POSTHOOK: Output: default@tconst
+PREHOOK: query: create table dest_g21 (key1 int NOT NULL disable rely, value1 double, UNIQUE(key1) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: create table dest_g21 (key1 int NOT NULL disable rely, value1 double, UNIQUE(key1) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g21
+PREHOOK: query: explain select key1 from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select key1 from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: dest_g21
+          Select Operator
+            expressions: key1 (type: int)
+            outputColumnNames: _col0
+            ListSink
+
+PREHOOK: query: create table dest_g24 (key1 int , value1 double, UNIQUE(key1) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g24
+POSTHOOK: query: create table dest_g24 (key1 int , value1 double, UNIQUE(key1) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g24
+PREHOOK: query: explain select key1 from dest_g24 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g24
+#### A masked pattern was here ####
+POSTHOOK: query: explain select key1 from dest_g24 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g24
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: dest_g24
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key1 (type: int), value1 (type: double)
+                    outputColumnNames: key1, value1
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key1 (type: int), value1 (type: double)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: double)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: double)
+                        Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: double)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: DROP TABLE dest_g21
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g21
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: DROP TABLE dest_g21
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g21
+POSTHOOK: Output: default@dest_g21
+PREHOOK: query: DROP TABLE dest_g24
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g24
+PREHOOK: Output: default@dest_g24
+POSTHOOK: query: DROP TABLE dest_g24
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g24
+POSTHOOK: Output: default@dest_g24
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out
index 735a4db2ee..a7a1a0b082 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out
@@ -641,11 +641,13 @@ PREHOOK: query: explain select empid, deptno
 from emps_n3 group by empid, deptno
 PREHOOK: type: QUERY
 PREHOOK: Input: default@emps_n3
+PREHOOK: Input: default@mv1_n2
 #### A masked pattern was here ####
 POSTHOOK: query: explain select empid, deptno
 from emps_n3 group by empid, deptno
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@emps_n3
+POSTHOOK: Input: default@mv1_n2
 #### A masked pattern was here ####
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
@@ -656,7 +658,7 @@ STAGE PLANS:
       limit: -1
       Processor Tree:
         TableScan
-          alias: emps_n3
+          alias: default.mv1_n2
           Select Operator
             expressions: empid (type: int), deptno (type: int)
             outputColumnNames: _col0, _col1
@@ -665,10 +667,12 @@ PREHOOK: query: select empid, deptno
 from emps_n3 group by empid, deptno
 PREHOOK: type: QUERY
 PREHOOK: Input: default@emps_n3
+PREHOOK: Input: default@mv1_n2
 #### A masked pattern was here ####
 POSTHOOK: query: select empid, deptno
 from emps_n3 group by empid, deptno
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@emps_n3
+POSTHOOK: Input: default@mv1_n2
 #### A masked pattern was here ####
 100 10
 110 10
@@ -709,11 +713,13 @@ PREHOOK: query: explain select empid, name
 from emps_n3 group by empid, name
 PREHOOK: type: QUERY
 PREHOOK: Input: default@emps_n3
+PREHOOK: Input: default@mv1_n2
 #### A masked pattern was here ####
 POSTHOOK: query: explain select empid, name
 from emps_n3 group by empid, name
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@emps_n3
+POSTHOOK: Input: default@mv1_n2
 #### A masked pattern was here ####
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
@@ -724,7 +730,7 @@ STAGE PLANS:
       limit: -1
      Processor Tree:
         TableScan
-          alias: emps_n3
+          alias: default.mv1_n2
           Select Operator
             expressions: empid (type: int), name (type: varchar(256))
             outputColumnNames: _col0, _col1
@@ -733,10 +739,12 @@ PREHOOK: query: select empid, name
 from emps_n3 group by empid, name
 PREHOOK: type: QUERY
 PREHOOK: Input: default@emps_n3
+PREHOOK: Input: default@mv1_n2
 #### A masked pattern was here ####
 POSTHOOK: query: select empid, name
 from emps_n3 group by empid, name
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@emps_n3
+POSTHOOK: Input: default@mv1_n2
 #### A masked pattern was here ####
 100 Bill
 110 Theodore