diff --git druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
index da6d493..81fff6b 100644
--- druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
+++ druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
@@ -90,7 +90,7 @@
 /**
  * DruidStorageHandler provides a HiveStorageHandler implementation for Druid.
  */
-@SuppressWarnings({ "deprecation", "rawtypes" })
+@SuppressWarnings({ "rawtypes" })
 public class DruidStorageHandler extends DefaultHiveMetaHook implements HiveStorageHandler {
 
   protected static final Logger LOG = LoggerFactory.getLogger(DruidStorageHandler.class);
@@ -281,7 +281,7 @@ public void publishSegments(Table table, boolean overwrite) throws MetaException
     if (MetaStoreUtils.isExternalTable(table)) {
       return;
     }
-    LOG.info("Committing table {} to the druid metastore", table.getDbName());
+    LOG.info("Committing table {} to the druid metastore", table.getTableName());
     final Path tableDir = getSegmentDescriptorDir();
     try {
       List<DataSegment> segmentList = DruidStorageHandlerUtils
@@ -498,8 +498,7 @@ public void rollbackInsertTable(Table table, boolean overwrite) throws MetaExcep
   }
 
   @Override
-  public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties
-  ) {
+  public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
     jobProperties.put(Constants.DRUID_SEGMENT_VERSION, new DateTime().toString());
     jobProperties.put(Constants.DRUID_JOB_WORKING_DIRECTORY, getStagingWorkingDir().toString());
     // DruidOutputFormat will write segments in an intermediate directory
@@ -508,8 +507,7 @@
   }
 
   @Override
-  public void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties
-  ) {
+  public void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 646bb23..1d9cbe3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -133,6 +133,7 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry;
 import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreChecker;
+import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
 import org.apache.hadoop.hive.ql.metadata.NotNullConstraint;
@@ -4667,11 +4668,11 @@ private int createView(Hive db, CreateViewDesc crtView) throws HiveException {
     // create new view
     Table tbl = db.newTable(crtView.getViewName());
     tbl.setViewOriginalText(crtView.getViewOriginalText());
+    tbl.setViewExpandedText(crtView.getViewExpandedText());
     if (crtView.isMaterialized()) {
       tbl.setRewriteEnabled(crtView.isRewriteEnabled());
       tbl.setTableType(TableType.MATERIALIZED_VIEW);
     } else {
-      tbl.setViewExpandedText(crtView.getViewExpandedText());
       tbl.setTableType(TableType.VIRTUAL_VIEW);
     }
     tbl.setSerializationLib(null);
@@ -4700,12 +4701,46 @@ private int createView(Hive db, CreateViewDesc crtView) throws HiveException {
     if (crtView.getLocation() != null) {
       tbl.setDataLocation(new Path(crtView.getLocation()));
     }
+
+    if (crtView.getStorageHandler() != null) {
+      tbl.setProperty(
+          org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE,
+          crtView.getStorageHandler());
+    }
+    HiveStorageHandler storageHandler = tbl.getStorageHandler();
+
+    /*
+     * If the user didn't specify a SerDe, we use the storage handler's SerDe
+     * when one is set, and default to LazySimpleSerDe otherwise.
+     */
+    if (crtView.getSerde() == null) {
+      if (storageHandler == null) {
+        LOG.info("Default to LazySimpleSerDe for materialized view " + crtView.getViewName());
+        tbl.setSerializationLib(LazySimpleSerDe.class.getName());
+      } else {
+        String serDeClassName = storageHandler.getSerDeClass().getName();
+        LOG.info("Use StorageHandler-supplied " + serDeClassName
+            + " for materialized view " + crtView.getViewName());
+        tbl.setSerializationLib(serDeClassName);
+      }
+    } else {
+      // let's validate that the serde exists
+      DDLTask.validateSerDe(crtView.getSerde(), conf);
+      tbl.setSerializationLib(crtView.getSerde());
+    }
+
     // Short circuit the checks that the input format is valid, this is configured for all
     // materialized views and doesn't change so we don't need to check it constantly.
-    tbl.getSd().setInputFormat(crtView.getInputFormat());
-    tbl.getSd().setOutputFormat(crtView.getOutputFormat());
-    tbl.getSd().setSerdeInfo(new SerDeInfo(crtView.getSerde(), crtView.getSerde(),
-        crtView.getSerdeProps()));
+    tbl.setInputFormatClass(crtView.getInputFormat());
+    tbl.setOutputFormatClass(crtView.getOutputFormat());
+    if (crtView.getInputFormat() != null && !crtView.getInputFormat().isEmpty()) {
+      tbl.getSd().setInputFormat(tbl.getInputFormatClass().getName());
+    }
+    if (crtView.getOutputFormat() != null && !crtView.getOutputFormat().isEmpty()) {
+      tbl.getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
+    }
   }
 
   db.createTable(tbl, crtView.getIfNotExists());
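The SerDe selection added to createView() follows a fixed precedence: an explicit SERDE clause wins, then the storage handler's SerDe, then LazySimpleSerDe. A minimal standalone sketch of that precedence; the SerdeResolver class and its StorageHandler stand-in are illustrative, not Hive API:

    // Hypothetical stand-in for HiveStorageHandler#getSerDeClass().
    interface StorageHandler {
      Class<?> getSerDeClass();
    }

    final class SerdeResolver {
      private static final String LAZY_SIMPLE_SERDE =
          "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe";

      // Precedence mirrors createView(): explicit SERDE > handler SerDe > LazySimpleSerDe.
      static String resolveSerde(String explicitSerde, StorageHandler handler) {
        if (explicitSerde != null) {
          return explicitSerde;                      // user supplied ROW FORMAT SERDE
        }
        if (handler != null) {
          return handler.getSerDeClass().getName();  // STORED BY handler's default
        }
        return LAZY_SIMPLE_SERDE;                    // Hive's default text SerDe
      }
    }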
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index d661f10..db6b925 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -51,6 +51,7 @@
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
 
 import javax.jdo.JDODataStoreException;
 
@@ -62,13 +63,13 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.hive.common.BlobStorageUtils;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.HiveStatsUtils;
 import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
 import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.hive.common.log.InPlaceUpdate;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.io.HdfsUtils;
@@ -137,7 +138,6 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.index.HiveIndexHandler;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.common.log.InPlaceUpdate;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
 import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
@@ -151,19 +151,18 @@
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.thrift.TException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.base.Splitter;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import com.google.common.base.Splitter;
 
 /**
  * This class has functions that implement meta data/DDL operations using calls
@@ -4217,8 +4216,30 @@ public void dropConstraint(String dbName, String tableName, String constraintNam
    * @throws HiveException
    */
   public PrimaryKeyInfo getPrimaryKeys(String dbName, String tblName) throws HiveException {
+    return getPrimaryKeys(dbName, tblName, false);
+  }
+
+  /**
+   * Get primary key columns associated with the table that are available for optimization.
+   *
+   * @param dbName Database Name
+   * @param tblName Table Name
+   * @return Primary Key associated with the table.
+   * @throws HiveException
+   */
+  public PrimaryKeyInfo getReliablePrimaryKeys(String dbName, String tblName) throws HiveException {
+    return getPrimaryKeys(dbName, tblName, true);
+  }
+
+  private PrimaryKeyInfo getPrimaryKeys(String dbName, String tblName, boolean onlyReliable)
+      throws HiveException {
     try {
       List<SQLPrimaryKey> primaryKeys = getMSC().getPrimaryKeys(new PrimaryKeysRequest(dbName, tblName));
+      if (onlyReliable && primaryKeys != null && !primaryKeys.isEmpty()) {
+        primaryKeys = primaryKeys.stream()
+            .filter(pk -> pk.isRely_cstr())
+            .collect(Collectors.toList());
+      }
       return new PrimaryKeyInfo(primaryKeys, tblName, dbName);
     } catch (Exception e) {
       throw new HiveException(e);
@@ -4234,8 +4255,30 @@ public PrimaryKeyInfo getPrimaryKeys(String dbName, String tblName) throws HiveE
    * @throws HiveException
    */
   public ForeignKeyInfo getForeignKeys(String dbName, String tblName) throws HiveException {
+    return getForeignKeys(dbName, tblName, false);
+  }
+
+  /**
+   * Get foreign keys associated with the table that are available for optimization.
+   *
+   * @param dbName Database Name
+   * @param tblName Table Name
+   * @return Foreign keys associated with the table.
+   * @throws HiveException
+   */
+  public ForeignKeyInfo getReliableForeignKeys(String dbName, String tblName) throws HiveException {
+    return getForeignKeys(dbName, tblName, true);
+  }
+
+  private ForeignKeyInfo getForeignKeys(String dbName, String tblName, boolean onlyReliable)
+      throws HiveException {
     try {
       List<SQLForeignKey> foreignKeys = getMSC().getForeignKeys(new ForeignKeysRequest(null, null, dbName, tblName));
+      if (onlyReliable && foreignKeys != null && !foreignKeys.isEmpty()) {
+        foreignKeys = foreignKeys.stream()
+            .filter(fk -> fk.isRely_cstr())
+            .collect(Collectors.toList());
+      }
       return new ForeignKeyInfo(foreignKeys, tblName, dbName);
     } catch (Exception e) {
       throw new HiveException(e);
@@ -4251,9 +4294,31 @@ public ForeignKeyInfo getForeignKeys(String dbName, String tblName) throws HiveE
    * @throws HiveException
    */
   public UniqueConstraint getUniqueConstraints(String dbName, String tblName) throws HiveException {
+    return getUniqueConstraints(dbName, tblName, false);
+  }
+
+  /**
+   * Get unique constraints associated with the table that are available for optimization.
+   *
+   * @param dbName Database Name
+   * @param tblName Table Name
+   * @return Unique constraints associated with the table.
+   * @throws HiveException
+   */
+  public UniqueConstraint getReliableUniqueConstraints(String dbName, String tblName) throws HiveException {
+    return getUniqueConstraints(dbName, tblName, true);
+  }
+
+  private UniqueConstraint getUniqueConstraints(String dbName, String tblName, boolean onlyReliable)
+      throws HiveException {
     try {
       List<SQLUniqueConstraint> uniqueConstraints = getMSC().getUniqueConstraints(
           new UniqueConstraintsRequest(dbName, tblName));
+      if (onlyReliable && uniqueConstraints != null && !uniqueConstraints.isEmpty()) {
+        uniqueConstraints = uniqueConstraints.stream()
+            .filter(uk -> uk.isRely_cstr())
+            .collect(Collectors.toList());
+      }
       return new UniqueConstraint(uniqueConstraints, tblName, dbName);
     } catch (Exception e) {
       throw new HiveException(e);
@@ -4269,9 +4334,31 @@ public UniqueConstraint getUniqueConstraints(String dbName, String tblName) thro
    * @throws HiveException
    */
   public NotNullConstraint getNotNullConstraints(String dbName, String tblName) throws HiveException {
+    return getNotNullConstraints(dbName, tblName, false);
+  }
+
+  /**
+   * Get not null constraints associated with the table that are available for optimization.
+   *
+   * @param dbName Database Name
+   * @param tblName Table Name
+   * @return Not null constraints associated with the table.
+   * @throws HiveException
+   */
+  public NotNullConstraint getReliableNotNullConstraints(String dbName, String tblName) throws HiveException {
+    return getNotNullConstraints(dbName, tblName, true);
+  }
+
+  private NotNullConstraint getNotNullConstraints(String dbName, String tblName, boolean onlyReliable)
+      throws HiveException {
     try {
       List<SQLNotNullConstraint> notNullConstraints = getMSC().getNotNullConstraints(
          new NotNullConstraintsRequest(dbName, tblName));
+      if (onlyReliable && notNullConstraints != null && !notNullConstraints.isEmpty()) {
+        notNullConstraints = notNullConstraints.stream()
+            .filter(nnc -> nnc.isRely_cstr())
+            .collect(Collectors.toList());
+      }
       return new NotNullConstraint(notNullConstraints, tblName, dbName);
     } catch (Exception e) {
       throw new HiveException(e);
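All four getReliable* accessors share the same filtering step: drop any constraint whose RELY option is not set, since only RELY constraints may be trusted by the optimizer without re-validation. A generic sketch of that shared pattern; RelyFilter is illustrative, while the thrift-generated constraint classes each expose their own isRely_cstr():

    import java.util.List;
    import java.util.function.Predicate;
    import java.util.stream.Collectors;

    final class RelyFilter {
      // Shared shape of getReliable{PrimaryKeys,ForeignKeys,UniqueConstraints,NotNullConstraints}:
      // keep only constraints whose RELY flag is set.
      static <T> List<T> onlyReliable(List<T> constraints, Predicate<T> hasRely) {
        if (constraints == null || constraints.isEmpty()) {
          return constraints;
        }
        return constraints.stream().filter(hasRely).collect(Collectors.toList());
      }
    }

Usage against the metastore types would then look like: primaryKeys = RelyFilter.onlyReliable(primaryKeys, SQLPrimaryKey::isRely_cstr);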
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
index 526d1dd..154ea68 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
@@ -54,6 +54,7 @@
 import org.apache.hadoop.hive.ql.QueryState;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
@@ -174,7 +175,7 @@ public RelOptMaterialization addMaterializedView(Table materializedViewTable) {
       return null;
     }
     // Add to cache
-    final String viewQuery = materializedViewTable.getViewOriginalText();
+    final String viewQuery = materializedViewTable.getViewExpandedText();
     final RelNode tableRel = createTableScan(materializedViewTable);
     if (tableRel == null) {
       LOG.warn("Materialized view " + materializedViewTable.getCompleteName() +
@@ -226,7 +227,9 @@ public void dropMaterializedView(Table materializedViewTable) {
   private static RelNode createTableScan(Table viewTable) {
     // 0. Recreate cluster
     final RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(null);
-    final RexBuilder rexBuilder = new RexBuilder(new JavaTypeFactoryImpl());
+    final RexBuilder rexBuilder = new RexBuilder(
+        new JavaTypeFactoryImpl(
+            new HiveTypeSystemImpl()));
     final RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder);
 
     // 1. Create column schema
@@ -338,6 +341,7 @@ private static RelNode parseQuery(String viewQuery) {
       return analyzer.genLogicalPlan(node);
     } catch (Exception e) {
       // We could not parse the view
+      LOG.error(e.getMessage());
       return null;
     }
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index a53f774..43e202c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -211,12 +211,9 @@ public void checkValidity(Configuration conf) throws HiveException {
       }
     }
 
-    if (isView()) {
+    if (isView() || isMaterializedView()) {
       assert (getViewOriginalText() != null);
       assert (getViewExpandedText() != null);
-    } else if (isMaterializedView()) {
-      assert(getViewOriginalText() != null);
-      assert(getViewExpandedText() == null);
     } else {
       assert(getViewOriginalText() == null);
       assert(getViewExpandedText() == null);
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java
index 7670dc1..4a248d6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java
@@ -33,7 +33,6 @@
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
-import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -134,10 +133,17 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       if (table != null) {
         // case the statement is an INSERT
        segmentGranularity = table.getParameters().get(Constants.DRUID_SEGMENT_GRANULARITY);
-      } else {
+      } else if (parseCtx.getCreateViewDesc() != null) {
+        // case the statement is a CREATE MATERIALIZED VIEW AS
+        segmentGranularity = parseCtx.getCreateViewDesc().getTblProps()
+            .get(Constants.DRUID_SEGMENT_GRANULARITY);
+      } else if (parseCtx.getCreateTable() != null) {
         // case the statement is a CREATE TABLE AS
-        segmentGranularity = parseCtx.getCreateTable().getTblProps()
+        segmentGranularity = parseCtx.getCreateTable().getTblProps()
             .get(Constants.DRUID_SEGMENT_GRANULARITY);
+      } else {
+        throw new SemanticException("Druid storage handler used but not an INSERT, "
+            + "CMVAS or CTAS statement");
       }
       segmentGranularity = !Strings.isNullOrEmpty(segmentGranularity) ? segmentGranularity
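The granularity lookup above is a three-way fallback keyed on statement type. A condensed sketch of that fallback; the property key string and the method shape are assumptions for illustration, since the real lookup goes through Table parameters and the CREATE descriptors as in the hunk above:

    import java.util.Map;

    final class GranularitySource {
      // Assumed value of Constants.DRUID_SEGMENT_GRANULARITY.
      static final String KEY = "druid.segment.granularity";

      // INSERT reads the target table, CREATE MATERIALIZED VIEW AS reads the view
      // descriptor, CREATE TABLE AS reads the table descriptor; anything else is
      // a misuse of the Druid storage handler.
      static String resolve(Map<String, String> insertTableProps,
                            Map<String, String> createViewProps,
                            Map<String, String> createTableProps) {
        if (insertTableProps != null) {
          return insertTableProps.get(KEY);
        }
        if (createViewProps != null) {
          return createViewProps.get(KEY);
        }
        if (createTableProps != null) {
          return createTableProps.get(KEY);
        }
        throw new IllegalStateException(
            "Druid storage handler used but not an INSERT, CMVAS or CTAS statement");
      }
    }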
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index 8428615..f70aa75 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -49,6 +49,7 @@
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexOver;
 import org.apache.calcite.rex.RexPatternFieldRef;
+import org.apache.calcite.rex.RexTableInputRef;
 import org.apache.calcite.rex.RexRangeRef;
 import org.apache.calcite.rex.RexSubQuery;
 import org.apache.calcite.rex.RexUtil;
@@ -89,7 +90,6 @@
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
-import org.apache.calcite.rex.RexTableInputRef;
 
 /**
  * Generic utility functions needed for Calcite based Hive CBO.
@@ -1034,6 +1034,11 @@ public Boolean visitInputRef(RexInputRef inputRef) {
     }
 
     @Override
+    public Boolean visitTableInputRef(RexTableInputRef inputRef) {
+      return false;
+    }
+
+    @Override
     public Boolean visitLocalRef(RexLocalRef localRef) {
       throw new RuntimeException("Not expected to be called.");
     }
@@ -1082,11 +1087,6 @@ public Boolean visitSubQuery(RexSubQuery subQuery) {
     public Boolean visitPatternFieldRef(RexPatternFieldRef fieldRef) {
      return false;
     }
-
-    @Override
-    public Boolean visitTableInputRef(RexTableInputRef fieldRef) {
-      return false;
-    }
   }
 
   public static Set<Integer> getInputRefs(RexNode expr) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 85aa9b3..297e5c6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -35,16 +35,21 @@
 import org.apache.calcite.rel.RelFieldCollation.Direction;
 import org.apache.calcite.rel.RelFieldCollation.NullDirection;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelReferentialConstraint;
+import org.apache.calcite.rel.RelReferentialConstraintImpl;
 import org.apache.calcite.rel.logical.LogicalTableScan;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.util.ImmutableBitSet;
-import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.calcite.util.mapping.IntPair;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo;
+import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo.ForeignKeyCol;
+import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.PartitionIterable;
 import org.apache.hadoop.hive.ql.metadata.Table;
@@ -76,6 +81,7 @@
   private final ImmutableMap<Integer, ColumnInfo> hivePartitionColsMap;
   private final ImmutableList<VirtualColumn> hiveVirtualCols;
   private final int noOfNonVirtualCols;
+  private final List<RelReferentialConstraint> referentialConstraints;
   final HiveConf hiveConf;
 
   private double rowCount = -1;
@@ -106,6 +112,7 @@ public RelOptHiveTable(RelOptSchema calciteSchema, String qualifiedTblName,
     this.partitionCache = partitionCache;
     this.colStatsCache = colStatsCache;
     this.noColsMissingStats = noColsMissingStats;
+    this.referentialConstraints = generateReferentialConstraints();
   }
 
   public RelOptHiveTable copy(RelDataType newRowType) {
@@ -150,6 +157,72 @@ public boolean isKey(ImmutableBitSet arg0) {
   }
 
   @Override
+  public List<RelReferentialConstraint> getReferentialConstraints() {
+    return referentialConstraints;
+  }
+
+  private List<RelReferentialConstraint> generateReferentialConstraints() {
+    final ForeignKeyInfo fki;
+    try {
+      fki = Hive.get().getReliableForeignKeys(
+          hiveTblMetadata.getDbName(), hiveTblMetadata.getTableName());
+    } catch (HiveException e) {
+      throw new RuntimeException(e);
+    }
+    ImmutableList.Builder<RelReferentialConstraint> builder = ImmutableList.builder();
+    for (List<ForeignKeyCol> fkCols : fki.getForeignKeys().values()) {
+      List<String> foreignKeyTableQualifiedName = Lists.newArrayList(name);
+      String parentDatabaseName = fkCols.get(0).parentDatabaseName;
+      String parentTableName = fkCols.get(0).parentTableName;
+      String parentFullyQualifiedName;
+      if (parentDatabaseName != null && !parentDatabaseName.isEmpty()) {
+        parentFullyQualifiedName = parentDatabaseName + "." + parentTableName;
+      } else {
+        parentFullyQualifiedName = parentTableName;
+      }
+      List<String> parentTableQualifiedName = Lists.newArrayList(parentFullyQualifiedName);
+      Table parentTab = null;
+      try {
+        parentTab = Hive.get().getTable(parentDatabaseName, parentTableName);
+      } catch (HiveException e) {
+        throw new RuntimeException(e);
+      }
+      if (parentTab == null) {
+        LOG.error("Table for primary key not found: "
+            + "databaseName: " + parentDatabaseName + ", "
+            + "tableName: " + parentTableName);
+        return ImmutableList.of();
+      }
+      ImmutableList.Builder<IntPair> keys = ImmutableList.builder();
+      for (ForeignKeyCol fkCol : fkCols) {
+        int fkPos = -1;
+        for (int i = 0; i < rowType.getFieldNames().size(); i++) {
+          if (rowType.getFieldNames().get(i).equals(fkCol.childColName)) {
+            fkPos = i;
+            break;
+          }
+        }
+        int pkPos = -1;
+        for (int i = 0; i < parentTab.getAllCols().size(); i++) {
+          if (parentTab.getAllCols().get(i).getName().equals(fkCol.parentColName)) {
+            pkPos = i;
+            break;
+          }
+        }
+        if (fkPos == -1 || pkPos == -1) {
+          LOG.error("Column for foreign key definition " + fkCol + " not found");
+          return ImmutableList.of();
+        }
+        keys.add(IntPair.of(fkPos, pkPos));
+      }
+      builder.add(RelReferentialConstraintImpl.of(foreignKeyTableQualifiedName,
+          parentTableQualifiedName, keys.build()));
+    }
+    return builder.build();
+  }
+
+  @Override
   public RelNode toRel(ToRelContext context) {
     return new LogicalTableScan(context.getCluster(), this);
   }
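generateReferentialConstraints() boils down to translating column names into ordinal positions on both sides of each foreign key. A self-contained sketch of that mapping step; plain int[] pairs stand in for Calcite's IntPair, and FkColumnMapper is illustrative only:

    import java.util.ArrayList;
    import java.util.List;

    final class FkColumnMapper {
      // Mirrors the position lookup in generateReferentialConstraints(): for each
      // FK column pair, find the child ordinal in this table's row type and the
      // parent ordinal in the referenced table's schema. Returns null when any
      // column cannot be resolved, in which case no constraint should be emitted.
      static List<int[]> mapPositions(List<String[]> fkPairs,   // {childCol, parentCol}
                                      List<String> childFields,
                                      List<String> parentFields) {
        List<int[]> pairs = new ArrayList<>();
        for (String[] fk : fkPairs) {
          int child = childFields.indexOf(fk[0]);
          int parent = parentFields.indexOf(fk[1]);
          if (child < 0 || parent < 0) {
            return null; // mirrors the LOG.error + empty-list bail-out above
          }
          pairs.add(new int[] {child, parent});
        }
        return pairs;
      }
    }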
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
index 2fa1138..a7c6fd0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java
@@ -17,22 +17,35 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.rules.views;
 
-import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewOnlyFilterRule;
 import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewProjectFilterRule;
+import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewOnlyFilterRule;
+import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewProjectJoinRule;
+import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewOnlyJoinRule;
+import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewProjectAggregateRule;
+import org.apache.calcite.rel.rules.AbstractMaterializedViewRule.MaterializedViewOnlyAggregateRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 
 /**
- * Planner rule that replaces (if possible) a {@link org.apache.calcite.rel.core.Project}
- * on a {@link org.apache.calcite.rel.core.Filter} and {@link org.apache.calcite.rel.core.Filter}
- * or a {@link org.apache.calcite.rel.core.Filter} on a {@link org.apache.calcite.rel.core.TableScan}
- * to use a Materialized View.
+ * Enables join and aggregate materialized view rewriting.
  */
 public class HiveMaterializedViewRule {
 
   public static final MaterializedViewProjectFilterRule INSTANCE_PROJECT_FILTER =
-      new MaterializedViewProjectFilterRule(HiveRelFactories.HIVE_BUILDER, false);
+      new MaterializedViewProjectFilterRule(HiveRelFactories.HIVE_BUILDER, true);
 
   public static final MaterializedViewOnlyFilterRule INSTANCE_FILTER =
-      new MaterializedViewOnlyFilterRule(HiveRelFactories.HIVE_BUILDER, false);
+      new MaterializedViewOnlyFilterRule(HiveRelFactories.HIVE_BUILDER, true);
+
+  public static final MaterializedViewProjectJoinRule INSTANCE_PROJECT_JOIN =
+      new MaterializedViewProjectJoinRule(HiveRelFactories.HIVE_BUILDER, true);
+
+  public static final MaterializedViewOnlyJoinRule INSTANCE_JOIN =
+      new MaterializedViewOnlyJoinRule(HiveRelFactories.HIVE_BUILDER, true);
+
+  public static final MaterializedViewProjectAggregateRule INSTANCE_PROJECT_AGGREGATE =
+      new MaterializedViewProjectAggregateRule(HiveRelFactories.HIVE_BUILDER, true);
+
+  public static final MaterializedViewOnlyAggregateRule INSTANCE_AGGREGATE =
+      new MaterializedViewOnlyAggregateRule(HiveRelFactories.HIVE_BUILDER, true);
 }
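The six rule instances are meant to be registered together, as the CalcitePlanner hunk further below does. A condensed sketch of that registration, assuming a Calcite RelOptPlanner in scope:

    import org.apache.calcite.plan.RelOptPlanner;

    final class MaterializedViewRewriting {
      // Covers filter-only, join and aggregate plans, each with and without a
      // top-level Project, so a query shaped either way can match a view.
      static void addRules(RelOptPlanner planner) {
        planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_FILTER);
        planner.addRule(HiveMaterializedViewRule.INSTANCE_FILTER);
        planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_JOIN);
        planner.addRule(HiveMaterializedViewRule.INSTANCE_JOIN);
        planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE);
        planner.addRule(HiveMaterializedViewRule.INSTANCE_AGGREGATE);
      }
    }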
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 2645fab..59c57d6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -76,6 +76,7 @@
 import org.apache.calcite.rel.core.AggregateCall;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.core.SetOp;
 import org.apache.calcite.rel.core.TableScan;
 import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
@@ -139,6 +140,8 @@
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.NotNullConstraint;
+import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
@@ -146,9 +149,9 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
@@ -389,27 +392,34 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
           } catch (SemanticException e) {
            throw new CalciteViewSemanticException(e.getMessage());
           }
+        } else if (cboCtx.type == PreCboCtx.Type.VIEW && materializedView) {
+          // Store text of the ORIGINAL QUERY
+          String originalText = ctx.getTokenRewriteStream().toString(
+              cboCtx.nodeOfInterest.getTokenStartIndex(),
+              cboCtx.nodeOfInterest.getTokenStopIndex());
+          unparseTranslator.applyTranslations(ctx.getTokenRewriteStream());
+          String expandedText = ctx.getTokenRewriteStream().toString(
+              cboCtx.nodeOfInterest.getTokenStartIndex(),
+              cboCtx.nodeOfInterest.getTokenStopIndex());
+          // Redo create-table/view analysis, because it's not part of
+          // doPhase1.
+          // Use the REWRITTEN AST
+          init(false);
+          setAST(newAST);
+          newAST = reAnalyzeViewAfterCbo(newAST);
+          viewSelect = newAST;
+          viewsExpanded = new ArrayList<>();
+          viewsExpanded.add(createVwDesc.getViewName());
+          createVwDesc.setViewOriginalText(originalText);
+          createVwDesc.setViewExpandedText(expandedText);
+        } else if (cboCtx.type == PreCboCtx.Type.CTAS) {
+          // CTAS
+          init(false);
+          setAST(newAST);
+          newAST = reAnalyzeCTASAfterCbo(newAST);
         } else {
+          // All others
           init(false);
-          if (cboCtx.type == PreCboCtx.Type.VIEW && materializedView) {
-            // Redo create-table/view analysis, because it's not part of
-            // doPhase1.
-            // Use the REWRITTEN AST
-            setAST(newAST);
-            newAST = reAnalyzeViewAfterCbo(newAST);
-            // Store text of the ORIGINAL QUERY
-            String originalText = ctx.getTokenRewriteStream().toString(
-                cboCtx.nodeOfInterest.getTokenStartIndex(),
-                cboCtx.nodeOfInterest.getTokenStopIndex());
-            createVwDesc.setViewOriginalText(originalText);
-            viewSelect = newAST;
-            viewsExpanded = new ArrayList<>();
-            viewsExpanded.add(createVwDesc.getViewName());
-          } else if (cboCtx.type == PreCboCtx.Type.CTAS) {
-            // CTAS
-            setAST(newAST);
-            newAST = reAnalyzeCTASAfterCbo(newAST);
-          }
         }
         if (oldHints.size() > 0) {
           if (getQB().getParseInfo().getHints() != null) {
@@ -1447,13 +1457,12 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
       disableSemJoinReordering = false;
     }
 
-    // 4. Run other optimizations that do not need stats
+    // 4.1. Run other optimizations that do not need stats
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
     calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
         HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, HiveUnionMergeRule.INSTANCE,
-        HiveProjectMergeRule.INSTANCE_NO_FORCE, HiveAggregateProjectMergeRule.INSTANCE,
-        HiveJoinCommuteRule.INSTANCE);
-    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats");
+        HiveProjectMergeRule.INSTANCE_NO_FORCE, HiveJoinCommuteRule.INSTANCE);
+    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats 1");
 
     // 5. Materialized view based rewriting
     // We disable it for CTAS and MV creation queries (trying to avoid any problem
@@ -1477,17 +1486,30 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
               public RelOptMaterialization apply(RelOptMaterialization materialization) {
                 final RelNode viewScan = materialization.tableRel;
                 final RelNode newViewScan;
-                if (viewScan instanceof DruidQuery) {
-                  final DruidQuery dq = (DruidQuery) viewScan;
-                  newViewScan = DruidQuery.create(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION),
-                      viewScan.getTable(), dq.getDruidTable(),
+                if (viewScan instanceof Project) {
+                  // There is a Project on top (due to nullability)
+                  final Project pq = (Project) viewScan;
+                  newViewScan = HiveProject.create(optCluster, copyNodeScan(viewScan),
+                      pq.getChildExps(), pq.getRowType(), Collections.<RelCollation> emptyList());
+                } else {
+                  newViewScan = copyNodeScan(viewScan);
+                }
+                return new RelOptMaterialization(newViewScan, materialization.queryRel, null);
+              }
+
+              private RelNode copyNodeScan(RelNode scan) {
+                final RelNode newScan;
+                if (scan instanceof DruidQuery) {
+                  final DruidQuery dq = (DruidQuery) scan;
+                  newScan = DruidQuery.create(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION),
+                      scan.getTable(), dq.getDruidTable(),
                       ImmutableList.of(dq.getTableScan()));
                 } else {
-                  newViewScan = new HiveTableScan(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION),
-                      (RelOptHiveTable) viewScan.getTable(), viewScan.getTable().getQualifiedName().get(0),
+                  newScan = new HiveTableScan(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION),
+                      (RelOptHiveTable) scan.getTable(), scan.getTable().getQualifiedName().get(0),
                       null, false, false);
                 }
-                return new RelOptMaterialization(newViewScan, materialization.queryRel, null);
+                return newScan;
               }
             }
         );
@@ -1501,6 +1523,10 @@ public RelOptMaterialization apply(RelOptMaterialization materialization) {
         // Add view-based rewriting rules to planner
         planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_FILTER);
         planner.addRule(HiveMaterializedViewRule.INSTANCE_FILTER);
+        planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_JOIN);
+        planner.addRule(HiveMaterializedViewRule.INSTANCE_JOIN);
+        planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE);
+        planner.addRule(HiveMaterializedViewRule.INSTANCE_AGGREGATE);
         // Optimize plan
         planner.setRoot(calciteOptimizedPlan);
         calciteOptimizedPlan = planner.findBestExp();
@@ -1512,6 +1538,12 @@ public RelOptMaterialization apply(RelOptMaterialization materialization) {
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: View-based rewriting");
     }
 
+    // 4.2. Run other optimizations that do not need stats
+    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
+    calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
+        HepMatchOrder.BOTTOM_UP, HiveAggregateProjectMergeRule.INSTANCE);
+    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats 2");
+
     // 6. Run aggregate-join transpose (cost based)
     // If it failed because of missing stats, we continue with
     // the rest of optimizations
@@ -1551,9 +1583,7 @@ public RelOptMaterialization apply(RelOptMaterialization materialization) {
           "Calcite: Removing sq_count_check UDF ");
     }
 
-
-
-    // 8. Run rule to fix windowing issue when it is done over
+    // 9. Run rule to fix windowing issue when it is done over
     // aggregation columns (HIVE-10627)
     if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) {
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
@@ -1562,7 +1592,7 @@ public RelOptMaterialization apply(RelOptMaterialization materialization) {
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Window fixing rule");
     }
 
-    // 9. Apply Druid transformation rules
+    // 10. Apply Druid transformation rules
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
     calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
         HepMatchOrder.BOTTOM_UP,
@@ -1580,10 +1610,10 @@ public RelOptMaterialization apply(RelOptMaterialization materialization) {
     );
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Druid transformation rules");
 
-    // 10. Run rules to aid in translation from Calcite tree to Hive tree
+    // 11. Run rules to aid in translation from Calcite tree to Hive tree
     if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
-      // 10.1. Merge join into multijoin operators (if possible)
+      // 11.1. Merge join into multijoin operators (if possible)
       calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), null,
           HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER,
           HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER,
@@ -1601,7 +1631,7 @@ public RelOptMaterialization apply(RelOptMaterialization materialization) {
           HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID,
           HiveProjectFilterPullUpConstantsRule.INSTANCE);
 
-      // 10.2. Introduce exchange operators below join/multijoin operators
+      // 11.2. Introduce exchange operators below join/multijoin operators
       calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
           HepMatchOrder.BOTTOM_UP, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN,
           HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN);
@@ -2376,7 +2406,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc
             optTable, druidTable, ImmutableList.of(scan));
       } else {
         // Build row type from field <type, name>
-        RelDataType rowType = TypeConverter.getType(cluster, rr, null);
+        RelDataType rowType = inferNotNullableColumns(tabMetaData, TypeConverter.getType(cluster, rr, null));
         // Build RelOptAbstractTable
         String fullyQualifiedTabName = tabMetaData.getDbName();
         if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) {
@@ -2412,6 +2442,66 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc
     return tableRel;
   }
 
+  private RelDataType inferNotNullableColumns(Table tabMetaData, RelDataType rowType)
+      throws HiveException {
+    // Retrieve not null constraints
+    final NotNullConstraint nnc = Hive.get().getReliableNotNullConstraints(
+        tabMetaData.getDbName(), tabMetaData.getTableName());
+    // Retrieve primary key constraints (cannot be null)
+    final PrimaryKeyInfo pkc = Hive.get().getReliablePrimaryKeys(
+        tabMetaData.getDbName(), tabMetaData.getTableName());
+    if (nnc.getNotNullConstraints().isEmpty() && pkc.getColNames().isEmpty()) {
+      return rowType;
+    }
+
+    // Build the bitset with not null columns
+    ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+    for (String nnCol : nnc.getNotNullConstraints().values()) {
+      int nnPos = -1;
+      for (int i = 0; i < rowType.getFieldNames().size(); i++) {
+        if (rowType.getFieldNames().get(i).equals(nnCol)) {
+          nnPos = i;
+          break;
+        }
+      }
+      if (nnPos == -1) {
+        LOG.error("Column for not null constraint definition " + nnCol + " not found");
+        return rowType;
+      }
+      builder.set(nnPos);
+    }
+    for (String pkCol : pkc.getColNames().values()) {
+      int pkPos = -1;
+      for (int i = 0; i < rowType.getFieldNames().size(); i++) {
+        if (rowType.getFieldNames().get(i).equals(pkCol)) {
+          pkPos = i;
+          break;
+        }
+      }
+      if (pkPos == -1) {
+        LOG.error("Column for primary key constraint definition " + pkCol + " not found");
+        return rowType;
+      }
+      builder.set(pkPos);
+    }
+    ImmutableBitSet bitSet = builder.build();
+
+    RexBuilder rexBuilder = cluster.getRexBuilder();
+    RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
+
+    List<RelDataType> fieldTypes = new LinkedList<>();
+    List<String> fieldNames = new LinkedList<>();
+    for (RelDataTypeField rdtf : rowType.getFieldList()) {
+      if (bitSet.indexOf(rdtf.getIndex()) != -1) {
+        fieldTypes.add(dtFactory.createTypeWithNullability(rdtf.getType(), false));
+      } else {
+        fieldTypes.add(rdtf.getType());
+      }
+      fieldNames.add(rdtf.getName());
+    }
+    return dtFactory.createStructType(fieldTypes, fieldNames);
+  }
+
   private TableType obtainTableType(Table tabMetaData) {
     if (tabMetaData.getStorageHandler() != null &&
         tabMetaData.getStorageHandler().toString().equals(
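inferNotNullableColumns() leaves field names and order untouched and only flips nullability on the flagged positions. The core type rewrite, extracted into a self-contained sketch; NotNullTypes and the use of java.util.BitSet in place of Calcite's ImmutableBitSet are illustrative choices:

    import java.util.ArrayList;
    import java.util.BitSet;
    import java.util.List;

    import org.apache.calcite.rel.type.RelDataType;
    import org.apache.calcite.rel.type.RelDataTypeFactory;
    import org.apache.calcite.rel.type.RelDataTypeField;

    final class NotNullTypes {
      // Same shape as inferNotNullableColumns(): rebuild the struct type,
      // stripping nullability from every field whose ordinal is set in
      // notNullPositions, and keeping every other field as-is.
      static RelDataType stripNullability(RelDataTypeFactory factory,
                                          RelDataType rowType,
                                          BitSet notNullPositions) {
        List<RelDataType> types = new ArrayList<>();
        List<String> names = new ArrayList<>();
        for (RelDataTypeField field : rowType.getFieldList()) {
          RelDataType t = field.getType();
          if (notNullPositions.get(field.getIndex())) {
            t = factory.createTypeWithNullability(t, false);
          }
          types.add(t);
          names.add(field.getName());
        }
        return factory.createStructType(types, names);
      }
    }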
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1c74779..14ac8e3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -11694,10 +11694,7 @@ protected void saveViewDefinition() throws SemanticException {
     }
     createVwDesc.setSchema(derivedSchema);
-    if (!createVwDesc.isMaterialized()) {
-      // materialized views don't store the expanded text as they won't be rewritten at query time.
-      createVwDesc.setViewExpandedText(expandedText);
-    }
+    createVwDesc.setViewExpandedText(expandedText);
   }
 
   static List<FieldSchema> convertRowSchemaToViewSchema(RowResolver rr) throws SemanticException {
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
index 6cc4a40..9425f6e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
@@ -76,6 +76,7 @@ public CreateViewDesc() {
    * @param inputFormat
    * @param outputFormat
    * @param location
+   * @param serName
    * @param serde
    * @param storageHandler
    * @param serdeProps
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
index d82973c..1dea6a9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
@@ -18,6 +18,17 @@
 
 package org.apache.hadoop.hive.ql.plan;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -32,7 +43,6 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
@@ -67,17 +77,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-
 /**
  * PlanUtils.
  *
@@ -412,36 +411,63 @@ public static TableDesc getTableDesc(CreateViewDesc crtViewDesc, String cols, St
     TableDesc ret;
 
     try {
-      Class serdeClass = JavaUtils.loadClass(crtViewDesc.getSerde());
-      ret = getTableDesc(serdeClass, new String(LazySerDeParameters.DefaultSeparators), cols,
-          colTypes, false, false);
+      HiveStorageHandler storageHandler = null;
+      if (crtViewDesc.getStorageHandler() != null) {
+        storageHandler = HiveUtils.getStorageHandler(
+            SessionState.getSessionConf(), crtViewDesc.getStorageHandler());
+      }
+
+      Class serdeClass = LazySimpleSerDe.class;
+      String separatorCode = Integer.toString(Utilities.ctrlaCode);
+      String columns = cols;
+      String columnTypes = colTypes;
+      boolean lastColumnTakesRestOfTheLine = false;
+
+      if (storageHandler != null) {
+        serdeClass = storageHandler.getSerDeClass();
+      } else if (crtViewDesc.getSerde() != null) {
+        serdeClass = JavaUtils.loadClass(crtViewDesc.getSerde());
+      }
+
+      ret = getTableDesc(serdeClass, separatorCode, columns, columnTypes,
+          lastColumnTakesRestOfTheLine, false);
 
       // set other table properties
-      /*
-      TODO - I don't think I need any of this
       Properties properties = ret.getProperties();
 
-      if (crtTblDesc.getTableName() != null && crtTblDesc.getDatabaseName() != null) {
-        properties.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
-            crtTblDesc.getTableName());
+      if (crtViewDesc.getStorageHandler() != null) {
+        properties.setProperty(
+            org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE,
+            crtViewDesc.getStorageHandler());
       }
 
-      if (crtTblDesc.getTblProps() != null) {
-        properties.putAll(crtTblDesc.getTblProps());
+      if (crtViewDesc.getTblProps() != null) {
+        properties.putAll(crtViewDesc.getTblProps());
+      }
+      if (crtViewDesc.getSerdeProps() != null) {
+        properties.putAll(crtViewDesc.getSerdeProps());
       }
-      */
 
       // replace the default input & output file format with those found in
       // crtTblDesc
-      Class<? extends InputFormat> inClass =
-          (Class<? extends InputFormat>) JavaUtils.loadClass(crtViewDesc.getInputFormat());
-      Class<? extends HiveOutputFormat> outClass =
-          (Class<? extends HiveOutputFormat>) JavaUtils.loadClass(crtViewDesc.getOutputFormat());
-
-      ret.setInputFileFormatClass(inClass);
-      ret.setOutputFileFormatClass(outClass);
+      Class in_class;
+      if (storageHandler != null) {
+        in_class = storageHandler.getInputFormatClass();
+      } else {
+        in_class = JavaUtils.loadClass(crtViewDesc.getInputFormat());
+      }
+      Class out_class;
+      if (storageHandler != null) {
+        out_class = storageHandler.getOutputFormatClass();
+      } else {
+        out_class = JavaUtils.loadClass(crtViewDesc.getOutputFormat());
+      }
+      ret.setInputFileFormatClass(in_class);
+      ret.setOutputFileFormatClass(out_class);
     } catch (ClassNotFoundException e) {
       throw new RuntimeException("Unable to find class in getTableDesc: " + e.getMessage(), e);
+    } catch (HiveException e) {
+      throw new RuntimeException("Error loading storage handler in getTableDesc: " + e.getMessage(), e);
     }
     return ret;
   }
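The format selection in getTableDesc() follows the same handler-first fallback as the SerDe choice: ask the storage handler for its classes, else load whatever the view declares. A minimal self-contained sketch of that fallback; FormatResolver and its FormatProvider stand-in are illustrative, not Hive API:

    // Hypothetical stand-in for the two HiveStorageHandler accessors used above.
    interface FormatProvider {
      Class<?> getInputFormatClass();
      Class<?> getOutputFormatClass();
    }

    final class FormatResolver {
      // Handler-first fallback, mirroring getTableDesc(): the storage handler's
      // formats win; otherwise the class named on the view is loaded reflectively.
      static Class<?> inputFormat(FormatProvider handler, String declared)
          throws ClassNotFoundException {
        return handler != null ? handler.getInputFormatClass() : Class.forName(declared);
      }

      static Class<?> outputFormat(FormatProvider handler, String declared)
          throws ClassNotFoundException {
        return handler != null ? handler.getOutputFormatClass() : Class.forName(declared);
      }
    }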
diff --git ql/src/test/queries/clientpositive/materialized_view_create_rewrite_2.q ql/src/test/queries/clientpositive/materialized_view_create_rewrite_2.q
new file mode 100644
index 0000000..9983bae
--- /dev/null
+++ ql/src/test/queries/clientpositive/materialized_view_create_rewrite_2.q
@@ -0,0 +1,88 @@
+set hive.strict.checks.cartesian.product=false;
+set hive.materializedview.rewriting=true;
+set hive.stats.column.autogather=true;
+
+create table cmv_basetable (a int, b varchar(256), c decimal(10,2), d int);
+
+insert into cmv_basetable values
+ (1, 'alfred', 10.30, 2),
+ (2, 'bob', 3.14, 3),
+ (2, 'bonnie', 172342.2, 3),
+ (3, 'calvin', 978.76, 3),
+ (3, 'charlie', 9.8, 1);
+
+create materialized view cmv_mat_view enable rewrite
+as select b from cmv_basetable where c > 10.0 group by a, b, c;
+
+-- CANNOT BE TRIGGERED
+explain
+select b from cmv_basetable where c > 20.0 group by a, b;
+
+select b from cmv_basetable where c > 20.0 group by a, b;
+
+create materialized view cmv_mat_view_2 enable rewrite
+as select b, c from cmv_basetable where c > 10.0 group by a, b, c;
+
+-- CANNOT BE TRIGGERED
+explain
+select b from cmv_basetable where c > 20.0 group by a, b;
+
+select b from cmv_basetable where c > 20.0 group by a, b;
+
+create materialized view cmv_mat_view_3 enable rewrite
+as select a, b, c from cmv_basetable where c > 10.0 group by a, b, c;
+
+-- CAN BE TRIGGERED
+explain
+select b from cmv_basetable where c > 20.0 group by a, b;
+
+select b from cmv_basetable where c > 20.0 group by a, b;
+
+create materialized view cmv_mat_view_4 enable rewrite
+as select a, b from cmv_basetable group by a, b;
+
+-- CAN BE TRIGGERED
+explain
+select b from cmv_basetable group by b;
+
+select b from cmv_basetable group by b;
+
+create table cmv_basetable_2 (a int, b varchar(256), c decimal(10,2), d int);
+
+insert into cmv_basetable_2 values
+ (1, 'alfred', 10.30, 2),
+ (3, 'calvin', 978.76, 3);
+
+create materialized view cmv_mat_view_5 enable rewrite
+as select cmv_basetable.a, cmv_basetable_2.c
+   from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a)
+   where cmv_basetable_2.c > 10.0
+   group by cmv_basetable.a, cmv_basetable_2.c;
+
+explain
+select cmv_basetable.a
+from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a)
+where cmv_basetable_2.c > 10.10
+group by cmv_basetable.a, cmv_basetable_2.c;
+
+select cmv_basetable.a
+from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a)
+where cmv_basetable_2.c > 10.10
+group by cmv_basetable.a, cmv_basetable_2.c;
+
+explain
+select cmv_basetable.a
+from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a)
+where cmv_basetable_2.c > 10.10
+group by cmv_basetable.a;
+
+select cmv_basetable.a
+from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a)
+where cmv_basetable_2.c > 10.10
+group by cmv_basetable.a;
+
+drop materialized view cmv_mat_view;
+drop materialized view cmv_mat_view_2;
+drop materialized view cmv_mat_view_3;
+drop materialized view cmv_mat_view_4;
+drop materialized view cmv_mat_view_5;
diff --git ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb.q ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb.q
new file mode 100644
index 0000000..00da517
--- /dev/null
+++ ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb.q
@@ -0,0 +1,346 @@
+set hive.strict.checks.cartesian.product=false;
+set hive.materializedview.rewriting=true;
+set hive.stats.column.autogather=true;
+
+CREATE TABLE `customer`(
+  `c_custkey` bigint,
+  `c_name` string,
+  `c_address` string,
+  `c_city` string,
+  `c_nation` string,
+  `c_region` string,
+  `c_phone` string,
+  `c_mktsegment` string,
+  primary key (`c_custkey`) disable rely)
+STORED AS ORC;
+
+CREATE TABLE `dates`(
+  `d_datekey` bigint,
+  `d_date` string,
+  `d_dayofweek` string,
+  `d_month` string,
+  `d_year` int,
+  `d_yearmonthnum` int,
+  `d_yearmonth` string,
+  `d_daynuminweek` int,
+  `d_daynuminmonth` int,
+  `d_daynuminyear` int,
+  `d_monthnuminyear` int,
+  `d_weeknuminyear` int,
+  `d_sellingseason` string,
+  `d_lastdayinweekfl` int,
+  `d_lastdayinmonthfl` int,
+  `d_holidayfl` int,
+  `d_weekdayfl` int,
+  primary key (`d_datekey`) disable rely
+)
+STORED AS ORC;
+
+CREATE TABLE `ssb_part`(
+  `p_partkey` bigint,
+  `p_name` string,
+  `p_mfgr` string,
+  `p_category` string,
+  `p_brand1` string,
+  `p_color` string,
+  `p_type` string,
+  `p_size` int,
+  `p_container` string,
+  primary key (`p_partkey`) disable rely)
+STORED AS ORC;
+
+CREATE TABLE `supplier`(
+  `s_suppkey` bigint,
+  `s_name` string,
+  `s_address` string,
+  `s_city` string,
+  `s_nation` string,
+  `s_region` string,
+  `s_phone` string,
+  primary key (`s_suppkey`) disable rely)
+STORED AS ORC;
+
+CREATE TABLE `lineorder`(
+  `lo_orderkey` bigint,
+  `lo_linenumber` int,
+  `lo_custkey` bigint not null disable rely,
+  `lo_partkey` bigint not null disable rely,
+  `lo_suppkey` bigint not null disable rely,
+  `lo_orderdate` bigint not null disable rely,
+  `lo_ordpriority` string,
+  `lo_shippriority` string,
+  `lo_quantity` double,
+  `lo_extendedprice` double,
+  `lo_ordtotalprice` double,
+  `lo_discount` double,
+  `lo_revenue` double,
+  `lo_supplycost` double,
+  `lo_tax` double,
+  `lo_commitdate` bigint,
+  `lo_shipmode` string,
+  primary key (`lo_orderkey`) disable rely,
+  constraint fk1 foreign key (`lo_custkey`) references `customer`(`c_custkey`) disable rely,
+  constraint fk2 foreign key (`lo_orderdate`) references `dates`(`d_datekey`) disable rely,
+  constraint fk3 foreign key (`lo_partkey`) references `ssb_part`(`p_partkey`) disable rely,
+  constraint fk4 foreign key (`lo_suppkey`) references `supplier`(`s_suppkey`) disable rely)
+STORED AS ORC;
+
+analyze table customer compute statistics for columns;
+analyze table dates compute statistics for columns;
+analyze table ssb_part compute statistics for columns;
+analyze table supplier compute statistics for columns;
+analyze table lineorder compute statistics for columns;
+
+CREATE MATERIALIZED VIEW `ssb_mv` ENABLE REWRITE
+AS
+SELECT
+  c_city,
+  c_nation,
+  c_region,
+  d_weeknuminyear,
+  d_year,
+  d_yearmonth,
+  d_yearmonthnum,
+  lo_discount,
+  lo_quantity,
+  p_brand1,
+  p_category,
+  p_mfgr,
+  s_city,
+  s_nation,
+  s_region,
+  lo_revenue,
+  lo_extendedprice * lo_discount discounted_price,
+  lo_revenue - lo_supplycost net_revenue
+FROM
+  customer, dates, lineorder, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and lo_custkey = c_custkey;
+
+-- Q1.1
+explain
+select
+  sum(lo_extendedprice*lo_discount) as revenue
+from
+  lineorder, dates
+where
+  lo_orderdate = d_datekey
+  and d_year = 1993
+  and lo_discount between 1 and 3
+  and lo_quantity < 25;
+
+-- Q1.2
+explain
+select
+  sum(lo_extendedprice*lo_discount) as revenue
+from
+  lineorder, dates
+where
+  lo_orderdate = d_datekey
+  and d_yearmonthnum = 199401
+  and lo_discount between 4 and 6
+  and lo_quantity between 26 and 35;
+
+-- Q1.3
+explain
+select
+  sum(lo_extendedprice*lo_discount) as revenue
+from
+  lineorder, dates
+where
+  lo_orderdate = d_datekey
+  and d_weeknuminyear = 6
+  and d_year = 1994
+  and lo_discount between 5 and 7
+  and lo_quantity between 26 and 35;
+
+-- Q2.1
+explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_category = 'MFGR#12'
+  and s_region = 'AMERICA'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1;
+
+-- Q2.2
+explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_brand1 between 'MFGR#2221' and 'MFGR#2228'
+  and s_region = 'ASIA'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1;
+
+-- Q2.3
+explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_brand1 = 'MFGR#2239'
+  and s_region = 'EUROPE'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1;
+
+-- Q3.1
+explain
+select
+  c_nation, s_nation, d_year,
+  sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and c_region = 'ASIA'
+  and s_region = 'ASIA'
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_nation, s_nation, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q3.2
+explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and c_nation = 'UNITED STATES'
+  and s_nation = 'UNITED STATES'
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q3.3
+explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and (c_city='UNITED KI1' or c_city='UNITED KI5')
+  and (s_city='UNITED KI1' or s_city='UNITED KI5')
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q3.4
+explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and (c_city='UNITED KI1' or c_city='UNITED KI5')
+  and (s_city='UNITED KI1' or s_city='UNITED KI5')
+  and d_yearmonth = 'Dec1997'
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q4.1
+explain
+select
+  d_year, c_nation,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_region = 'AMERICA'
+  and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+group by
+  d_year, c_nation
+order by
+  d_year, c_nation;
+
+-- Q4.2
+explain
+select
+  d_year, s_nation, p_category,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_region = 'AMERICA'
+  and (d_year = 1997 or d_year = 1998)
+  and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+group by
+  d_year, s_nation, p_category
+order by
+  d_year, s_nation, p_category;
+
+-- Q4.3
+explain
+select
+  d_year, s_city, p_brand1,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_nation = 'UNITED STATES'
+  and (d_year = 1997 or d_year = 1998)
+  and p_category = 'MFGR#14'
+group by
+  d_year, s_city, p_brand1
+order by
+  d_year, s_city, p_brand1;
+
+DROP MATERIALIZED VIEW `ssb_mv`;
diff --git ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb_2.q ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb_2.q
new file mode 100644
index 0000000..d38641f
--- /dev/null
+++ ql/src/test/queries/clientpositive/materialized_view_rewrite_ssb_2.q
@@ -0,0 +1,347 @@
+set hive.strict.checks.cartesian.product=false;
+set hive.materializedview.rewriting=true;
+set hive.stats.column.autogather=true;
+
+CREATE TABLE `customer`(
+  `c_custkey` bigint,
+  `c_name` string,
+  `c_address` string,
+  `c_city` string,
+  `c_nation` string,
+  `c_region` string,
+  `c_phone` string,
+  `c_mktsegment` string,
+  primary key (`c_custkey`) disable rely)
+STORED AS ORC;
+
+CREATE TABLE `dates`(
+  `d_datekey` bigint,
+  `d_date` string,
+  `d_dayofweek` string,
+  `d_month` string,
+  `d_year` int,
+  `d_yearmonthnum` int,
+  `d_yearmonth` string,
+  `d_daynuminweek` int,
+  `d_daynuminmonth` int,
+  `d_daynuminyear` int,
+  `d_monthnuminyear` int,
+  `d_weeknuminyear` int,
+  `d_sellingseason` string,
+  `d_lastdayinweekfl` int,
+  `d_lastdayinmonthfl` int,
+  `d_holidayfl` int,
+  `d_weekdayfl` int,
+  primary key (`d_datekey`) disable rely
+)
+STORED AS ORC;
+
+CREATE TABLE `ssb_part`(
+  `p_partkey` bigint,
+  `p_name` string,
+  `p_mfgr` string,
+  `p_category` string,
+  `p_brand1` string,
+  `p_color` string,
+  `p_type` string,
+  `p_size` int,
+  `p_container` string,
+  primary key (`p_partkey`) disable rely)
+STORED AS ORC;
+
+CREATE TABLE `supplier`(
+  `s_suppkey` bigint,
+  `s_name` string,
+  `s_address` string,
+  `s_city` string,
+  `s_nation` string,
+  `s_region` string,
+  `s_phone` string,
+  primary key (`s_suppkey`) disable rely)
+STORED AS ORC;
+
+CREATE TABLE `lineorder`(
+  `lo_orderkey` bigint,
+  `lo_linenumber` int,
+  `lo_custkey` bigint not null disable rely,
+  `lo_partkey` bigint not null disable rely,
+  `lo_suppkey` bigint not null disable rely,
+  `lo_orderdate` bigint not null disable rely,
+  `lo_ordpriority` string,
+  `lo_shippriority` string,
+  `lo_quantity` double,
+  `lo_extendedprice` double,
+  `lo_ordtotalprice` double,
+  `lo_discount` double,
+  `lo_revenue` double,
+  `lo_supplycost` double,
+  `lo_tax` double,
+  `lo_commitdate` bigint,
+  `lo_shipmode` string,
+  primary key (`lo_orderkey`) disable rely,
+  constraint fk1 foreign key (`lo_custkey`) references `customer`(`c_custkey`) disable rely,
+  constraint fk2 foreign key (`lo_orderdate`) references `dates`(`d_datekey`) disable rely,
+  constraint fk3 foreign key (`lo_partkey`) references `ssb_part`(`p_partkey`) disable rely,
+  constraint fk4 foreign key (`lo_suppkey`) references `supplier`(`s_suppkey`) disable rely)
+STORED AS ORC;
+
+analyze table customer compute statistics for columns;
+analyze table dates compute statistics for columns;
+analyze table ssb_part compute statistics for columns;
+analyze table supplier compute statistics for columns;
+analyze table lineorder compute statistics for columns;
+
+CREATE MATERIALIZED VIEW `ssb_mv` ENABLE REWRITE
+AS
+SELECT
+  cast(d_year || '-' || d_monthnuminyear || '-' || d_daynuminmonth as timestamp) as `__time`,
+  c_city,
+  c_nation,
+  c_region,
+  cast(d_weeknuminyear as string) d_weeknuminyear,
+  cast(d_year as string) d_year,
+  d_yearmonth,
+  cast(d_yearmonthnum as string) d_yearmonthnum,
+  cast(lo_discount as string) lo_discount,
+  cast(lo_quantity as string) lo_quantity,
+  p_brand1,
+  p_category,
+  p_mfgr,
+  s_city,
+  s_nation,
+  s_region,
+  lo_revenue,
+  lo_extendedprice * lo_discount discounted_price,
+  lo_revenue - lo_supplycost net_revenue
+FROM
+  customer, dates, lineorder, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and lo_custkey = c_custkey;
+
+-- Q1.1
+explain
+select
+  sum(lo_extendedprice*lo_discount) as revenue
+from
+  lineorder, dates
+where
+  lo_orderdate = d_datekey
+  and d_year = 1993
+  and lo_discount between 1 and 3
+  and lo_quantity < 25;
+
+-- Q1.2
+explain
+select
+  sum(lo_extendedprice*lo_discount) as revenue
+from
+  lineorder, dates
+where
+  lo_orderdate = d_datekey
+  and d_yearmonthnum = 199401
+  and lo_discount between 4 and 6
+  and lo_quantity between 26 and 35;
+
+-- Q1.3
+explain
+select
+  sum(lo_extendedprice*lo_discount) as revenue
+from
+  lineorder, dates
+where
+  lo_orderdate = d_datekey
+  and d_weeknuminyear = 6
+  and d_year = 1994
+  and lo_discount between 5 and 7
+  and lo_quantity between 26 and 35;
+
+-- Q2.1
+explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_category = 'MFGR#12'
+  and s_region = 'AMERICA'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1;
+
+-- Q2.2
+explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_brand1 between 'MFGR#2221' and 'MFGR#2228'
+  and s_region = 'ASIA'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1;
+
+-- Q2.3
+explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_brand1 = 'MFGR#2239'
+  and s_region = 'EUROPE'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1;
+
+-- Q3.1
+explain
+select
+  c_nation, s_nation, d_year,
+  sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and c_region = 'ASIA'
+  and s_region = 'ASIA'
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_nation, s_nation, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q3.2
+explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and c_nation = 'UNITED STATES'
+  and s_nation = 'UNITED STATES'
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q3.3
+explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and (c_city='UNITED KI1' or c_city='UNITED KI5')
+  and (s_city='UNITED KI1' or s_city='UNITED KI5')
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q3.4
+explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and (c_city='UNITED KI1' or c_city='UNITED KI5')
+  and (s_city='UNITED KI1' or s_city='UNITED KI5')
+  and d_yearmonth = 'Dec1997'
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc;
+
+-- Q4.1
+explain
+select
+  d_year, c_nation,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_region = 'AMERICA'
+  and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+group by
+  d_year, c_nation
+order by
+  d_year, c_nation;
+
+-- Q4.2
+explain
+select
+  d_year, s_nation, p_category,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_region = 'AMERICA'
+  and (d_year = 1997 or d_year = 1998)
+  and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+group by
+  d_year, s_nation, p_category
+order by
+  d_year, s_nation, p_category;
+
+-- Q4.3
+explain
+select
+  d_year, s_city, p_brand1,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_nation = 'UNITED STATES'
+  and (d_year = 1997 or d_year = 1998)
+  and p_category = 'MFGR#14'
+group by
+  d_year, s_city, p_brand1
+order by
+  d_year, s_city, p_brand1;
+
+DROP MATERIALIZED VIEW `ssb_mv`;
diff --git ql/src/test/results/clientpositive/llap/subquery_notin.q.out ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index ea3c78b..c04c3bc 100644
--- ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -5734,14 +5734,14 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 10 <- Map 9 (SIMPLE_EDGE)
-        Reducer 11 <- Map 9 (SIMPLE_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE)
-        Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE)
-        Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
-        Reducer 7 <- Map 4 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
-        Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+        Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
+        Reducer 3 <- Reducer 10 (ONE_TO_ONE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (SIMPLE_EDGE)
+        Reducer 6 <- Map 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+        Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+        Reducer 8 <- Map 4 (SIMPLE_EDGE)
+        Reducer 9 <- Map 11 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -5761,7 +5761,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
-        Map 4
+        Map 11
             Map Operator Tree:
                 TableScan
                   alias: t2
@@ -5784,7 +5784,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: int)
             Execution mode: llap
            LLAP IO: no inputs
-        Map 9
+        Map 4
            Map Operator Tree:
                TableScan
                  alias: t1
@@ -5810,30 +5810,23 @@ STAGE PLANS:
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
-                keys: KEY._col0 (type: char(100))
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: UDFToDouble(_col0) (type: double)
-                  sort order: +
-                  Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                  Statistics: Num rows: 2 Data size: 368 Basic stats:
COMPLETE Column stats: NONE - value expressions: _col0 (type: char(100)) - Reducer 11 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: char(100)) + keys: KEY._col0 (type: int), KEY._col1 (type: char(100)) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: char(100)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: char(100)), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: char(100)) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100)) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -5879,6 +5872,20 @@ STAGE PLANS: Reducer 5 Execution mode: llap Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(100)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: char(100)) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 @@ -5888,8 +5895,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(), count(_col0) - keys: _col1 (type: char(100)) + aggregations: count(), count(_col1) + keys: _col0 (type: char(100)) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -5899,7 +5906,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: char(100)) Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5914,7 +5921,21 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: char(100)) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 7 + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(100)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) 
(type: double) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: char(100)) + Reducer 9 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -5935,27 +5956,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100)) Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: char(100)) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: char(100)), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: char(100)) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100)) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: boolean) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/materialized_view_create_rewrite_2.q.out ql/src/test/results/clientpositive/materialized_view_create_rewrite_2.q.out new file mode 100644 index 0000000..c4bee9c --- /dev/null +++ ql/src/test/results/clientpositive/materialized_view_create_rewrite_2.q.out @@ -0,0 +1,574 @@ +PREHOOK: query: create table cmv_basetable (a int, b varchar(256), c decimal(10,2), d int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@cmv_basetable +POSTHOOK: query: create table cmv_basetable (a int, b varchar(256), c decimal(10,2), d int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cmv_basetable +PREHOOK: query: insert into cmv_basetable values + (1, 'alfred', 10.30, 2), + (2, 'bob', 3.14, 3), + (2, 'bonnie', 172342.2, 3), + (3, 'calvin', 978.76, 3), + (3, 'charlie', 9.8, 1) +PREHOOK: type: QUERY +PREHOOK: Output: default@cmv_basetable +POSTHOOK: query: insert into cmv_basetable values + (1, 'alfred', 10.30, 2), + (2, 'bob', 3.14, 3), + (2, 'bonnie', 172342.2, 3), + (3, 'calvin', 978.76, 3), + (3, 'charlie', 9.8, 1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@cmv_basetable +POSTHOOK: Lineage: cmv_basetable.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: cmv_basetable.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: cmv_basetable.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: cmv_basetable.d EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: create materialized view cmv_mat_view enable rewrite +as select b from cmv_basetable where c > 10.0 group by a, b, c +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_basetable +PREHOOK: Output: database:default +PREHOOK: Output: default@cmv_mat_view +POSTHOOK: query: create materialized view cmv_mat_view 
enable rewrite +as select b from cmv_basetable where c > 10.0 group by a, b, c +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_basetable +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cmv_mat_view +PREHOOK: query: explain +select b from cmv_basetable where c > 20.0 group by a, b +PREHOOK: type: QUERY +POSTHOOK: query: explain +select b from cmv_basetable where c > 20.0 group by a, b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cmv_basetable + Statistics: Num rows: 5 Data size: 81 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c > 20) (type: boolean) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: varchar(256)) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: int), b (type: varchar(256)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: varchar(256)) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: varchar(256)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select b from cmv_basetable where c > 20.0 group by a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable +#### A masked pattern was here #### +POSTHOOK: query: select b from cmv_basetable where c > 20.0 group by a, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable +#### A masked pattern was here #### +bonnie +calvin +PREHOOK: query: create materialized view cmv_mat_view_2 enable rewrite +as select b, c from cmv_basetable where c > 10.0 group by a, b, c +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_basetable +PREHOOK: Output: database:default +PREHOOK: Output: default@cmv_mat_view_2 +POSTHOOK: query: create materialized view cmv_mat_view_2 enable rewrite +as select b, c from cmv_basetable where c > 10.0 group by a, b, c +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_basetable +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cmv_mat_view_2 +PREHOOK: query: explain +select b from cmv_basetable where c > 20.0 group by a, b +PREHOOK: type: QUERY +POSTHOOK: query: explain +select b from cmv_basetable where c > 20.0 group by a, b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cmv_basetable + Statistics: Num rows: 5 Data size: 81 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c > 20) (type: boolean) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: varchar(256)) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: int), b (type: varchar(256)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: varchar(256)) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: varchar(256)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select b from cmv_basetable where c > 20.0 group by a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable +#### A masked pattern was here #### +POSTHOOK: query: select b from cmv_basetable where c > 20.0 group by a, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable +#### A masked pattern was here #### +bonnie +calvin +PREHOOK: query: create materialized view cmv_mat_view_3 enable rewrite +as select a, b, c from cmv_basetable where c > 10.0 group by a, b, c +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_basetable +PREHOOK: Output: database:default +PREHOOK: Output: default@cmv_mat_view_3 +POSTHOOK: query: create materialized view cmv_mat_view_3 enable rewrite +as select a, b, c from cmv_basetable where c > 10.0 group by a, b, c +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_basetable +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cmv_mat_view_3 +PREHOOK: query: explain +select b from cmv_basetable where c > 20.0 group by a, b +PREHOOK: type: QUERY +POSTHOOK: query: explain +select b from cmv_basetable where c > 20.0 group by a, b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.cmv_mat_view_3 + Statistics: Num rows: 1 Data size: 483 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c > 20) (type: boolean) + Statistics: Num rows: 1 Data size: 483 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: varchar(256)) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 483 Basic stats: COMPLETE Column 
stats: NONE + Group By Operator + keys: a (type: int), b (type: varchar(256)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 483 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: varchar(256)) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 483 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 483 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: varchar(256)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 483 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 483 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select b from cmv_basetable where c > 20.0 group by a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable +PREHOOK: Input: default@cmv_mat_view_3 +#### A masked pattern was here #### +POSTHOOK: query: select b from cmv_basetable where c > 20.0 group by a, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable +POSTHOOK: Input: default@cmv_mat_view_3 +#### A masked pattern was here #### +bonnie +calvin +PREHOOK: query: create materialized view cmv_mat_view_4 enable rewrite +as select a, b from cmv_basetable group by a, b +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_basetable +PREHOOK: Output: database:default +PREHOOK: Output: default@cmv_mat_view_4 +POSTHOOK: query: create materialized view cmv_mat_view_4 enable rewrite +as select a, b from cmv_basetable group by a, b +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_basetable +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cmv_mat_view_4 +PREHOOK: query: explain +select b from cmv_basetable group by b +PREHOOK: type: QUERY +POSTHOOK: query: explain +select b from cmv_basetable group by b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.cmv_mat_view_4 + Statistics: Num rows: 1 Data size: 342 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: varchar(256)) + outputColumnNames: b + Statistics: Num rows: 1 Data size: 342 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: b (type: varchar(256)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 342 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 1 Data size: 342 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: varchar(256)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 342 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 342 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select b from cmv_basetable group by b +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable +PREHOOK: Input: default@cmv_mat_view_4 +#### A masked pattern was here #### +POSTHOOK: query: select b from cmv_basetable group by b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable +POSTHOOK: Input: default@cmv_mat_view_4 +#### A masked pattern was here #### +alfred +bob +bonnie +calvin +charlie +PREHOOK: query: create table cmv_basetable_2 (a int, b varchar(256), c decimal(10,2), d int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@cmv_basetable_2 +POSTHOOK: query: create table cmv_basetable_2 (a int, b varchar(256), c decimal(10,2), d int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cmv_basetable_2 +PREHOOK: query: insert into cmv_basetable_2 values + (1, 'alfred', 10.30, 2), + (3, 'calvin', 978.76, 3) +PREHOOK: type: QUERY +PREHOOK: Output: default@cmv_basetable_2 +POSTHOOK: query: insert into cmv_basetable_2 values + (1, 'alfred', 10.30, 2), + (3, 'calvin', 978.76, 3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@cmv_basetable_2 +POSTHOOK: Lineage: cmv_basetable_2.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: cmv_basetable_2.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: cmv_basetable_2.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: cmv_basetable_2.d EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: create materialized view cmv_mat_view_5 enable rewrite +as select cmv_basetable.a, cmv_basetable_2.c + from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a) + where cmv_basetable_2.c > 10.0 + group by cmv_basetable.a, cmv_basetable_2.c +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_basetable +PREHOOK: Input: default@cmv_basetable_2 +PREHOOK: Output: database:default +PREHOOK: Output: default@cmv_mat_view_5 +POSTHOOK: query: create materialized view cmv_mat_view_5 enable rewrite +as select cmv_basetable.a, cmv_basetable_2.c + from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a) + where cmv_basetable_2.c > 10.0 + group by cmv_basetable.a, cmv_basetable_2.c +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_basetable +POSTHOOK: Input: default@cmv_basetable_2 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cmv_mat_view_5 +PREHOOK: query: explain +select cmv_basetable.a +from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a) +where cmv_basetable_2.c > 10.10 +group by cmv_basetable.a, cmv_basetable_2.c +PREHOOK: type: QUERY +POSTHOOK: query: explain +select cmv_basetable.a +from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a) +where cmv_basetable_2.c > 10.10 +group by 
cmv_basetable.a, cmv_basetable_2.c +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.cmv_mat_view_5 + Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c > 10.1) (type: boolean) + Statistics: Num rows: 1 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 162 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cmv_basetable.a +from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a) +where cmv_basetable_2.c > 10.10 +group by cmv_basetable.a, cmv_basetable_2.c +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable +PREHOOK: Input: default@cmv_basetable_2 +PREHOOK: Input: default@cmv_mat_view_5 +#### A masked pattern was here #### +POSTHOOK: query: select cmv_basetable.a +from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a) +where cmv_basetable_2.c > 10.10 +group by cmv_basetable.a, cmv_basetable_2.c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable +POSTHOOK: Input: default@cmv_basetable_2 +POSTHOOK: Input: default@cmv_mat_view_5 +#### A masked pattern was here #### +1 +3 +PREHOOK: query: explain +select cmv_basetable.a +from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a) +where cmv_basetable_2.c > 10.10 +group by cmv_basetable.a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select cmv_basetable.a +from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a) +where cmv_basetable_2.c > 10.10 +group by cmv_basetable.a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.cmv_mat_view_5 + Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c > 10.1) (type: boolean) + Statistics: Num rows: 1 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 162 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select cmv_basetable.a +from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a) +where cmv_basetable_2.c > 10.10 +group by cmv_basetable.a +PREHOOK: type: QUERY +PREHOOK: Input: default@cmv_basetable +PREHOOK: Input: default@cmv_basetable_2 +PREHOOK: Input: default@cmv_mat_view_5 +#### A masked pattern was here #### +POSTHOOK: query: select cmv_basetable.a +from cmv_basetable join cmv_basetable_2 on (cmv_basetable.a = cmv_basetable_2.a) +where cmv_basetable_2.c > 10.10 +group by cmv_basetable.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cmv_basetable +POSTHOOK: Input: default@cmv_basetable_2 +POSTHOOK: Input: default@cmv_mat_view_5 +#### A masked pattern was here #### +1 +3 +PREHOOK: query: drop materialized view cmv_mat_view +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_mat_view +PREHOOK: Output: default@cmv_mat_view +POSTHOOK: query: drop materialized view cmv_mat_view +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_mat_view +POSTHOOK: Output: default@cmv_mat_view +PREHOOK: query: drop materialized view cmv_mat_view_2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_mat_view_2 +PREHOOK: Output: default@cmv_mat_view_2 +POSTHOOK: query: drop materialized view cmv_mat_view_2 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_mat_view_2 +POSTHOOK: Output: default@cmv_mat_view_2 +PREHOOK: query: drop materialized view cmv_mat_view_3 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_mat_view_3 +PREHOOK: Output: default@cmv_mat_view_3 +POSTHOOK: query: drop materialized view cmv_mat_view_3 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_mat_view_3 +POSTHOOK: Output: default@cmv_mat_view_3 +PREHOOK: query: drop materialized view cmv_mat_view_4 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_mat_view_4 +PREHOOK: Output: default@cmv_mat_view_4 +POSTHOOK: query: drop materialized view cmv_mat_view_4 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_mat_view_4 +POSTHOOK: Output: default@cmv_mat_view_4 +PREHOOK: query: drop materialized view cmv_mat_view_5 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_mat_view_5 +PREHOOK: Output: default@cmv_mat_view_5 +POSTHOOK: query: drop materialized view cmv_mat_view_5 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_mat_view_5 +POSTHOOK: Output: default@cmv_mat_view_5 diff --git ql/src/test/results/clientpositive/materialized_view_describe.q.out ql/src/test/results/clientpositive/materialized_view_describe.q.out index 92de293..f5daa67 100644 --- ql/src/test/results/clientpositive/materialized_view_describe.q.out +++ ql/src/test/results/clientpositive/materialized_view_describe.q.out @@ -82,7 +82,7 @@ Sort Columns: [] # View Information View Original Text: select a, c from cmv_basetable -View Expanded Text: null +View Expanded Text: select `cmv_basetable`.`a`, `cmv_basetable`.`c` from `default`.`cmv_basetable` View Rewrite Enabled: No PREHOOK: query: show tblproperties cmv_mat_view PREHOOK: type: SHOW_TBLPROPERTIES @@ -157,7 +157,7 @@ Table Parameters: # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: 
org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] @@ -165,7 +165,7 @@ Sort Columns: [] # View Information View Original Text: select a from cmv_basetable -View Expanded Text: null +View Expanded Text: select `cmv_basetable`.`a` from `default`.`cmv_basetable` View Rewrite Enabled: No PREHOOK: query: select a from cmv_mat_view2 PREHOOK: type: QUERY @@ -241,7 +241,7 @@ Sort Columns: [] # View Information View Original Text: select * from cmv_basetable -View Expanded Text: null +View Expanded Text: select `cmv_basetable`.`a`, `cmv_basetable`.`b`, `cmv_basetable`.`c` from `default`.`cmv_basetable` View Rewrite Enabled: No PREHOOK: query: select a, b, c from cmv_mat_view3 PREHOOK: type: QUERY @@ -320,7 +320,7 @@ Table Parameters: # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] @@ -328,7 +328,7 @@ Sort Columns: [] # View Information View Original Text: select a from cmv_basetable -View Expanded Text: null +View Expanded Text: select `cmv_basetable`.`a` from `default`.`cmv_basetable` View Rewrite Enabled: No PREHOOK: query: select a from cmv_mat_view4 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/materialized_view_rewrite_ssb.q.out ql/src/test/results/clientpositive/materialized_view_rewrite_ssb.q.out new file mode 100644 index 0000000..77e9ddc --- /dev/null +++ ql/src/test/results/clientpositive/materialized_view_rewrite_ssb.q.out @@ -0,0 +1,1621 @@ +PREHOOK: query: CREATE TABLE `customer`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer +POSTHOOK: query: CREATE TABLE `customer`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer +PREHOOK: query: CREATE TABLE `dates`( + `d_datekey` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`) disable rely +) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dates +POSTHOOK: query: CREATE TABLE `dates`( + `d_datekey` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, 
+ `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`) disable rely +) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dates +PREHOOK: query: CREATE TABLE `ssb_part`( + `p_partkey` bigint, + `p_name` string, + `p_mfgr` string, + `p_category` string, + `p_brand1` string, + `p_color` string, + `p_type` string, + `p_size` int, + `p_container` string, + primary key (`p_partkey`) disable rely) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ssb_part +POSTHOOK: query: CREATE TABLE `ssb_part`( + `p_partkey` bigint, + `p_name` string, + `p_mfgr` string, + `p_category` string, + `p_brand1` string, + `p_color` string, + `p_type` string, + `p_size` int, + `p_container` string, + primary key (`p_partkey`) disable rely) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ssb_part +PREHOOK: query: CREATE TABLE `supplier`( + `s_suppkey` bigint, + `s_name` string, + `s_address` string, + `s_city` string, + `s_nation` string, + `s_region` string, + `s_phone` string, + primary key (`s_suppkey`) disable rely) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@supplier +POSTHOOK: query: CREATE TABLE `supplier`( + `s_suppkey` bigint, + `s_name` string, + `s_address` string, + `s_city` string, + `s_nation` string, + `s_region` string, + `s_phone` string, + primary key (`s_suppkey`) disable rely) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@supplier +PREHOOK: query: CREATE TABLE `lineorder`( + `lo_orderkey` bigint, + `lo_linenumber` int, + `lo_custkey` bigint not null disable rely, + `lo_partkey` bigint not null disable rely, + `lo_suppkey` bigint not null disable rely, + `lo_orderdate` bigint not null disable rely, + `lo_ordpriority` string, + `lo_shippriority` string, + `lo_quantity` double, + `lo_extendedprice` double, + `lo_ordtotalprice` double, + `lo_discount` double, + `lo_revenue` double, + `lo_supplycost` double, + `lo_tax` double, + `lo_commitdate` bigint, + `lo_shipmode` string, + primary key (`lo_orderkey`) disable rely, + constraint fk1 foreign key (`lo_custkey`) references `customer`(`c_custkey`) disable rely, + constraint fk2 foreign key (`lo_orderdate`) references `dates`(`d_datekey`) disable rely, + constraint fk3 foreign key (`lo_partkey`) references `ssb_part`(`p_partkey`) disable rely, + constraint fk4 foreign key (`lo_suppkey`) references `supplier`(`s_suppkey`) disable rely) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineorder +POSTHOOK: query: CREATE TABLE `lineorder`( + `lo_orderkey` bigint, + `lo_linenumber` int, + `lo_custkey` bigint not null disable rely, + `lo_partkey` bigint not null disable rely, + `lo_suppkey` bigint not null disable rely, + `lo_orderdate` bigint not null disable rely, + `lo_ordpriority` string, + `lo_shippriority` string, + `lo_quantity` double, + `lo_extendedprice` double, + `lo_ordtotalprice` double, + `lo_discount` double, + `lo_revenue` double, + `lo_supplycost` double, + `lo_tax` double, + `lo_commitdate` bigint, + `lo_shipmode` string, + primary key (`lo_orderkey`) disable rely, + constraint fk1 foreign key (`lo_custkey`) references `customer`(`c_custkey`) disable rely, + constraint fk2 foreign key (`lo_orderdate`) references `dates`(`d_datekey`) disable rely, + constraint fk3 
foreign key (`lo_partkey`) references `ssb_part`(`p_partkey`) disable rely, + constraint fk4 foreign key (`lo_suppkey`) references `supplier`(`s_suppkey`) disable rely) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineorder +PREHOOK: query: analyze table customer compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +#### A masked pattern was here #### +POSTHOOK: query: analyze table customer compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +#### A masked pattern was here #### +PREHOOK: query: analyze table dates compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@dates +#### A masked pattern was here #### +POSTHOOK: query: analyze table dates compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates +#### A masked pattern was here #### +PREHOOK: query: analyze table ssb_part compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@ssb_part +#### A masked pattern was here #### +POSTHOOK: query: analyze table ssb_part compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ssb_part +#### A masked pattern was here #### +PREHOOK: query: analyze table supplier compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@supplier +#### A masked pattern was here #### +POSTHOOK: query: analyze table supplier compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@supplier +#### A masked pattern was here #### +PREHOOK: query: analyze table lineorder compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@lineorder +#### A masked pattern was here #### +POSTHOOK: query: analyze table lineorder compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineorder +#### A masked pattern was here #### +PREHOOK: query: CREATE MATERIALIZED VIEW `ssb_mv` ENABLE REWRITE +AS +SELECT + c_city, + c_nation, + c_region, + d_weeknuminyear, + d_year, + d_yearmonth, + d_yearmonthnum, + lo_discount, + lo_quantity, + p_brand1, + p_category, + p_mfgr, + s_city, + s_nation, + s_region, + lo_revenue, + lo_extendedprice * lo_discount discounted_price, + lo_revenue - lo_supplycost net_revenue +FROM + customer, dates, lineorder, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and lo_custkey = c_custkey +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@customer +PREHOOK: Input: default@dates +PREHOOK: Input: default@lineorder +PREHOOK: Input: default@ssb_part +PREHOOK: Input: default@supplier +PREHOOK: Output: database:default +PREHOOK: Output: default@ssb_mv +POSTHOOK: query: CREATE MATERIALIZED VIEW `ssb_mv` ENABLE REWRITE +AS +SELECT + c_city, + c_nation, + c_region, + d_weeknuminyear, + d_year, + d_yearmonth, + d_yearmonthnum, + lo_discount, + lo_quantity, + p_brand1, + p_category, + p_mfgr, + s_city, + s_nation, + s_region, + lo_revenue, + lo_extendedprice * lo_discount discounted_price, + lo_revenue - lo_supplycost net_revenue +FROM + customer, dates, lineorder, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and lo_custkey = c_custkey +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@dates +POSTHOOK: Input: default@lineorder +POSTHOOK: Input: default@ssb_part +POSTHOOK: Input: default@supplier +POSTHOOK: 
Output: database:default +POSTHOOK: Output: default@ssb_mv +PREHOOK: query: explain +select + sum(lo_extendedprice*lo_discount) as revenue +from + lineorder, dates +where + lo_orderdate = d_datekey + and d_year = 1993 + and lo_discount between 1 and 3 + and lo_quantity < 25 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(lo_extendedprice*lo_discount) as revenue +from + lineorder, dates +where + lo_orderdate = d_datekey + and d_year = 1993 + and lo_discount between 1 and 3 + and lo_quantity < 25 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((lo_quantity < 25.0) and (d_year = 1993) and lo_discount BETWEEN 1.0 AND 3.0) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: discounted_price (type: double) + outputColumnNames: discounted_price + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(discounted_price) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + sum(lo_extendedprice*lo_discount) as revenue +from + lineorder, dates +where + lo_orderdate = d_datekey + and d_yearmonthnum = 199401 + and lo_discount between 4 and 6 + and lo_quantity between 26 and 35 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(lo_extendedprice*lo_discount) as revenue +from + lineorder, dates +where + lo_orderdate = d_datekey + and d_yearmonthnum = 199401 + and lo_discount between 4 and 6 + and lo_quantity between 26 and 35 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((d_yearmonthnum = 199401) and lo_discount BETWEEN 4.0 AND 6.0 and lo_quantity BETWEEN 26.0 AND 35.0) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: discounted_price (type: double) + outputColumnNames: discounted_price + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(discounted_price) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort 
order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + sum(lo_extendedprice*lo_discount) as revenue +from + lineorder, dates +where + lo_orderdate = d_datekey + and d_weeknuminyear = 6 + and d_year = 1994 + and lo_discount between 5 and 7 + and lo_quantity between 26 and 35 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(lo_extendedprice*lo_discount) as revenue +from + lineorder, dates +where + lo_orderdate = d_datekey + and d_weeknuminyear = 6 + and d_year = 1994 + and lo_discount between 5 and 7 + and lo_quantity between 26 and 35 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((d_weeknuminyear = 6) and (d_year = 1994) and lo_quantity BETWEEN 26.0 AND 35.0 and lo_discount BETWEEN 5.0 AND 7.0) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: discounted_price (type: double) + outputColumnNames: discounted_price + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(discounted_price) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + sum(lo_revenue) as lo_revenue, d_year, p_brand1 +from + lineorder, dates, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_category = 'MFGR#12' + and s_region = 'AMERICA' +group by + d_year, p_brand1 +order by + d_year, p_brand1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(lo_revenue) as lo_revenue, d_year, p_brand1 +from + lineorder, dates, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_category = 'MFGR#12' 
+ and s_region = 'AMERICA' +group by + d_year, p_brand1 +order by + d_year, p_brand1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((s_region = 'AMERICA') and (p_category = 'MFGR#12')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: d_year (type: int), p_brand1 (type: string), lo_revenue (type: double) + outputColumnNames: d_year, p_brand1, lo_revenue + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(lo_revenue) + keys: d_year (type: int), p_brand1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col2 (type: double), _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: string) + sort order: ++ + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: double), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + sum(lo_revenue) as lo_revenue, d_year, p_brand1 +from + lineorder, dates, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_brand1 between 'MFGR#2221' and 'MFGR#2228' + and s_region = 'ASIA' +group by + d_year, p_brand1 +order by + d_year, p_brand1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(lo_revenue) as lo_revenue, d_year, p_brand1 +from + lineorder, dates, 
ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_brand1 between 'MFGR#2221' and 'MFGR#2228' + and s_region = 'ASIA' +group by + d_year, p_brand1 +order by + d_year, p_brand1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((s_region = 'ASIA') and p_brand1 BETWEEN 'MFGR#2221' AND 'MFGR#2228') (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: d_year (type: int), p_brand1 (type: string), lo_revenue (type: double) + outputColumnNames: d_year, p_brand1, lo_revenue + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(lo_revenue) + keys: d_year (type: int), p_brand1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col2 (type: double), _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: string) + sort order: ++ + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: double), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + sum(lo_revenue) as lo_revenue, d_year, p_brand1 +from + lineorder, dates, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_brand1 = 'MFGR#2239' + and s_region = 'EUROPE' +group by + d_year, p_brand1 
+order by + d_year, p_brand1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(lo_revenue) as lo_revenue, d_year, p_brand1 +from + lineorder, dates, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_brand1 = 'MFGR#2239' + and s_region = 'EUROPE' +group by + d_year, p_brand1 +order by + d_year, p_brand1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((p_brand1 = 'MFGR#2239') and (s_region = 'EUROPE')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: d_year (type: int), lo_revenue (type: double) + outputColumnNames: d_year, lo_revenue + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(lo_revenue) + keys: d_year (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: double), KEY.reducesinkkey0 (type: int), 'MFGR#2239' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + c_nation, s_nation, d_year, + sum(lo_revenue) as lo_revenue +from + customer, lineorder, supplier, dates +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_region = 'ASIA' + and s_region = 'ASIA' + and d_year >= 1992 and d_year <= 1997 +group by + c_nation, s_nation, d_year +order by + d_year asc, 
lo_revenue desc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + c_nation, s_nation, d_year, + sum(lo_revenue) as lo_revenue +from + customer, lineorder, supplier, dates +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_region = 'ASIA' + and s_region = 'ASIA' + and d_year >= 1992 and d_year <= 1997 +group by + c_nation, s_nation, d_year +order by + d_year asc, lo_revenue desc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((d_year >= 1992) and (d_year <= 1997) and (c_region = 'ASIA') and (s_region = 'ASIA')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: c_nation (type: string), d_year (type: int), s_nation (type: string), lo_revenue (type: double) + outputColumnNames: c_nation, d_year, s_nation, lo_revenue + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(lo_revenue) + keys: d_year (type: int), c_nation (type: string), s_nation (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col0 (type: int), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col3 (type: double) + sort order: +- + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + c_city, s_city, d_year, sum(lo_revenue) as lo_revenue +from + customer, lineorder, supplier, dates +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_nation = 'UNITED STATES' + and s_nation = 'UNITED STATES' + and d_year >= 1992 and d_year <= 1997 +group by + c_city, s_city, d_year +order by + d_year asc, lo_revenue desc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + c_city, s_city, d_year, sum(lo_revenue) as lo_revenue +from + customer, lineorder, supplier, dates +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_nation = 'UNITED STATES' + and s_nation = 'UNITED STATES' + and d_year >= 1992 and d_year <= 1997 +group by + c_city, s_city, d_year +order by + d_year asc, lo_revenue desc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((d_year >= 1992) and (d_year <= 1997) and (c_nation = 'UNITED STATES') and (s_nation = 'UNITED STATES')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: c_city (type: string), d_year (type: int), s_city (type: string), lo_revenue (type: double) + outputColumnNames: c_city, d_year, s_city, lo_revenue + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(lo_revenue) + keys: d_year (type: int), c_city (type: string), s_city (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col0 (type: int), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col3 (type: double) + sort order: +- + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select 
Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + c_city, s_city, d_year, sum(lo_revenue) as lo_revenue +from + customer, lineorder, supplier, dates +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and (c_city='UNITED KI1' or c_city='UNITED KI5') + and (s_city='UNITED KI1' or s_city='UNITED KI5') + and d_year >= 1992 and d_year <= 1997 +group by + c_city, s_city, d_year +order by + d_year asc, lo_revenue desc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + c_city, s_city, d_year, sum(lo_revenue) as lo_revenue +from + customer, lineorder, supplier, dates +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and (c_city='UNITED KI1' or c_city='UNITED KI5') + and (s_city='UNITED KI1' or s_city='UNITED KI5') + and d_year >= 1992 and d_year <= 1997 +group by + c_city, s_city, d_year +order by + d_year asc, lo_revenue desc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((d_year >= 1992) and (d_year <= 1997) and ((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5'))) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(lo_revenue) + keys: d_year (type: int), c_city (type: string), s_city (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col0 (type: int), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col3 (type: double) + sort order: +- + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + c_city, s_city, d_year, sum(lo_revenue) as lo_revenue +from + customer, lineorder, supplier, dates +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and (c_city='UNITED KI1' or c_city='UNITED KI5') + and (s_city='UNITED KI1' or s_city='UNITED KI5') + and d_yearmonth = 'Dec1997' +group by + c_city, s_city, d_year +order by + d_year asc, lo_revenue desc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + c_city, s_city, d_year, sum(lo_revenue) as lo_revenue +from + customer, lineorder, supplier, dates +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and (c_city='UNITED KI1' or c_city='UNITED KI5') + and (s_city='UNITED KI1' or s_city='UNITED KI5') + and d_yearmonth = 'Dec1997' +group by + c_city, s_city, d_year +order by + d_year asc, lo_revenue desc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((d_yearmonth = 'Dec1997') and ((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5'))) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: c_city (type: string), d_year (type: int), s_city (type: string), lo_revenue (type: double) + outputColumnNames: c_city, d_year, s_city, lo_revenue + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(lo_revenue) + keys: d_year (type: int), c_city (type: string), s_city (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: 
string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col0 (type: int), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col3 (type: double) + sort order: +- + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + d_year, c_nation, + sum(lo_revenue - lo_supplycost) as profit +from + dates, customer, supplier, ssb_part, lineorder +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_region = 'AMERICA' + and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') +group by + d_year, c_nation +order by + d_year, c_nation +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + d_year, c_nation, + sum(lo_revenue - lo_supplycost) as profit +from + dates, customer, supplier, ssb_part, lineorder +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_region = 'AMERICA' + and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') +group by + d_year, c_nation +order by + d_year, c_nation +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((s_region = 'AMERICA') and (c_region = 'AMERICA') and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2'))) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: c_nation (type: string), d_year (type: int), net_revenue (type: double) + outputColumnNames: c_nation, d_year, net_revenue + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(net_revenue) + keys: d_year (type: int), c_nation (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + d_year, s_nation, p_category, + sum(lo_revenue - lo_supplycost) as profit +from + dates, customer, supplier, ssb_part, lineorder +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_region = 'AMERICA' + and (d_year = 1997 or d_year = 1998) + and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') +group by + d_year, s_nation, p_category +order by + d_year, s_nation, p_category +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + d_year, s_nation, p_category, + sum(lo_revenue - lo_supplycost) as profit +from + dates, customer, supplier, ssb_part, lineorder +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_region = 'AMERICA' + and (d_year = 1997 or d_year = 1998) + and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') +group by + d_year, s_nation, p_category +order by + d_year, s_nation, p_category +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((c_region = 'AMERICA') and (s_region = 'AMERICA') and ((d_year = 1997) or (d_year = 1998)) and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2'))) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: d_year (type: int), 
p_category (type: string), s_nation (type: string), net_revenue (type: double) + outputColumnNames: d_year, p_category, s_nation, net_revenue + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(net_revenue) + keys: d_year (type: int), s_nation (type: string), p_category (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + d_year, s_city, p_brand1, + sum(lo_revenue - lo_supplycost) as profit +from + dates, customer, supplier, ssb_part, lineorder +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_nation = 'UNITED STATES' + and (d_year = 1997 or d_year = 1998) + and p_category = 'MFGR#14' +group by + d_year, s_city, p_brand1 +order by + d_year, s_city, p_brand1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + d_year, s_city, p_brand1, + sum(lo_revenue - lo_supplycost) as profit +from + dates, customer, supplier, ssb_part, lineorder +where + lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_nation = 'UNITED STATES' + and (d_year = 1997 or d_year = 1998) + and p_category = 'MFGR#14' +group by + d_year, s_city, p_brand1 +order by + d_year, s_city, p_brand1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: 
Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((c_region = 'AMERICA') and (s_nation = 'UNITED STATES') and (p_category = 'MFGR#14') and ((d_year = 1997) or (d_year = 1998))) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: d_year (type: int), p_brand1 (type: string), s_city (type: string), net_revenue (type: double) + outputColumnNames: d_year, p_brand1, s_city, net_revenue + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(net_revenue) + keys: d_year (type: int), s_city (type: string), p_brand1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DROP MATERIALIZED VIEW `ssb_mv` +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@ssb_mv +PREHOOK: Output: default@ssb_mv +POSTHOOK: query: DROP MATERIALIZED VIEW `ssb_mv` +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@ssb_mv +POSTHOOK: Output: default@ssb_mv diff --git ql/src/test/results/clientpositive/materialized_view_rewrite_ssb_2.q.out ql/src/test/results/clientpositive/materialized_view_rewrite_ssb_2.q.out new file mode 100644 index 0000000..c913a62 --- /dev/null +++ ql/src/test/results/clientpositive/materialized_view_rewrite_ssb_2.q.out @@ -0,0 +1,1627 @@ +PREHOOK: query: 
CREATE TABLE `customer`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer +POSTHOOK: query: CREATE TABLE `customer`( + `c_custkey` bigint, + `c_name` string, + `c_address` string, + `c_city` string, + `c_nation` string, + `c_region` string, + `c_phone` string, + `c_mktsegment` string, + primary key (`c_custkey`) disable rely) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer +PREHOOK: query: CREATE TABLE `dates`( + `d_datekey` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`) disable rely +) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dates +POSTHOOK: query: CREATE TABLE `dates`( + `d_datekey` bigint, + `d_date` string, + `d_dayofweek` string, + `d_month` string, + `d_year` int, + `d_yearmonthnum` int, + `d_yearmonth` string, + `d_daynuminweek` int, + `d_daynuminmonth` int, + `d_daynuminyear` int, + `d_monthnuminyear` int, + `d_weeknuminyear` int, + `d_sellingseason` string, + `d_lastdayinweekfl` int, + `d_lastdayinmonthfl` int, + `d_holidayfl` int , + `d_weekdayfl`int, + primary key (`d_datekey`) disable rely +) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dates +PREHOOK: query: CREATE TABLE `ssb_part`( + `p_partkey` bigint, + `p_name` string, + `p_mfgr` string, + `p_category` string, + `p_brand1` string, + `p_color` string, + `p_type` string, + `p_size` int, + `p_container` string, + primary key (`p_partkey`) disable rely) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ssb_part +POSTHOOK: query: CREATE TABLE `ssb_part`( + `p_partkey` bigint, + `p_name` string, + `p_mfgr` string, + `p_category` string, + `p_brand1` string, + `p_color` string, + `p_type` string, + `p_size` int, + `p_container` string, + primary key (`p_partkey`) disable rely) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ssb_part +PREHOOK: query: CREATE TABLE `supplier`( + `s_suppkey` bigint, + `s_name` string, + `s_address` string, + `s_city` string, + `s_nation` string, + `s_region` string, + `s_phone` string, + primary key (`s_suppkey`) disable rely) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@supplier +POSTHOOK: query: CREATE TABLE `supplier`( + `s_suppkey` bigint, + `s_name` string, + `s_address` string, + `s_city` string, + `s_nation` string, + `s_region` string, + `s_phone` string, + primary key (`s_suppkey`) disable rely) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@supplier +PREHOOK: query: CREATE TABLE `lineorder`( + `lo_orderkey` bigint, + `lo_linenumber` int, + `lo_custkey` bigint not null disable rely, + `lo_partkey` bigint not null disable rely, + `lo_suppkey` 
bigint not null disable rely, + `lo_orderdate` bigint not null disable rely, + `lo_ordpriority` string, + `lo_shippriority` string, + `lo_quantity` double, + `lo_extendedprice` double, + `lo_ordtotalprice` double, + `lo_discount` double, + `lo_revenue` double, + `lo_supplycost` double, + `lo_tax` double, + `lo_commitdate` bigint, + `lo_shipmode` string, + primary key (`lo_orderkey`) disable rely, + constraint fk1 foreign key (`lo_custkey`) references `customer`(`c_custkey`) disable rely, + constraint fk2 foreign key (`lo_orderdate`) references `dates`(`d_datekey`) disable rely, + constraint fk3 foreign key (`lo_partkey`) references `ssb_part`(`p_partkey`) disable rely, + constraint fk4 foreign key (`lo_suppkey`) references `supplier`(`s_suppkey`) disable rely) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineorder +POSTHOOK: query: CREATE TABLE `lineorder`( + `lo_orderkey` bigint, + `lo_linenumber` int, + `lo_custkey` bigint not null disable rely, + `lo_partkey` bigint not null disable rely, + `lo_suppkey` bigint not null disable rely, + `lo_orderdate` bigint not null disable rely, + `lo_ordpriority` string, + `lo_shippriority` string, + `lo_quantity` double, + `lo_extendedprice` double, + `lo_ordtotalprice` double, + `lo_discount` double, + `lo_revenue` double, + `lo_supplycost` double, + `lo_tax` double, + `lo_commitdate` bigint, + `lo_shipmode` string, + primary key (`lo_orderkey`) disable rely, + constraint fk1 foreign key (`lo_custkey`) references `customer`(`c_custkey`) disable rely, + constraint fk2 foreign key (`lo_orderdate`) references `dates`(`d_datekey`) disable rely, + constraint fk3 foreign key (`lo_partkey`) references `ssb_part`(`p_partkey`) disable rely, + constraint fk4 foreign key (`lo_suppkey`) references `supplier`(`s_suppkey`) disable rely) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineorder +PREHOOK: query: analyze table customer compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +#### A masked pattern was here #### +POSTHOOK: query: analyze table customer compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +#### A masked pattern was here #### +PREHOOK: query: analyze table dates compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@dates +#### A masked pattern was here #### +POSTHOOK: query: analyze table dates compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dates +#### A masked pattern was here #### +PREHOOK: query: analyze table ssb_part compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@ssb_part +#### A masked pattern was here #### +POSTHOOK: query: analyze table ssb_part compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ssb_part +#### A masked pattern was here #### +PREHOOK: query: analyze table supplier compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@supplier +#### A masked pattern was here #### +POSTHOOK: query: analyze table supplier compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@supplier +#### A masked pattern was here #### +PREHOOK: query: analyze table lineorder compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@lineorder +#### A masked pattern was here #### +POSTHOOK: query: analyze table lineorder compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@lineorder +#### A masked pattern was here #### +PREHOOK: query: CREATE MATERIALIZED VIEW `ssb_mv` ENABLE REWRITE +AS +SELECT + cast(d_year || '-' || d_monthnuminyear || '-' || d_daynuminmonth as timestamp) as `__time`, + c_city, + c_nation, + c_region, + cast(d_weeknuminyear as string) d_weeknuminyear, + cast(d_year as string) d_year, + d_yearmonth, + cast(d_yearmonthnum as string) d_yearmonthnum, + cast(lo_discount as string) lo_discount, + cast(lo_quantity as string) lo_quantity, + p_brand1, + p_category, + p_mfgr, + s_city, + s_nation, + s_region, + lo_revenue, + lo_extendedprice * lo_discount discounted_price, + lo_revenue - lo_supplycost net_revenue +FROM + customer, dates, lineorder, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and lo_custkey = c_custkey +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@customer +PREHOOK: Input: default@dates +PREHOOK: Input: default@lineorder +PREHOOK: Input: default@ssb_part +PREHOOK: Input: default@supplier +PREHOOK: Output: database:default +PREHOOK: Output: default@ssb_mv +POSTHOOK: query: CREATE MATERIALIZED VIEW `ssb_mv` ENABLE REWRITE +AS +SELECT + cast(d_year || '-' || d_monthnuminyear || '-' || d_daynuminmonth as timestamp) as `__time`, + c_city, + c_nation, + c_region, + cast(d_weeknuminyear as string) d_weeknuminyear, + cast(d_year as string) d_year, + d_yearmonth, + cast(d_yearmonthnum as string) d_yearmonthnum, + cast(lo_discount as string) lo_discount, + cast(lo_quantity as string) lo_quantity, + p_brand1, + p_category, + p_mfgr, + s_city, + s_nation, + s_region, + lo_revenue, + lo_extendedprice * lo_discount discounted_price, + lo_revenue - lo_supplycost net_revenue +FROM + customer, dates, lineorder, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and lo_custkey = c_custkey +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@dates +POSTHOOK: Input: default@lineorder +POSTHOOK: Input: default@ssb_part +POSTHOOK: Input: default@supplier +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ssb_mv +PREHOOK: query: explain +select + sum(lo_extendedprice*lo_discount) as revenue +from + lineorder, dates +where + lo_orderdate = d_datekey + and d_year = 1993 + and lo_discount between 1 and 3 + and lo_quantity < 25 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(lo_extendedprice*lo_discount) as revenue +from + lineorder, dates +where + lo_orderdate = d_datekey + and d_year = 1993 + and lo_discount between 1 and 3 + and lo_quantity < 25 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(lo_quantity) < 25.0) and (UDFToInteger(d_year) = 1993) and UDFToDouble(lo_discount) BETWEEN 1.0 AND 3.0) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: discounted_price (type: double) + outputColumnNames: discounted_price + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(discounted_price) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + sum(lo_extendedprice*lo_discount) as revenue +from + lineorder, dates +where + lo_orderdate = d_datekey + and d_yearmonthnum = 199401 + and lo_discount between 4 and 6 + and lo_quantity between 26 and 35 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(lo_extendedprice*lo_discount) as revenue +from + lineorder, dates +where + lo_orderdate = d_datekey + and d_yearmonthnum = 199401 + and lo_discount between 4 and 6 + and lo_quantity between 26 and 35 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToInteger(d_yearmonthnum) = 199401) and UDFToDouble(lo_discount) BETWEEN 4.0 AND 6.0 and UDFToDouble(lo_quantity) BETWEEN 26.0 AND 35.0) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: discounted_price (type: double) + outputColumnNames: discounted_price + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(discounted_price) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + sum(lo_extendedprice*lo_discount) as revenue +from + lineorder, dates +where + lo_orderdate = d_datekey + and d_weeknuminyear = 6 + and d_year = 1994 + and lo_discount between 5 and 7 + and lo_quantity between 26 and 35 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(lo_extendedprice*lo_discount) as revenue +from + lineorder, dates +where + lo_orderdate = d_datekey + and d_weeknuminyear = 6 + and d_year = 1994 + and lo_discount between 5 and 7 + and lo_quantity between 26 and 35 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: 
+ Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToInteger(d_year) = 1994) and (UDFToInteger(d_weeknuminyear) = 6) and UDFToDouble(lo_quantity) BETWEEN 26.0 AND 35.0 and UDFToDouble(lo_discount) BETWEEN 5.0 AND 7.0) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: discounted_price (type: double) + outputColumnNames: discounted_price + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(discounted_price) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + sum(lo_revenue) as lo_revenue, d_year, p_brand1 +from + lineorder, dates, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_category = 'MFGR#12' + and s_region = 'AMERICA' +group by + d_year, p_brand1 +order by + d_year, p_brand1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(lo_revenue) as lo_revenue, d_year, p_brand1 +from + lineorder, dates, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_category = 'MFGR#12' + and s_region = 'AMERICA' +group by + d_year, p_brand1 +order by + d_year, p_brand1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((s_region = 'AMERICA') and (p_category = 'MFGR#12')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: UDFToInteger(d_year) (type: int), p_brand1 (type: string), lo_revenue (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 
Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col2 (type: double), _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: string) + sort order: ++ + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: double), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select + sum(lo_revenue) as lo_revenue, d_year, p_brand1 +from + lineorder, dates, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_brand1 between 'MFGR#2221' and 'MFGR#2228' + and s_region = 'ASIA' +group by + d_year, p_brand1 +order by + d_year, p_brand1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + sum(lo_revenue) as lo_revenue, d_year, p_brand1 +from + lineorder, dates, ssb_part, supplier +where + lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_brand1 between 'MFGR#2221' and 'MFGR#2228' + and s_region = 'ASIA' +group by + d_year, p_brand1 +order by + d_year, p_brand1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: default.ssb_mv + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((s_region = 'ASIA') and p_brand1 BETWEEN 'MFGR#2221' AND 'MFGR#2228') (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: UDFToInteger(d_year) (type: int), p_brand1 (type: string), lo_revenue (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key 
expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col2 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: int), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: double), _col0 (type: int), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: int), _col2 (type: string)
+              sort order: ++
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col0 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: double), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_brand1 = 'MFGR#2239'
+  and s_region = 'EUROPE'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+  sum(lo_revenue) as lo_revenue, d_year, p_brand1
+from
+  lineorder, dates, ssb_part, supplier
+where
+  lo_orderdate = d_datekey
+  and lo_partkey = p_partkey
+  and lo_suppkey = s_suppkey
+  and p_brand1 = 'MFGR#2239'
+  and s_region = 'EUROPE'
+group by
+  d_year, p_brand1
+order by
+  d_year, p_brand1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default.ssb_mv
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((p_brand1 = 'MFGR#2239') and (s_region = 'EUROPE')) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger(d_year) (type: int), lo_revenue (type: double)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col1)
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col1 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: double), _col0 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: int)
+              sort order: +
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col0 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: double), KEY.reducesinkkey0 (type: int), 'MFGR#2239' (type: string)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select
+  c_nation, s_nation, d_year,
+  sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and c_region = 'ASIA'
+  and s_region = 'ASIA'
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_nation, s_nation, d_year
+order by
+  d_year asc, lo_revenue desc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+  c_nation, s_nation, d_year,
+  sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and c_region = 'ASIA'
+  and s_region = 'ASIA'
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_nation, s_nation, d_year
+order by
+  d_year asc, lo_revenue desc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default.ssb_mv
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToInteger(d_year) >= 1992) and (UDFToInteger(d_year) <= 1997) and (c_region = 'ASIA') and (s_region = 'ASIA')) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: c_nation (type: string), s_nation (type: string), UDFToInteger(d_year) (type: int), lo_revenue (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col3)
+                  keys: _col2 (type: int), _col0 (type: string), _col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    sort order: +++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col3 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: string), _col2 (type: string), _col0 (type: int), _col3 (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: int), _col3 (type: double)
+              sort order: +-
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and c_nation = 'UNITED STATES'
+  and s_nation = 'UNITED STATES'
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and c_nation = 'UNITED STATES'
+  and s_nation = 'UNITED STATES'
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default.ssb_mv
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToInteger(d_year) <= 1997) and (UDFToInteger(d_year) >= 1992) and (c_nation = 'UNITED STATES') and (s_nation = 'UNITED STATES')) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: c_city (type: string), s_city (type: string), UDFToInteger(d_year) (type: int), lo_revenue (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col3)
+                  keys: _col2 (type: int), _col0 (type: string), _col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    sort order: +++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col3 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: string), _col2 (type: string), _col0 (type: int), _col3 (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: int), _col3 (type: double)
+              sort order: +-
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and (c_city='UNITED KI1' or c_city='UNITED KI5')
+  and (s_city='UNITED KI1' or s_city='UNITED KI5')
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and (c_city='UNITED KI1' or c_city='UNITED KI5')
+  and (s_city='UNITED KI1' or s_city='UNITED KI5')
+  and d_year >= 1992 and d_year <= 1997
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default.ssb_mv
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToInteger(d_year) >= 1992) and (UDFToInteger(d_year) <= 1997) and ((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5'))) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: c_city (type: string), s_city (type: string), UDFToInteger(d_year) (type: int), lo_revenue (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col3)
+                  keys: _col2 (type: int), _col0 (type: string), _col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    sort order: +++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col3 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: string), _col2 (type: string), _col0 (type: int), _col3 (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: int), _col3 (type: double)
+              sort order: +-
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and (c_city='UNITED KI1' or c_city='UNITED KI5')
+  and (s_city='UNITED KI1' or s_city='UNITED KI5')
+  and d_yearmonth = 'Dec1997'
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+  c_city, s_city, d_year, sum(lo_revenue) as lo_revenue
+from
+  customer, lineorder, supplier, dates
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_orderdate = d_datekey
+  and (c_city='UNITED KI1' or c_city='UNITED KI5')
+  and (s_city='UNITED KI1' or s_city='UNITED KI5')
+  and d_yearmonth = 'Dec1997'
+group by
+  c_city, s_city, d_year
+order by
+  d_year asc, lo_revenue desc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default.ssb_mv
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((d_yearmonth = 'Dec1997') and ((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5'))) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: c_city (type: string), s_city (type: string), UDFToInteger(d_year) (type: int), lo_revenue (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col3)
+                  keys: _col2 (type: int), _col0 (type: string), _col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    sort order: +++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col3 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: string), _col2 (type: string), _col0 (type: int), _col3 (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: int), _col3 (type: double)
+              sort order: +-
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select
+  d_year, c_nation,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_region = 'AMERICA'
+  and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+group by
+  d_year, c_nation
+order by
+  d_year, c_nation
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+  d_year, c_nation,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_region = 'AMERICA'
+  and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+group by
+  d_year, c_nation
+order by
+  d_year, c_nation
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default.ssb_mv
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((s_region = 'AMERICA') and (c_region = 'AMERICA') and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2'))) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger(d_year) (type: int), c_nation (type: string), net_revenue (type: double)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col2)
+                  keys: _col0 (type: int), _col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col2 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: int), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int), _col1 (type: string)
+              sort order: ++
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col2 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select
+  d_year, s_nation, p_category,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_region = 'AMERICA'
+  and (d_year = 1997 or d_year = 1998)
+  and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+group by
+  d_year, s_nation, p_category
+order by
+  d_year, s_nation, p_category
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+  d_year, s_nation, p_category,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_region = 'AMERICA'
+  and (d_year = 1997 or d_year = 1998)
+  and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+group by
+  d_year, s_nation, p_category
+order by
+  d_year, s_nation, p_category
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default.ssb_mv
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((c_region = 'AMERICA') and (s_region = 'AMERICA') and ((UDFToInteger(d_year) = 1997) or (UDFToInteger(d_year) = 1998)) and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2'))) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger(d_year) (type: int), s_nation (type: string), p_category (type: string), net_revenue (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col3)
+                  keys: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    sort order: +++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col3 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+              sort order: +++
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col3 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: double)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select
+  d_year, s_city, p_brand1,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_nation = 'UNITED STATES'
+  and (d_year = 1997 or d_year = 1998)
+  and p_category = 'MFGR#14'
+group by
+  d_year, s_city, p_brand1
+order by
+  d_year, s_city, p_brand1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+  d_year, s_city, p_brand1,
+  sum(lo_revenue - lo_supplycost) as profit
+from
+  dates, customer, supplier, ssb_part, lineorder
+where
+  lo_custkey = c_custkey
+  and lo_suppkey = s_suppkey
+  and lo_partkey = p_partkey
+  and lo_orderdate = d_datekey
+  and c_region = 'AMERICA'
+  and s_nation = 'UNITED STATES'
+  and (d_year = 1997 or d_year = 1998)
+  and p_category = 'MFGR#14'
+group by
+  d_year, s_city, p_brand1
+order by
+  d_year, s_city, p_brand1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default.ssb_mv
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((c_region = 'AMERICA') and (s_nation = 'UNITED STATES') and (p_category = 'MFGR#14') and ((UDFToInteger(d_year) = 1997) or (UDFToInteger(d_year) = 1998))) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger(d_year) (type: int), s_city (type: string), p_brand1 (type: string), net_revenue (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col3)
+                  keys: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    sort order: +++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col3 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+              sort order: +++
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col3 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: double)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: DROP MATERIALIZED VIEW `ssb_mv`
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@ssb_mv
+PREHOOK: Output: default@ssb_mv
+POSTHOOK: query: DROP MATERIALIZED VIEW `ssb_mv`
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@ssb_mv
+POSTHOOK: Output: default@ssb_mv
diff --git ql/src/test/results/clientpositive/outer_reference_windowed.q.out ql/src/test/results/clientpositive/outer_reference_windowed.q.out
index 003a04c..06d8145 100644
--- ql/src/test/results/clientpositive/outer_reference_windowed.q.out
+++ ql/src/test/results/clientpositive/outer_reference_windowed.q.out
@@ -538,21 +538,6 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: e011_01
-            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: c1 is not null (type: boolean)
-              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c1 (type: decimal(15,2))
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(15,2))
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: decimal(15,2))
-                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-          TableScan
             alias: e011_03
             Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -568,6 +553,21 @@
                   Map-reduce partition columns: _col0 (type: decimal(15,2))
                   Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: decimal(15,2))
+          TableScan
+            alias: e011_01
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: c1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: c1 (type: decimal(15,2))
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: decimal(15,2))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: decimal(15,2))
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -578,8 +578,8 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
-            aggregations: sum(_col0)
-            keys: _col1 (type: decimal(15,2)), _col2 (type: decimal(15,2))
+            aggregations: sum(_col2)
+            keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
             mode: hash
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
diff --git ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out
index d611103..a1a43b1 100644
--- ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out
+++ ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out
@@ -787,21 +787,6 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: e011_01
-            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: c1 is not null (type: boolean)
-              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c1 (type: decimal(15,2))
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: decimal(15,2))
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: decimal(15,2))
-                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-          TableScan
             alias: e011_03
             Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -817,6 +802,21 @@
                   Map-reduce partition columns: _col0 (type: decimal(15,2))
                   Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: decimal(15,2))
+          TableScan
+            alias: e011_01
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: c1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: c1 (type: decimal(15,2))
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: decimal(15,2))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: decimal(15,2))
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
       Map Vectorization:
           enabled: false
          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
@@ -834,14 +834,14 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
-            aggregations: sum(_col0)
+            aggregations: sum(_col2)
             Group By Vectorization:
                 groupByMode: HASH
                 vectorOutput: false
                 native: false
                 vectorProcessingMode: NONE
                 projectedOutputColumns: null
-            keys: _col1 (type: decimal(15,2)), _col2 (type: decimal(15,2))
+            keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
             mode: hash
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
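
Reviewer note on the golden files above: every rewritten SSB plan scans a single object, default.ssb_mv, in place of the lineorder/dates/customer/supplier/ssb_part join, and the profit queries read a precomputed net_revenue column rather than recomputing lo_revenue - lo_supplycost. The view's DDL is not part of this section, so the following is only a minimal sketch, inferred from the columns and predicates visible in the plans, of the kind of materialized view that would satisfy these rewrites; the actual definition in the test may differ, and d_year is assumed to be stored as a string, which is why the plans wrap it in UDFToInteger.

    -- Hypothetical sketch only; the column list is inferred from the plans above,
    -- not copied from the patch. net_revenue precomputes lo_revenue - lo_supplycost,
    -- which is why the profit queries select net_revenue directly.
    CREATE MATERIALIZED VIEW ssb_mv AS
    SELECT
      c_city, c_nation, c_region,                 -- customer dimension attributes
      s_city, s_nation, s_region,                 -- supplier dimension attributes
      p_brand1, p_category, p_mfgr,               -- part dimension attributes
      d_year, d_yearmonth,                        -- date dimension attributes
      lo_revenue,                                 -- raw revenue measure
      lo_revenue - lo_supplycost AS net_revenue   -- precomputed profit measure
    FROM
      customer, lineorder, supplier, dates, ssb_part
    WHERE
      lo_custkey = c_custkey
      AND lo_suppkey = s_suppkey
      AND lo_partkey = p_partkey
      AND lo_orderdate = d_datekey;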