diff --git a/common/src/java/org/apache/hadoop/hive/conf/Constants.java b/common/src/java/org/apache/hadoop/hive/conf/Constants.java index ee954d9aac..797d6820a0 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/Constants.java +++ b/common/src/java/org/apache/hadoop/hive/conf/Constants.java @@ -67,4 +67,5 @@ public static final String HADOOP_CREDENTIAL_PROVIDER_PATH_CONFIG = "hadoop.security.credential.provider.path"; public static final String MATERIALIZED_VIEW_REWRITING_TIME_WINDOW = "rewriting.time.window"; + public static final String MATERIALIZED_VIEW_SIMULATED_ROW_COUNT = "materialization.simulated.rowCount"; } diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index b213609f39..b78373a5b4 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1687,6 +1687,11 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVE_MATERIALIZED_VIEW_SERDE("hive.materializedview.serde", "org.apache.hadoop.hive.ql.io.orc.OrcSerde", "Default SerDe used for materialized views"), + HIVE_SIMULATION_MODE_ENABLED("hive.simulation.enable", false, + "This property enables simulated mode. In simulated mode, the optimizer will use virtual materialized views. " + + "This is useful to test whether a certain query rewriting would be triggered if the materialized view were present " + + "in the system."), + HIVE_ENABLE_JDBC_PUSHDOWN("hive.jdbc.pushdown.enable", true, "Flag to control enabling pushdown of operators into JDBC connection and subsequent SQL generation\n" + "using Calcite."), diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 18e4f7f02c..6352548feb 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -605,6 +605,7 @@ minillaplocal.query.files=\ materialized_view_rewrite_part_2.q,\ materialized_view_rewrite_ssb.q,\ materialized_view_rewrite_ssb_2.q,\ + simulated_materialized_view_rewrite.q,\ mapjoin_decimal.q,\ mapjoin_hint.q,\ mapjoin_emit_interval.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index 18089d59fd..fd4200de56 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -389,6 +389,13 @@ public boolean getExplainLogical() { return explainConfig != null && explainConfig.isLogical(); } + /** + * Find whether the current query is a CBO explain query + */ + public boolean getExplainCbo() { + return explainConfig != null && explainConfig.isCbo(); + } + public AnalyzeState getExplainAnalyze() { if (explainConfig != null) { return explainConfig.getAnalyze(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index dfa7e5e0a8..caf7b0246f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -54,6 +54,7 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.util.concurrent.ListenableFuture; +import org.apache.calcite.plan.RelOptMaterialization; import org.apache.calcite.rel.RelNode; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FSDataOutputStream; @@ -4979,20 +4980,26 @@ 
private int createView(Hive db, CreateViewDesc crtView) throws HiveException { } else { // We create new view Table tbl = crtView.toTable(conf); - // We set the signature for the view if it is a materialized view - if (tbl.isMaterializedView()) { - CreationMetadata cm = - new CreationMetadata(MetaStoreUtils.getDefaultCatalog(conf), tbl.getDbName(), - tbl.getTableName(), ImmutableSet.copyOf(crtView.getTablesUsed())); - cm.setValidTxnList(conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY)); - tbl.getTTable().setCreationMetadata(cm); - } - db.createTable(tbl, crtView.getIfNotExists()); - addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK)); - //set lineage info - DataContainer dc = new DataContainer(tbl.getTTable()); - queryState.getLineageState().setLineage(new Path(crtView.getViewName()), dc, tbl.getCols()); + if (crtView.isSimulated()) { + // Save in the session state + SessionState.get().createSimulatedMaterializedView(tbl); + } else { + // We set the signature for the view if it is a materialized view + if (tbl.isMaterializedView()) { + CreationMetadata cm = + new CreationMetadata(MetaStoreUtils.getDefaultCatalog(conf), tbl.getDbName(), + tbl.getTableName(), ImmutableSet.copyOf(crtView.getTablesUsed())); + cm.setValidTxnList(conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY)); + tbl.getTTable().setCreationMetadata(cm); + } + db.createTable(tbl, crtView.getIfNotExists()); + addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK)); + + //set lineage info + DataContainer dc = new DataContainer(tbl.getTTable()); + queryState.getLineageState().setLineage(new Path(crtView.getViewName()), dc, tbl.getCols()); + } } return 0; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java index b6b7c53123..a53443560a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java @@ -47,6 +47,7 @@ import org.apache.calcite.rel.type.RelDataTypeImpl; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl; @@ -88,7 +89,7 @@ /* Singleton */ private static final HiveMaterializedViewsRegistry SINGLETON = new HiveMaterializedViewsRegistry(); - /* Key is the database name. Value a map from the qualified name to the view object. */ + /* Key is the database name. Value a map from the name to the view object. 
*/ private final ConcurrentMap> materializedViews = new ConcurrentHashMap>(); @@ -217,6 +218,25 @@ private RelOptMaterialization addMaterializedView(HiveConf conf, Table materiali // Start the process to add MV to the cache // First we parse the view query and create the materialization object + final RelOptMaterialization materialization = + generateRelOptMaterialization(conf, materializedViewTable); + if (opType == OpType.CREATE) { + // You store the materialized view + cq.put(materializedViewTable.getTableName(), materialization); + } else { + // For LOAD, you only add it if it does not exist as you might be loading an outdated MV + cq.putIfAbsent(materializedViewTable.getTableName(), materialization); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Created materialized view for rewriting: " + + TableName.getDbTable(materializedViewTable.getDbName(), materializedViewTable.getTableName())); + } + return materialization; + } + + public static RelOptMaterialization generateRelOptMaterialization(HiveConf conf, Table materializedViewTable) { + // We parse the view query and create the materialization object final String viewQuery = materializedViewTable.getViewExpandedText(); final RelNode viewScan = createMaterializedViewScan(conf, materializedViewTable); if (viewScan == null) { @@ -233,20 +253,8 @@ private RelOptMaterialization addMaterializedView(HiveConf conf, Table materiali return null; } - RelOptMaterialization materialization = new RelOptMaterialization(viewScan, queryRel, + return new RelOptMaterialization(viewScan, queryRel, null, viewScan.getTable().getQualifiedName()); - if (opType == OpType.CREATE) { - // You store the materialized view - cq.put(materializedViewTable.getTableName(), materialization); - } else { - // For LOAD, you only add it if it does not exist as you might be loading an outdated MV - cq.putIfAbsent(materializedViewTable.getTableName(), materialization); - } - - if (LOG.isDebugEnabled()) { - LOG.debug("Created materialized view for rewriting: " + viewScan.getTable().getQualifiedName()); - } - return materialization; } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java index 995ff9b68c..8d8f9acf21 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -17,9 +17,11 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite; +import com.google.common.collect.Sets; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -49,7 +51,9 @@ import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; import org.apache.calcite.util.mapping.IntPair; +import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -57,6 +61,7 @@ import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo.ForeignKeyCol; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.PartitionIterable; import 
org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo; import org.apache.hadoop.hive.ql.metadata.Table; @@ -428,15 +433,18 @@ public RelDistribution getDistribution() { @Override public double getRowCount() { if (rowCount == -1) { - if (null == partitionList) { - // we are here either unpartitioned table or partitioned table with no - // predicates - computePartitionList(hiveConf, null, new HashSet()); + if (needsSimulatedStats()) { + rowCount = Double.parseDouble(hiveTblMetadata.getProperty(Constants.MATERIALIZED_VIEW_SIMULATED_ROW_COUNT)); + } else { + if (null == partitionList) { + // we are here either unpartitioned table or partitioned table with no + // predicates + computePartitionList(hiveConf, null, new HashSet()); + } + rowCount = StatsUtils.getNumRows(hiveConf, getNonPartColumns(), hiveTblMetadata, + partitionList, noColsMissingStats); } - rowCount = StatsUtils.getNumRows(hiveConf, getNonPartColumns(), hiveTblMetadata, - partitionList, noColsMissingStats); } - return rowCount; } @@ -481,10 +489,70 @@ public void computePartitionList(HiveConf conf, RexNode pruneNode, Set } private void updateColStats(Set projIndxLst, boolean allowMissingStats) { - List nonPartColNamesThatRqrStats = new ArrayList(); - List nonPartColIndxsThatRqrStats = new ArrayList(); - List partColNamesThatRqrStats = new ArrayList(); - List partColIndxsThatRqrStats = new ArrayList(); + if (needsSimulatedStats()) { + // If we need stats for a materialized view in simulated mode, we will + // estimate the stats and populate the needed data structures + + // Number of rows + rowCount = Double.parseDouble(hiveTblMetadata.getProperty(Constants.MATERIALIZED_VIEW_SIMULATED_ROW_COUNT)); + + // Get column stats that are not already available + List schema = new ArrayList<>(); + List neededColumns = new ArrayList<>(); + List colIndexesRequired = new ArrayList<>(); + for (Integer pi : projIndxLst) { + if (hiveColStatsMap.get(pi) == null) { + ColumnInfo tmp; + if ((tmp = hiveNonPartitionColsMap.get(pi)) != null || + (tmp = hivePartitionColsMap.get(pi)) != null) { + schema.add(tmp); + neededColumns.add(tmp.getInternalName()); + colIndexesRequired.add(pi); + } else { + String logMsg = "Unable to find Column Index: " + pi + ", in " + + hiveTblMetadata.getCompleteName(); + LOG.error(logMsg); + throw new RuntimeException(logMsg); + } + } + } + List estimatedColumnStats = StatsUtils.estimateStats(hiveTblMetadata, + schema, neededColumns, hiveConf, (long) rowCount); + for (int i = 0; i < colIndexesRequired.size(); i++) { + hiveColStatsMap.put(colIndexesRequired.get(i), estimatedColumnStats.get(i)); + } + + // Populate information about the partition list (we consider single partition) + if (partitionList == null) { + try { + partitionList = new PrunedPartitionList(hiveTblMetadata, hiveTblMetadata.getFullyQualifiedName() + ";", + Sets.newHashSet(new Partition(hiveTblMetadata)), null, false); + partitionCache.put(partitionList.getKey(), partitionList); + } catch (HiveException e) { + String logMsg = "Collecting stats for table: " + hiveTblMetadata.getTableName() + " failed."; + LOG.error(logMsg, e); + throw new RuntimeException(logMsg, e); + } + } + + // Add to stats cache + ColumnStatsList colStatsCached = colStatsCache.get(partitionList.getKey()); + if (colStatsCached == null) { + colStatsCached = new ColumnStatsList(); + colStatsCache.put(partitionList.getKey(), colStatsCached); + } + for (int i = 0; i < neededColumns.size(); i++) { + colStatsCached.put(neededColumns.get(i), hiveColStatsMap.get(colIndexesRequired.get(i))); + } + 
colStatsCached.updateState(State.COMPLETE); + + return; + } + + List nonPartColNamesThatRqrStats = new ArrayList<>(); + List nonPartColIndxsThatRqrStats = new ArrayList<>(); + List partColNamesThatRqrStats = new ArrayList<>(); + List partColIndxsThatRqrStats = new ArrayList<>(); Set colNamesFailedStats = new HashSet(); // 1. Separate required columns to Non Partition and Partition Cols @@ -764,4 +832,10 @@ public String getPartitionListKey() { return partitionList != null ? partitionList.getKey() : null; } + private boolean needsSimulatedStats() { + // If we are in simulation mode and this is a materialized view, we just use + // estimated stats + return HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_SIMULATION_MODE_ENABLED) && + hiveTblMetadata.isMaterializedView(); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index fb31254bbf..a6163039eb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -1482,6 +1482,7 @@ private boolean createDynPartSpec(ASTNode ast) { } return false; } + public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec, boolean allowPartialPartitionsSpec) throws SemanticException { assert (ast.getToken().getType() == HiveParser.TOK_TAB diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index aacf9e3403..bd2a102b41 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -439,7 +439,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept List oldHints = new ArrayList<>(); // Cache the hints before CBO runs and removes them. // Use the hints later in top level QB. - getHintsFromQB(getQB(), oldHints); + getHintsFromQB(getQB(), oldHints); // Note: for now, we don't actually pass the queryForCbo to CBO, because // it accepts qb, not AST, and can also access all the private stuff in @@ -499,6 +499,17 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept } catch (SemanticException e) { throw new CalciteViewSemanticException(e.getMessage()); } + + // If the materialized view is simulated, just add estimations about row count + if (createVwDesc.isSimulated()) { + Map tblProps = createVwDesc.getTblProps(); + if (tblProps == null) { + tblProps = new HashMap<>(); + createVwDesc.setTblProps(tblProps); + } + tblProps.put(Constants.MATERIALIZED_VIEW_SIMULATED_ROW_COUNT, + Double.toString(RelMetadataQuery.instance().getRowCount(newPlan))); + } } else if (cboCtx.type == PreCboCtx.Type.CTAS) { // CTAS init(false); @@ -2183,10 +2194,15 @@ private RelNode applyMaterializedViewRewriting(RelOptPlanner planner, RelNode ba materializations = db.getValidMaterializedView(mvRebuildDbName, mvRebuildName, getTablesUsed(basePlan), true, getTxnMgr()); } else { - // This is not a rebuild, we retrieve all the materializations. In turn, we do not need - // to force the materialization contents to be up-to-date, as this is not a rebuild, and - // we apply the user parameters (HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW) instead. 
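For reference, a minimal self-contained sketch (illustrative names only, not Hive code) of the property round trip these hunks rely on: at CREATE time CalcitePlanner writes the CBO row-count estimate into the table properties under Constants.MATERIALIZED_VIEW_SIMULATED_ROW_COUNT, and RelOptHiveTable later parses it back with Double.parseDouble. The guarded fallback below is an assumption for illustration; the patch itself parses the value unconditionally.

    import java.util.HashMap;
    import java.util.Map;

    public class SimulatedRowCountRoundTrip {
      // Same literal value as Constants.MATERIALIZED_VIEW_SIMULATED_ROW_COUNT.
      static final String KEY = "materialization.simulated.rowCount";

      public static void main(String[] args) {
        Map<String, String> tblProps = new HashMap<>();
        // Write side: the optimizer's row-count estimate is serialized as text.
        tblProps.put(KEY, Double.toString(42.0d));
        // Read side: parse it back; guarding against a missing value avoids a
        // NullPointerException if the property was never set.
        String raw = tblProps.get(KEY);
        double rowCount = raw != null ? Double.parseDouble(raw) : -1d;
        System.out.println("simulated rowCount = " + rowCount);
      }
    }
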
- materializations = db.getAllValidMaterializedViews(getTablesUsed(basePlan), false, getTxnMgr()); + // This is not a rebuild + if (ctx.isExplainPlan() && HiveConf.getBoolVar(conf, ConfVars.HIVE_SIMULATION_MODE_ENABLED)) { + materializations = SessionState.get().getSimulatedMaterializedViews(); + } else { + // We retrieve all the materializations. In turn, we do not need to force the materialization + // contents to be up-to-date, as this is not a rebuild, and we apply the user parameters + // (HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW) instead. + materializations = db.getAllValidMaterializedViews(getTablesUsed(basePlan), false, getTxnMgr()); + } } // We need to use the current cluster for the scan operator on views, // otherwise the planner will throw an Exception (different planners) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 0e5b3e5473..c8cbbfca5a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -40,6 +40,8 @@ import org.antlr.runtime.tree.CommonTree; import org.antlr.runtime.tree.Tree; +import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.StatsSetupConst; @@ -1435,21 +1437,44 @@ private void analyzeSwitchDatabase(ASTNode ast) throws SemanticException { private void analyzeDropTable(ASTNode ast, TableType expectedType) throws SemanticException { String tableName = getUnescapedName((ASTNode) ast.getChild(0)); - boolean ifExists = (ast.getFirstChildWithType(HiveParser.TOK_IFEXISTS) != null); // we want to signal an error if the table/view doesn't exist and we're // configured not to fail silently + boolean ifExists = (ast.getFirstChildWithType(HiveParser.TOK_IFEXISTS) != null); boolean throwException = !ifExists && !HiveConf.getBoolVar(conf, ConfVars.DROPIGNORESNONEXISTENT); - ReplicationSpec replicationSpec = new ReplicationSpec(ast); + boolean simulated = false; + if (expectedType == TableType.MATERIALIZED_VIEW) { + simulated = (ast.getFirstChildWithType(HiveParser.TOK_SIMULATED) != null); + } + + Table tab; + if (simulated) { + // This materialized view lives in the user session, just do the proper + // checks and delete + String[] names = Utilities.getDbTableName(tableName); + Pair simulatedMaterializedView = + SessionState.get().getSimulatedMaterializedView(names[0], names[1]); + tab = simulatedMaterializedView != null ? 
simulatedMaterializedView.getLeft() : null; + if (tab == null) { + if (throwException) { + throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName)); + } + // Nothing to drop; IF EXISTS (or the ignore-nonexistent setting) applies + return; + } + + inputs.add(new ReadEntity(tab)); + outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_EXCLUSIVE)); + SessionState.get().dropSimulatedMaterializedView(tab); + return; + } else { + tab = getTable(tableName, throwException); + } - Table tab = getTable(tableName, throwException); if (tab != null) { inputs.add(new ReadEntity(tab)); outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_EXCLUSIVE)); } boolean ifPurge = (ast.getFirstChildWithType(HiveParser.KW_PURGE) != null); + ReplicationSpec replicationSpec = new ReplicationSpec(ast); DropTableDesc dropTblDesc = new DropTableDesc(tableName, expectedType, ifExists, ifPurge, replicationSpec); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), dropTblDesc))); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 253633cfb3..e1738dae42 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -381,6 +381,7 @@ KW_UNMANAGED: 'UNMANAGED'; KW_APPLICATION: 'APPLICATION'; KW_SYNC: 'SYNC'; KW_AST: 'AST'; +KW_SIMULATED: 'SIMULATED'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 1ffdec0fe1..dcb9410802 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -428,6 +428,7 @@ TOK_REPLACE; TOK_LIKERP; TOK_UNMANAGED; TOK_INPUTFORMAT; +TOK_SIMULATED; } @@ -619,6 +620,7 @@ import org.apache.hadoop.hive.conf.HiveConf; xlateMap.put("KW_PATH", "PATH"); xlateMap.put("KW_AST", "AST"); xlateMap.put("KW_TRANSACTIONAL", "TRANSACTIONAL"); + xlateMap.put("KW_SIMULATED", "SIMULATED"); // Operators xlateMap.put("DOT", "."); @@ -1958,10 +1960,11 @@ createMaterializedViewStatement pushMsg("create materialized view statement", state); } @after { popMsg(state); } - : KW_CREATE KW_MATERIALIZED KW_VIEW (ifNotExists)? name=tableName + : KW_CREATE simulated? KW_MATERIALIZED KW_VIEW (ifNotExists)? name=tableName rewriteDisabled? tableComment? viewPartition? tableRowFormat? tableFileFormat? tableLocation? tablePropertiesPrefixed? KW_AS selectStatementWithCTE -> ^(TOK_CREATE_MATERIALIZED_VIEW $name + simulated? ifNotExists? rewriteDisabled? tableComment? @@ -1974,10 +1977,16 @@ createMaterializedViewStatement ) ; +simulated +@init { pushMsg("simulated clause", state); } +@after { popMsg(state); } + : KW_SIMULATED -> ^(TOK_SIMULATED) + ; + dropMaterializedViewStatement @init { pushMsg("drop materialized view statement", state); } @after { popMsg(state); } - : KW_DROP KW_MATERIALIZED KW_VIEW ifExists? viewName -> ^(TOK_DROP_MATERIALIZED_VIEW viewName ifExists?) + : KW_DROP simulated? KW_MATERIALIZED KW_VIEW ifExists? viewName -> ^(TOK_DROP_MATERIALIZED_VIEW viewName simulated? ifExists?) 
; showFunctionIdentifier diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 417955cde7..5a89241791 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -806,7 +806,7 @@ nonReserved | KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_QUERY | KW_QUARTER | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE - | KW_ROLE | KW_ROLES | KW_SCHEMA | KW_SCHEMAS | KW_SECOND | KW_SEMI | KW_SERDE | KW_SERDEPROPERTIES | KW_SERVER | KW_SETS | KW_SHARED + | KW_ROLE | KW_ROLES | KW_SCHEMA | KW_SCHEMAS | KW_SECOND | KW_SEMI | KW_SERDE | KW_SERDEPROPERTIES | KW_SERVER | KW_SETS | KW_SIMULATED | KW_SHARED | KW_SHOW | KW_SHOW_DATABASE | KW_SKEWED | KW_SORT | KW_SORTED | KW_SSL | KW_STATISTICS | KW_STORED | KW_AST | KW_STREAMTABLE | KW_STRING | KW_STRUCT | KW_TABLES | KW_TBLPROPERTIES | KW_TEMPORARY | KW_TERMINATED | KW_TINYINT | KW_TOUCH | KW_TRANSACTIONAL | KW_TRANSACTIONS | KW_UNARCHIVE | KW_UNDO | KW_UNIONTYPE | KW_UNLOCK | KW_UNSET diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 54f34f6b0f..2a0291c42e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -54,6 +54,7 @@ import org.antlr.runtime.tree.TreeVisitorAction; import org.antlr.runtime.tree.TreeWizard; import org.antlr.runtime.tree.TreeWizard.ContextVisitor; +import org.apache.calcite.plan.RelOptMaterialization; import org.apache.calcite.rel.RelNode; import org.apache.calcite.util.ImmutableBitSet; import org.apache.commons.collections.CollectionUtils; @@ -72,6 +73,7 @@ import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; +import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; @@ -163,6 +165,7 @@ import org.apache.hadoop.hive.ql.optimizer.Transform; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc; import org.apache.hadoop.hive.ql.optimizer.lineage.Generator; @@ -2108,6 +2111,7 @@ private void getMetaData(QB qb, ReadEntity parentInput) // do a deep copy, in case downstream changes it. tab = new Table(tab.getTTable().deepCopy()); } + if (tab == null || tab.getDbName().equals(SessionState.get().getCurrentDatabase())) { Table materializedTab = ctx.getMaterializedTable(cteName); @@ -2127,10 +2131,18 @@ private void getMetaData(QB qb, ReadEntity parentInput) } } + if (tab == null && tabName.equals(DUMMY_DATABASE + "." 
+ DUMMY_TABLE)) { + continue; + } + + if (tab == null && ctx.isExplainPlan()) { + String[] names = Utilities.getDbTableName(tabName); + Pair simulatedMaterializedView = + SessionState.get().getSimulatedMaterializedView(names[0], names[1]); + tab = simulatedMaterializedView != null ? simulatedMaterializedView.getLeft() : null; + } + if (tab == null) { - if(tabName.equals(DUMMY_DATABASE + "." + DUMMY_TABLE)) { - continue; - } ASTNode src = qb.getParseInfo().getSrcForAlias(alias); if (null != src) { throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(src)); @@ -12389,6 +12401,14 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticExce if (createVwDesc.isMaterialized()) { createVwDesc.setTablesUsed(getTablesUsed(pCtx)); + + if (createVwDesc.isSimulated()) { + // If this is a simulated materialized view, we do not need to optimize + // or translate the physical plan. + ctx.setResDir(null); + ctx.setResFile(null); + return; + } } else { // Since we're only creating a view (not executing it), we don't need to // optimize or translate the plan (and in fact, those procedures can @@ -12460,7 +12480,7 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticExce // 9. Optimize Physical op tree & Translate to target execution engine (MR, // TEZ..) - if (!ctx.getExplainLogical()) { + if (!ctx.getExplainCbo() && !ctx.getExplainLogical()) { TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx); compiler.init(queryState, console, db); compiler.compile(pCtx, rootTasks, inputs, outputs); @@ -13578,6 +13598,7 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCt List cols = null; boolean ifNotExists = false; boolean rewriteEnabled = true; + boolean simulated = false; boolean orReplace = false; boolean isAlterViewAs = false; String comment = null; @@ -13605,6 +13626,9 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCt case HiveParser.TOK_REWRITE_DISABLED: rewriteEnabled = false; break; + case HiveParser.TOK_SIMULATED: + simulated = true; + break; case HiveParser.TOK_ORREPLACE: orReplace = true; break; @@ -13654,6 +13678,10 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCt throw new SemanticException("Can't combine IF NOT EXISTS and OR REPLACE."); } + if (!rewriteEnabled && simulated) { + throw new SemanticException("Cannot add simulated materialized view without rewriting enabled."); + } + if (isMaterialized && !ifNotExists) { // Verify that the table does not already exist // dumpTable is only used to check the conflict for non-temporary tables @@ -13678,10 +13706,14 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCt if (isMaterialized) { createVwDesc = new CreateViewDesc( dbDotTable, cols, comment, tblProps, partColNames, - ifNotExists, isRebuild, rewriteEnabled, isAlterViewAs, + ifNotExists, isRebuild, rewriteEnabled, simulated, isAlterViewAs, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getStorageHandler(), storageFormat.getSerdeProps()); + if (simulated) { + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + createVwDesc))); + } addDbAndTabToOutputs(qualTabName, TableType.MATERIALIZED_VIEW, false, tblProps); queryState.setCommandType(HiveOperation.CREATE_MATERIALIZED_VIEW); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java 
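The SessionState changes further below keep simulated materialized views in a session-scoped, two-level concurrent map (database name to view name to view). A standalone sketch of that data structure, with String standing in for the Pair of Table and RelOptMaterialization, and with all class and method names assumed for illustration:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;

    public class SimulatedViewRegistry {
      private final Map<String, Map<String, String>> views = new ConcurrentHashMap<>();

      public void create(String db, String name, String view) {
        // computeIfAbsent is used here for brevity; the patch achieves the
        // same with putIfAbsent plus a null check on the previous value.
        Map<String, String> byName =
            views.computeIfAbsent(db, k -> new ConcurrentHashMap<>());
        if (byName.putIfAbsent(name, view) != null) {
          throw new IllegalStateException(
              "simulated materialized view already exists: " + db + "." + name);
        }
      }

      public String get(String db, String name) {
        Map<String, String> byName = views.get(db);
        return byName != null ? byName.get(name) : null;
      }

      public void drop(String db, String name) {
        Map<String, String> byName = views.get(db);
        if (byName != null) {
          byName.remove(name);
        }
      }

      public List<String> all() {
        List<String> result = new ArrayList<>();
        for (Map<String, String> byName : views.values()) {
          result.addAll(byName.values());
        }
        return result;
      }

      public static void main(String[] args) {
        SimulatedViewRegistry r = new SimulatedViewRegistry();
        r.create("default", "mv1_n2", "select * from emps_n3 where empid < 150");
        System.out.println(r.get("default", "mv1_n2"));
        r.drop("default", "mv1_n2");
        System.out.println(r.all()); // []
      }
    }
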
index 7130aba597..3e7b6cb474 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java @@ -53,6 +53,7 @@ private String originalText; private String expandedText; private boolean rewriteEnabled; + private boolean simulated; private List schema; private Map tblProps; private List partColNames; @@ -97,8 +98,8 @@ public CreateViewDesc() { */ public CreateViewDesc(String viewName, List schema, String comment, Map tblProps, List partColNames, - boolean ifNotExists, boolean replace, boolean rewriteEnabled, boolean isAlterViewAs, - String inputFormat, String outputFormat, String location, + boolean ifNotExists, boolean replace, boolean rewriteEnabled, boolean simulated, + boolean isAlterViewAs, String inputFormat, String outputFormat, String location, String serde, String storageHandler, Map serdeProps) { this.viewName = viewName; this.schema = schema; @@ -109,6 +110,7 @@ public CreateViewDesc(String viewName, List schema, String comment, this.replace = replace; this.isMaterialized = true; this.rewriteEnabled = rewriteEnabled; + this.simulated = simulated; this.isAlterViewAs = isAlterViewAs; this.inputFormat = inputFormat; this.outputFormat = outputFormat; @@ -146,6 +148,7 @@ public CreateViewDesc(String viewName, List schema, String comment, this.isAlterViewAs = isAlterViewAs; this.isMaterialized = false; this.rewriteEnabled = false; + this.simulated = false; this.inputFormat = inputFormat; this.outputFormat = outputFormat; this.serde = serde; @@ -256,6 +259,10 @@ public void setTablesUsed(Set tablesUsed) { this.tablesUsed = tablesUsed; } + public boolean isSimulated() { + return simulated; + } + @Explain(displayName = "replace", displayOnlyOnTrue = true) public boolean isReplace() { return replace; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java index 50b43bad31..3eb077e943 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java @@ -96,7 +96,7 @@ public ImportTableDesc(String dbName, Table table) throws Exception { null, // comment passed as table params table.getParameters(), table.getPartColNames(), - false,false,false,false, + false,false,false,false,false, table.getSd().getInputFormat(), table.getSd().getOutputFormat(), null, // location: set to null here, can be overwritten by the IMPORT stmt diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index de5cd8b992..4758050c41 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -42,12 +42,15 @@ import java.util.UUID; import java.util.concurrent.CancellationException; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; +import org.apache.calcite.plan.RelOptMaterialization; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -55,6 +58,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import 
org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.common.io.SessionStream; import org.apache.hadoop.hive.common.log.ProgressMonitor; import org.apache.hadoop.hive.common.type.Timestamp; @@ -88,6 +92,7 @@ import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry; import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient; import org.apache.hadoop.hive.ql.metadata.Table; @@ -131,12 +136,17 @@ /** * Concurrent since SessionState is often propagated to workers in thread pools */ - private final Map> tempTables = new ConcurrentHashMap<>(); + private final Map> tempTables = + new ConcurrentHashMap<>(); private final Map> tempTableColStats = new ConcurrentHashMap<>(); private final Map tempPartitions = new ConcurrentHashMap<>(); + /* Key is the database name. Value a map from the name to the view object. */ + private final Map>> simulatedMaterializedViews = + new ConcurrentHashMap<>(); + protected ClassLoader parentLoader; // Session-scope compile lock. @@ -1921,6 +1931,7 @@ public void applyAuthorizationPolicy() throws HiveException { public Map> getTempTables() { return tempTables; } + public Map getTempPartitions() { return tempPartitions; } @@ -1929,6 +1940,57 @@ public void applyAuthorizationPolicy() throws HiveException { return tempTableColStats; } + public void createSimulatedMaterializedView(Table materializedViewTable) throws HiveException { + RelOptMaterialization materialization = + HiveMaterializedViewsRegistry.generateRelOptMaterialization(sessionConf, materializedViewTable); + // We are going to create the map for each view in the given database + Map> cq = + new ConcurrentHashMap<>(); + final Map > prevCq = simulatedMaterializedViews.putIfAbsent( + materializedViewTable.getDbName(), cq); + if (prevCq != null) { + cq = prevCq; + } + // Now we add the new simulated materialization + Pair prevMaterialization = + cq.putIfAbsent(materializedViewTable.getTableName(), Pair.of(materializedViewTable, materialization)); + if (prevMaterialization != null) { + throw new HiveException("Simulated materialization with same name already exists"); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Created simulated materialized view: " + + TableName.getDbTable(materializedViewTable.getDbName(), materializedViewTable.getTableName())); + } + } + + public Pair getSimulatedMaterializedView(String dbName, String tblName) { + final Map> cq = simulatedMaterializedViews.get(dbName); + if (cq != null) { + return cq.get(tblName); + } + return null; + } + + public List getSimulatedMaterializedViews() { + List result = new ArrayList<>(); + for (Map> v : simulatedMaterializedViews.values()) { + for (Pair p : v.values()) { + result.add(p.getRight()); + } + } + return result; + } + + public void dropSimulatedMaterializedView(Table materializedViewTable) { + simulatedMaterializedViews.get(materializedViewTable.getDbName()).remove(materializedViewTable.getTableName()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Removed simulated materialized view: " + + TableName.getDbTable(materializedViewTable.getDbName(), materializedViewTable.getTableName())); + } + } + /** * @return ip address for user running the query */ diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 2a7cf8c897..be69fae8d7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -209,8 +209,7 @@ public static long getNumRows(HiveConf conf, List schema, Table tabl } private static void estimateStatsForMissingCols(List neededColumns, List columnStats, - Table table, HiveConf conf, long nr, List schema) { - + Table table, HiveConf conf, long nr, List schema) { Set neededCols = new HashSet<>(neededColumns); Set colsWithStats = new HashSet<>(); @@ -974,11 +973,9 @@ else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){ return cs; } - private static List estimateStats(Table table, List schema, + public static List estimateStats(Table table, List schema, List neededColumns, HiveConf conf, long nr) { - List stats = new ArrayList(neededColumns.size()); - for (int i = 0; i < neededColumns.size(); i++) { ColStatistics cs = estimateColStats(nr, neededColumns.get(i), conf, schema); stats.add(cs); diff --git a/ql/src/test/queries/clientpositive/simulated_materialized_view_rewrite.q b/ql/src/test/queries/clientpositive/simulated_materialized_view_rewrite.q new file mode 100644 index 0000000000..0f9fd327d4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/simulated_materialized_view_rewrite.q @@ -0,0 +1,67 @@ +-- SORT_QUERY_RESULTS + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; + +create table emps_n3 ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into emps_n3 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250); + +create table depts_n2 ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into depts_n2 values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20); + +create table dependents_n2 ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into dependents_n2 values (10, 'Michael'), (20, 'Jane'); + +create table locations_n2 ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true'); +insert into locations_n2 values (10, 'San Francisco'), (20, 'San Diego'); + +alter table emps_n3 add constraint pk1 primary key (empid) disable novalidate rely; +alter table depts_n2 add constraint pk2 primary key (deptno) disable novalidate rely; +alter table dependents_n2 add constraint pk3 primary key (empid) disable novalidate rely; +alter table locations_n2 add constraint pk4 primary key (locationid) disable novalidate rely; + +alter table emps_n3 add constraint fk1 foreign key (deptno) references depts_n2(deptno) disable novalidate rely; +alter table depts_n2 add constraint fk2 foreign key (locationid) references locations_n2(locationid) disable novalidate rely; + +create simulated materialized view mv1_n2 as +select * from emps_n3 where empid < 150; + +explain cbo +select * +from (select * from emps_n3 where empid < 120) t +join depts_n2 using (deptno); + +set hive.simulation.enable=true; + +explain cbo +select * +from 
(select * from emps_n3 where empid < 120) t +join depts_n2 using (deptno); + + +drop simulated materialized view mv1_n2; + +explain cbo +select * +from (select * from emps_n3 where empid < 120) t +join depts_n2 using (deptno); diff --git a/ql/src/test/results/clientpositive/llap/simulated_materialized_view_rewrite.q.out b/ql/src/test/results/clientpositive/llap/simulated_materialized_view_rewrite.q.out new file mode 100644 index 0000000000..303ac9153f --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/simulated_materialized_view_rewrite.q.out @@ -0,0 +1,233 @@ +PREHOOK: query: create table emps_n3 ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps_n3 +POSTHOOK: query: create table emps_n3 ( + empid int, + deptno int, + name varchar(256), + salary float, + commission int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps_n3 +PREHOOK: query: insert into emps_n3 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@emps_n3 +POSTHOOK: query: insert into emps_n3 values (100, 10, 'Bill', 10000, 1000), (200, 20, 'Eric', 8000, 500), + (150, 10, 'Sebastian', 7000, null), (110, 10, 'Theodore', 10000, 250), (120, 10, 'Bill', 10000, 250) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@emps_n3 +POSTHOOK: Lineage: emps_n3.commission SCRIPT [] +POSTHOOK: Lineage: emps_n3.deptno SCRIPT [] +POSTHOOK: Lineage: emps_n3.empid SCRIPT [] +POSTHOOK: Lineage: emps_n3.name SCRIPT [] +POSTHOOK: Lineage: emps_n3.salary SCRIPT [] +PREHOOK: query: create table depts_n2 ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@depts_n2 +POSTHOOK: query: create table depts_n2 ( + deptno int, + name varchar(256), + locationid int) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@depts_n2 +PREHOOK: query: insert into depts_n2 values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@depts_n2 +POSTHOOK: query: insert into depts_n2 values (10, 'Sales', 10), (30, 'Marketing', null), (20, 'HR', 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@depts_n2 +POSTHOOK: Lineage: depts_n2.deptno SCRIPT [] +POSTHOOK: Lineage: depts_n2.locationid SCRIPT [] +POSTHOOK: Lineage: depts_n2.name SCRIPT [] +PREHOOK: query: create table dependents_n2 ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dependents_n2 +POSTHOOK: query: create table dependents_n2 ( + empid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dependents_n2 +PREHOOK: query: insert into dependents_n2 
values (10, 'Michael'), (20, 'Jane') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dependents_n2 +POSTHOOK: query: insert into dependents_n2 values (10, 'Michael'), (20, 'Jane') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dependents_n2 +POSTHOOK: Lineage: dependents_n2.empid SCRIPT [] +POSTHOOK: Lineage: dependents_n2.name SCRIPT [] +PREHOOK: query: create table locations_n2 ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@locations_n2 +POSTHOOK: query: create table locations_n2 ( + locationid int, + name varchar(256)) +stored as orc TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@locations_n2 +PREHOOK: query: insert into locations_n2 values (10, 'San Francisco'), (20, 'San Diego') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@locations_n2 +POSTHOOK: query: insert into locations_n2 values (10, 'San Francisco'), (20, 'San Diego') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@locations_n2 +POSTHOOK: Lineage: locations_n2.locationid SCRIPT [] +POSTHOOK: Lineage: locations_n2.name SCRIPT [] +PREHOOK: query: alter table emps_n3 add constraint pk1 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps_n3 add constraint pk1 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts_n2 add constraint pk2 primary key (deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts_n2 add constraint pk2 primary key (deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table dependents_n2 add constraint pk3 primary key (empid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table dependents_n2 add constraint pk3 primary key (empid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table locations_n2 add constraint pk4 primary key (locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table locations_n2 add constraint pk4 primary key (locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table emps_n3 add constraint fk1 foreign key (deptno) references depts_n2(deptno) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table emps_n3 add constraint fk1 foreign key (deptno) references depts_n2(deptno) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: alter table depts_n2 add constraint fk2 foreign key (locationid) references locations_n2(locationid) disable novalidate rely +PREHOOK: type: ALTERTABLE_ADDCONSTRAINT +POSTHOOK: query: alter table depts_n2 add constraint fk2 foreign key (locationid) references locations_n2(locationid) disable novalidate rely +POSTHOOK: type: ALTERTABLE_ADDCONSTRAINT +PREHOOK: query: create simulated materialized view mv1_n2 as +select * from emps_n3 where empid < 150 +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps_n3 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_n2 +#### A 
masked pattern was here #### +POSTHOOK: query: create simulated materialized view mv1_n2 as +select * from emps_n3 where empid < 150 +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps_n3 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_n2 +#### A masked pattern was here #### +PREHOOK: query: explain cbo +select * +from (select * from emps_n3 where empid < 120) t +join depts_n2 using (deptno) +PREHOOK: type: QUERY +PREHOOK: Input: default@depts_n2 +PREHOOK: Input: default@emps_n3 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * +from (select * from emps_n3 where empid < 120) t +join depts_n2 using (deptno) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts_n2 +POSTHOOK: Input: default@emps_n3 +#### A masked pattern was here #### +CBO PLAN: +HiveProject(deptno=[$1], empid=[$0], name=[$2], salary=[$3], commission=[$4], name1=[$6], locationid=[$7]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(empid=[$0], deptno=[$1], name=[$2], salary=[$3], commission=[$4]) + HiveFilter(condition=[AND(<($0, 120), IS NOT NULL($1))]) + HiveTableScan(table=[[default, emps_n3]], table:alias=[emps_n3]) + HiveProject(deptno=[$0], name=[$1], locationid=[$2]) + HiveTableScan(table=[[default, depts_n2]], table:alias=[depts_n2]) + +PREHOOK: query: explain cbo +select * +from (select * from emps_n3 where empid < 120) t +join depts_n2 using (deptno) +PREHOOK: type: QUERY +PREHOOK: Input: default@depts_n2 +PREHOOK: Input: default@emps_n3 +PREHOOK: Input: default@mv1_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * +from (select * from emps_n3 where empid < 120) t +join depts_n2 using (deptno) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts_n2 +POSTHOOK: Input: default@emps_n3 +POSTHOOK: Input: default@mv1_n2 +#### A masked pattern was here #### +CBO PLAN: +HiveProject(deptno0=[CAST($5):INTEGER], empid=[$0], name=[$2], salary=[$3], commission=[$4], name0=[$6], locationid=[$7]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(empid=[CAST($0):INTEGER NOT NULL], deptno=[$1], name=[$2], salary=[$3], commission=[$4]) + HiveFilter(condition=[AND(>(120, CAST($0):INTEGER NOT NULL), IS NOT NULL($1))]) + HiveTableScan(table=[[default, mv1_n2]], table:alias=[default.mv1_n2]) + HiveProject(deptno=[$0], name=[$1], locationid=[$2]) + HiveTableScan(table=[[default, depts_n2]], table:alias=[depts_n2]) + +PREHOOK: query: drop simulated materialized view mv1_n2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_n2 +PREHOOK: Output: default@mv1_n2 +POSTHOOK: query: drop simulated materialized view mv1_n2 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_n2 +POSTHOOK: Output: default@mv1_n2 +PREHOOK: query: explain cbo +select * +from (select * from emps_n3 where empid < 120) t +join depts_n2 using (deptno) +PREHOOK: type: QUERY +PREHOOK: Input: default@depts_n2 +PREHOOK: Input: default@emps_n3 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * +from (select * from emps_n3 where empid < 120) t +join depts_n2 using (deptno) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts_n2 +POSTHOOK: Input: default@emps_n3 +#### A masked pattern was here #### +CBO PLAN: +HiveProject(deptno=[$1], empid=[$0], name=[$2], salary=[$3], commission=[$4], name1=[$6], locationid=[$7]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(empid=[$0], deptno=[$1], name=[$2], salary=[$3], commission=[$4])
+    HiveFilter(condition=[AND(<($0, 120), IS NOT NULL($1))])
+      HiveTableScan(table=[[default, emps_n3]], table:alias=[emps_n3])
+  HiveProject(deptno=[$0], name=[$1], locationid=[$2])
+    HiveTableScan(table=[[default, depts_n2]], table:alias=[depts_n2])
+
diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
index 2a684018df..db9dd44ea6 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
@@ -7183,7 +7183,6 @@ POSTHOOK: query: CREATE TABLE `date_dim`(
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@date_dim
-Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: explain cbo with avg_sales as
 (select avg(quantity*list_price) average_sales
 from (select ss_quantity quantity
@@ -7226,7 +7225,6 @@ HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))])
     HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
 
-Warning: Shuffle Join MERGEJOIN[73][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: explain cbo with avg_sales as
 (select avg(quantity*list_price) over( partition by list_price) average_sales
 from (select ss_quantity quantity
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out
index c09da86ce1..cc31bd82f5 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out
@@ -1,6 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[1164][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
-Warning: Shuffle Join MERGEJOIN[1171][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product
-Warning: Shuffle Join MERGEJOIN[1178][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product
 PREHOOK: query: explain cbo
 with cross_items as
 (select i_item_sk ss_item_sk
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
index 4019b26f17..06bfc5b086 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
@@ -1,7 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 27' is a cross product
-Warning: Shuffle Join MERGEJOIN[584][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 28' is a cross product
-Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 31' is a cross product
-Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 32' is a cross product
 PREHOOK: query: explain cbo
 with frequent_ss_items as
 (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out
index 3add88ad25..971bcfa13d 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[301][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
 PREHOOK: query: explain cbo
 with ssales as
 (select c_last_name
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query28.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query28.q.out
index 643e5b6259..bcc34b6cbb 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query28.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query28.q.out
@@ -1,8 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[102][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product
-Warning: Shuffle Join MERGEJOIN[103][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product
-Warning: Shuffle Join MERGEJOIN[104][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product
-Warning: Shuffle Join MERGEJOIN[105][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 7' is a cross product
-Warning: Shuffle Join MERGEJOIN[106][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 8' is a cross product
 PREHOOK: query: explain cbo
 select *
 from (select avg(ss_list_price) B1_LP
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out
index 50fa078159..da64211b95 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out
@@ -1,7 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product
-Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product
-Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 30' is a cross product
-Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product
 PREHOOK: query: explain cbo
 with my_customers as (
 select distinct c_customer_sk
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
index b4410ff07a..24cad8509c 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[404][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 22' is a cross product
 PREHOOK: query: explain cbo
 with ss_items as
 (select i_item_id item_id
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query6.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query6.q.out
index f502c004cf..02715c59ea 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query6.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query6.q.out
@@ -1,4 +1,3 @@
-Warning: Map Join MAPJOIN[171][bigTable=?] in task 'Reducer 15' is a cross product
 PREHOOK: query: explain cbo
 select a.ca_state state, count(*) cnt
 from customer_address a
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query61.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query61.q.out
index 8e8cf27078..48c4a39198 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query61.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query61.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[266][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product
 PREHOOK: query: explain cbo
 select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100
 from
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out
index 91ad054e66..2015f634a7 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[317][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product
 PREHOOK: query: explain cbo
 with ss as
 (select s_store_sk,
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query88.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query88.q.out
index 1f4471e8ae..6afcad8fa1 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query88.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query88.q.out
@@ -1,10 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[599][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
-Warning: Shuffle Join MERGEJOIN[600][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product
-Warning: Shuffle Join MERGEJOIN[601][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 8' is a cross product
-Warning: Shuffle Join MERGEJOIN[602][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 9' is a cross product
-Warning: Shuffle Join MERGEJOIN[603][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 10' is a cross product
-Warning: Shuffle Join MERGEJOIN[604][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 11' is a cross product
-Warning: Shuffle Join MERGEJOIN[605][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 12' is a cross product
 PREHOOK: query: explain cbo
 select *
 from
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query9.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query9.q.out
index 3a1e1990ef..45e2797dcc 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query9.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query9.q.out
@@ -1,18 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[171][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[172][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
-Warning: Shuffle Join MERGEJOIN[173][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product
-Warning: Shuffle Join MERGEJOIN[174][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product
-Warning: Shuffle Join MERGEJOIN[175][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product
-Warning: Shuffle Join MERGEJOIN[176][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 7' is a cross product
-Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 8' is a cross product
-Warning: Shuffle Join MERGEJOIN[178][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8]] in Stage 'Reducer 9' is a cross product
-Warning: Shuffle Join MERGEJOIN[179][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9]] in Stage 'Reducer 10' is a cross product
-Warning: Shuffle Join MERGEJOIN[180][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10]] in Stage 'Reducer 11' is a cross product
-Warning: Shuffle Join MERGEJOIN[181][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11]] in Stage 'Reducer 12' is a cross product
-Warning: Shuffle Join MERGEJOIN[182][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12]] in Stage 'Reducer 13' is a cross product
-Warning: Shuffle Join MERGEJOIN[183][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13]] in Stage 'Reducer 14' is a cross product
-Warning: Shuffle Join MERGEJOIN[184][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13, $hdt$_14]] in Stage 'Reducer 15' is a cross product
-Warning: Shuffle Join MERGEJOIN[185][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13, $hdt$_14, $hdt$_15]] in Stage 'Reducer 16' is a cross product
 PREHOOK: query: explain cbo
 select case when (select count(*)
 from store_sales
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query90.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query90.q.out
index 123b3c20db..973ffb507f 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query90.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query90.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[152][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
 PREHOOK: query: explain cbo
 select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio
 from ( select count(*) amc
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out
index 118d23b577..a82adb1b3a 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out
@@ -1,6 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[1182][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
-Warning: Shuffle Join MERGEJOIN[1189][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product
-Warning: Shuffle Join MERGEJOIN[1196][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product
 PREHOOK: query: explain cbo
 with cross_items as
 (select i_item_sk ss_item_sk
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out
index e5b0d19715..33b8e32c82 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out
@@ -1,7 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[443][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product
-Warning: Shuffle Join MERGEJOIN[444][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product
-Warning: Shuffle Join MERGEJOIN[446][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product
-Warning: Shuffle Join MERGEJOIN[447][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 24' is a cross product
 PREHOOK: query: explain cbo
 with frequent_ss_items as
 (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out
index 41d96ea303..860c8163b4 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[297][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
 PREHOOK: query: explain cbo
 with ssales as
 (select c_last_name
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query28.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query28.q.out
index 643e5b6259..bcc34b6cbb 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query28.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query28.q.out
@@ -1,8 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[102][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product
-Warning: Shuffle Join MERGEJOIN[103][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product
-Warning: Shuffle Join MERGEJOIN[104][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product
-Warning: Shuffle Join MERGEJOIN[105][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 7' is a cross product
-Warning: Shuffle Join MERGEJOIN[106][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 8' is a cross product
 PREHOOK: query: explain cbo
 select *
 from (select avg(ss_list_price) B1_LP
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out
index 6af3162b28..ee0cbdb8ac 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out
@@ -1,7 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[278][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product
-Warning: Shuffle Join MERGEJOIN[277][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 15' is a cross product
-Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 7' is a cross product
 PREHOOK: query: explain cbo
 with my_customers as (
 select distinct c_customer_sk
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out
index 3e89c2da75..f165f630b0 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[406][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 20' is a cross product
 PREHOOK: query: explain cbo
 with ss_items as
 (select i_item_id item_id
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out
index 5e3deb3eb9..7fc377330d 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out
@@ -1,4 +1,3 @@
-Warning: Map Join MAPJOIN[170][bigTable=?] in task 'Map 11' is a cross product
 PREHOOK: query: explain cbo
 select a.ca_state state, count(*) cnt
 from customer_address a
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out
index 253190d3bc..7ab6508473 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query61.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[266][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product
 PREHOOK: query: explain cbo
 select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100
 from
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query77.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query77.q.out
index 0ac295bf8f..01ede309ec 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query77.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query77.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[225][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product
 PREHOOK: query: explain cbo
 with ss as
 (select s_store_sk,
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query88.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query88.q.out
index 1ac8fe2dcd..33ca2b5560 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query88.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query88.q.out
@@ -1,10 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[599][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
-Warning: Shuffle Join MERGEJOIN[600][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product
-Warning: Shuffle Join MERGEJOIN[601][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 8' is a cross product
-Warning: Shuffle Join MERGEJOIN[602][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 9' is a cross product
-Warning: Shuffle Join MERGEJOIN[603][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 10' is a cross product
-Warning: Shuffle Join MERGEJOIN[604][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 11' is a cross product
-Warning: Shuffle Join MERGEJOIN[605][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 12' is a cross product
 PREHOOK: query: explain cbo
 select *
 from
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query9.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query9.q.out
index 4a92def905..433abf77ca 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query9.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query9.q.out
@@ -1,18 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[171][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[172][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
-Warning: Shuffle Join MERGEJOIN[173][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product
-Warning: Shuffle Join MERGEJOIN[174][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product
-Warning: Shuffle Join MERGEJOIN[175][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product
-Warning: Shuffle Join MERGEJOIN[176][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 7' is a cross product
-Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 8' is a cross product
-Warning: Shuffle Join MERGEJOIN[178][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8]] in Stage 'Reducer 9' is a cross product
-Warning: Shuffle Join MERGEJOIN[179][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9]] in Stage 'Reducer 10' is a cross product
-Warning: Shuffle Join MERGEJOIN[180][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10]] in Stage 'Reducer 11' is a cross product
-Warning: Shuffle Join MERGEJOIN[181][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11]] in Stage 'Reducer 12' is a cross product
-Warning: Shuffle Join MERGEJOIN[182][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12]] in Stage 'Reducer 13' is a cross product
-Warning: Shuffle Join MERGEJOIN[183][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13]] in Stage 'Reducer 14' is a cross product
-Warning: Shuffle Join MERGEJOIN[184][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13, $hdt$_14]] in Stage 'Reducer 15' is a cross product
-Warning: Shuffle Join MERGEJOIN[185][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13, $hdt$_14, $hdt$_15]] in Stage 'Reducer 16' is a cross product
 PREHOOK: query: explain cbo
 select case when (select count(*)
 from store_sales
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out
index b5aac32fd1..c7747dd519 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[152][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
 PREHOOK: query: explain cbo
 select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio
 from ( select count(*) amc
diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out
index d7d652f34c..b69e00ae3d 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out
@@ -7080,7 +7080,6 @@ POSTHOOK: query: CREATE TABLE `date_dim`(
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@date_dim
-Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain cbo with avg_sales as
 (select avg(quantity*list_price) average_sales
 from (select ss_quantity quantity
@@ -7123,7 +7122,6 @@ HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
   HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))])
     HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
 
-Warning: Shuffle Join JOIN[35][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain cbo with avg_sales as
 (select avg(quantity*list_price) over( partition by list_price) average_sales
 from (select ss_quantity quantity