diff --git a/common/src/java/org/apache/hadoop/hive/conf/Constants.java b/common/src/java/org/apache/hadoop/hive/conf/Constants.java
index ee954d9aac..797d6820a0 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/Constants.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/Constants.java
@@ -67,4 +67,5 @@
   public static final String HADOOP_CREDENTIAL_PROVIDER_PATH_CONFIG =
       "hadoop.security.credential.provider.path";
   public static final String MATERIALIZED_VIEW_REWRITING_TIME_WINDOW = "rewriting.time.window";
+  public static final String MATERIALIZED_VIEW_SIMULATED_ROW_COUNT = "materialization.simulated.rowCount";
 }
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b213609f39..b78373a5b4 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1687,6 +1687,11 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
     HIVE_MATERIALIZED_VIEW_SERDE("hive.materializedview.serde", "org.apache.hadoop.hive.ql.io.orc.OrcSerde",
         "Default SerDe used for materialized views"),
 
+    HIVE_SIMULATION_MODE_ENABLED("hive.simulation.enable", false,
+        "This property enables simulation mode. In simulation mode, the optimizer will use virtual materialized views. " +
+        "This is useful to test whether a certain query rewriting would be triggered if the materialized view were present " +
+        "in the system."),
+
     HIVE_ENABLE_JDBC_PUSHDOWN("hive.jdbc.pushdown.enable", true,
         "Flag to control enabling pushdown of operators into JDBC connection and subsequent SQL generation\n" +
         "using Calcite."),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index 18089d59fd..fd4200de56 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -389,6 +389,13 @@ public boolean getExplainLogical() {
     return explainConfig != null && explainConfig.isLogical();
   }
 
+  /**
+   * Find whether the current query is a CBO explain query
+   */
+  public boolean getExplainCbo() {
+    return explainConfig != null && explainConfig.isCbo();
+  }
+
   public AnalyzeState getExplainAnalyze() {
     if (explainConfig != null) {
       return explainConfig.getAnalyze();
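The two settings above live in different layers: hive.simulation.enable is a session-level HiveConf flag, while materialization.simulated.rowCount travels with the simulated view as a table property. A minimal sketch of how a consumer reads them; the helper class is illustrative, not part of the patch:

    import org.apache.hadoop.hive.conf.Constants;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
    import org.apache.hadoop.hive.ql.metadata.Table;

    final class SimulationKnobs {
      /** True when the session opted into simulation mode. */
      static boolean simulationEnabled(HiveConf conf) {
        return HiveConf.getBoolVar(conf, ConfVars.HIVE_SIMULATION_MODE_ENABLED);
      }

      /** Row count recorded on a simulated view at CREATE time (stored as a stringified double). */
      static double simulatedRowCount(Table mv) {
        return Double.parseDouble(mv.getProperty(Constants.MATERIALIZED_VIEW_SIMULATED_ROW_COUNT));
      }
    }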
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index dfa7e5e0a8..caf7b0246f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -54,6 +54,7 @@
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.util.concurrent.ListenableFuture;
+import org.apache.calcite.plan.RelOptMaterialization;
 import org.apache.calcite.rel.RelNode;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -4979,20 +4980,26 @@ private int createView(Hive db, CreateViewDesc crtView) throws HiveException {
     } else {
       // We create new view
       Table tbl = crtView.toTable(conf);
-      // We set the signature for the view if it is a materialized view
-      if (tbl.isMaterializedView()) {
-        CreationMetadata cm =
-            new CreationMetadata(MetaStoreUtils.getDefaultCatalog(conf), tbl.getDbName(),
-                tbl.getTableName(), ImmutableSet.copyOf(crtView.getTablesUsed()));
-        cm.setValidTxnList(conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
-        tbl.getTTable().setCreationMetadata(cm);
-      }
-      db.createTable(tbl, crtView.getIfNotExists());
-      addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
-      //set lineage info
-      DataContainer dc = new DataContainer(tbl.getTTable());
-      queryState.getLineageState().setLineage(new Path(crtView.getViewName()), dc, tbl.getCols());
+      if (crtView.isSimulated()) {
+        // Save in the session state
+        SessionState.get().createSimulatedMaterializedView(tbl);
+      } else {
+        // We set the signature for the view if it is a materialized view
+        if (tbl.isMaterializedView()) {
+          CreationMetadata cm =
+              new CreationMetadata(MetaStoreUtils.getDefaultCatalog(conf), tbl.getDbName(),
+                  tbl.getTableName(), ImmutableSet.copyOf(crtView.getTablesUsed()));
+          cm.setValidTxnList(conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
+          tbl.getTTable().setCreationMetadata(cm);
+        }
+        db.createTable(tbl, crtView.getIfNotExists());
+        addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
+
+        //set lineage info
+        DataContainer dc = new DataContainer(tbl.getTTable());
+        queryState.getLineageState().setLineage(new Path(crtView.getViewName()), dc, tbl.getCols());
+      }
     }
     return 0;
   }
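With the DDL routing above, a simulated view is declared like a regular materialized view but never reaches the metastore; DDLTask stores it in the SessionState instead. A hedged usage sketch (database, view, and column names are invented; exception handling elided):

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.Driver;
    import org.apache.hadoop.hive.ql.session.SessionState;

    HiveConf conf = new HiveConf();
    conf.setBoolVar(HiveConf.ConfVars.HIVE_SIMULATION_MODE_ENABLED, true);
    SessionState.start(conf);

    Driver driver = new Driver(conf);
    // Parsed and analyzed as usual, but DDLTask.createView calls
    // SessionState.createSimulatedMaterializedView instead of Hive.createTable.
    driver.run("CREATE SIMULATED MATERIALIZED VIEW mv_sales AS "
        + "SELECT store_id, SUM(amount) AS total FROM sales GROUP BY store_id");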
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
index b6b7c53123..a53443560a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
@@ -47,6 +47,7 @@
 import org.apache.calcite.rel.type.RelDataTypeImpl;
 import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.hadoop.hive.common.TableName;
 import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl;
@@ -88,7 +89,7 @@
   /* Singleton */
   private static final HiveMaterializedViewsRegistry SINGLETON = new HiveMaterializedViewsRegistry();
 
-  /* Key is the database name. Value a map from the qualified name to the view object. */
+  /* Key is the database name. Value a map from the name to the view object. */
   private final ConcurrentMap<String, ConcurrentMap<String, RelOptMaterialization>> materializedViews =
       new ConcurrentHashMap<String, ConcurrentMap<String, RelOptMaterialization>>();
@@ -217,6 +218,25 @@ private RelOptMaterialization addMaterializedView(HiveConf conf, Table materializedViewTable,
 
     // Start the process to add MV to the cache
     // First we parse the view query and create the materialization object
+    final RelOptMaterialization materialization =
+        generateRelOptMaterialization(conf, materializedViewTable);
+    if (opType == OpType.CREATE) {
+      // You store the materialized view
+      cq.put(materializedViewTable.getTableName(), materialization);
+    } else {
+      // For LOAD, you only add it if it does not exist as you might be loading an outdated MV
+      cq.putIfAbsent(materializedViewTable.getTableName(), materialization);
+    }
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Created materialized view for rewriting: " +
+          TableName.getDbTable(materializedViewTable.getDbName(), materializedViewTable.getTableName()));
+    }
+    return materialization;
+  }
+
+  public static RelOptMaterialization generateRelOptMaterialization(HiveConf conf, Table materializedViewTable) {
+    // We parse the view query and create the materialization object
     final String viewQuery = materializedViewTable.getViewExpandedText();
     final RelNode viewScan = createMaterializedViewScan(conf, materializedViewTable);
     if (viewScan == null) {
@@ -233,20 +253,8 @@ private RelOptMaterialization addMaterializedView(HiveConf conf, Table materializedViewTable,
       return null;
     }
 
-    RelOptMaterialization materialization = new RelOptMaterialization(viewScan, queryRel,
+    return new RelOptMaterialization(viewScan, queryRel,
         null, viewScan.getTable().getQualifiedName());
-    if (opType == OpType.CREATE) {
-      // You store the materialized view
-      cq.put(materializedViewTable.getTableName(), materialization);
-    } else {
-      // For LOAD, you only add it if it does not exist as you might be loading an outdated MV
-      cq.putIfAbsent(materializedViewTable.getTableName(), materialization);
-    }
-
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Created materialized view for rewriting: " + viewScan.getTable().getQualifiedName());
-    }
-    return materialization;
   }
 
   /**
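The registry refactoring separates cache bookkeeping from materialization construction, so session-scoped simulated views can reuse the same translation without touching the shared cache. A sketch of calling the new entry point directly; note that it returns null when the view scan or the expanded view query cannot be planned, so a caller should check (the surrounding variables are assumed):

    RelOptMaterialization materialization =
        HiveMaterializedViewsRegistry.generateRelOptMaterialization(conf, mvTable);
    if (materialization == null) {
      // Parsing/planning the view failed; do not hand a null to the caches,
      // which reject null values.
      throw new IllegalStateException("Could not materialize " + mvTable.getFullyQualifiedName());
    }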
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 995ff9b68c..8d8f9acf21 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -17,9 +17,11 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite;
 
+import com.google.common.collect.Sets;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -49,7 +51,9 @@
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.Pair;
 import org.apache.calcite.util.mapping.IntPair;
+import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
@@ -57,6 +61,7 @@
 import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo.ForeignKeyCol;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.PartitionIterable;
 import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo;
 import org.apache.hadoop.hive.ql.metadata.Table;
@@ -428,15 +433,18 @@ public RelDistribution getDistribution() {
 
   @Override
   public double getRowCount() {
     if (rowCount == -1) {
-      if (null == partitionList) {
-        // we are here either unpartitioned table or partitioned table with no
-        // predicates
-        computePartitionList(hiveConf, null, new HashSet<Integer>());
+      if (needsSimulatedStats()) {
+        rowCount = Double.parseDouble(hiveTblMetadata.getProperty(Constants.MATERIALIZED_VIEW_SIMULATED_ROW_COUNT));
+      } else {
+        if (null == partitionList) {
+          // we are here either unpartitioned table or partitioned table with no
+          // predicates
+          computePartitionList(hiveConf, null, new HashSet<Integer>());
+        }
+        rowCount = StatsUtils.getNumRows(hiveConf, getNonPartColumns(), hiveTblMetadata,
+            partitionList, noColsMissingStats);
       }
-      rowCount = StatsUtils.getNumRows(hiveConf, getNonPartColumns(), hiveTblMetadata,
-          partitionList, noColsMissingStats);
     }
-
     return rowCount;
   }
@@ -481,10 +489,70 @@ public void computePartitionList(HiveConf conf, RexNode pruneNode, Set<Integer> partOrVirtualCols) {
   }
 
   private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats) {
-    List<String> nonPartColNamesThatRqrStats = new ArrayList<String>();
-    List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>();
-    List<String> partColNamesThatRqrStats = new ArrayList<String>();
-    List<Integer> partColIndxsThatRqrStats = new ArrayList<Integer>();
+    if (needsSimulatedStats()) {
+      // If we need stats for a materialized view in simulated mode, we will
+      // estimate the stats and populate the needed data structures
+
+      // Number of rows
+      rowCount = Double.parseDouble(hiveTblMetadata.getProperty(Constants.MATERIALIZED_VIEW_SIMULATED_ROW_COUNT));
+
+      // Get column stats that are not already available
+      List<ColumnInfo> schema = new ArrayList<>();
+      List<String> neededColumns = new ArrayList<>();
+      List<Integer> colIndexesRequired = new ArrayList<>();
+      for (Integer pi : projIndxLst) {
+        if (hiveColStatsMap.get(pi) == null) {
+          ColumnInfo tmp;
+          if ((tmp = hiveNonPartitionColsMap.get(pi)) != null ||
+              (tmp = hivePartitionColsMap.get(pi)) != null) {
+            schema.add(tmp);
+            neededColumns.add(tmp.getInternalName());
+            colIndexesRequired.add(pi);
+          } else {
+            String logMsg = "Unable to find Column Index: " + pi + ", in " +
+                hiveTblMetadata.getCompleteName();
+            LOG.error(logMsg);
+            throw new RuntimeException(logMsg);
+          }
+        }
+      }
+      List<ColStatistics> estimatedColumnStats = StatsUtils.estimateStats(hiveTblMetadata,
+          schema, neededColumns, hiveConf, (long) rowCount);
+      for (int i = 0; i < colIndexesRequired.size(); i++) {
+        hiveColStatsMap.put(colIndexesRequired.get(i), estimatedColumnStats.get(i));
+      }
+
+      // Populate information about the partition list (we consider single partition)
+      if (partitionList == null) {
+        try {
+          partitionList = new PrunedPartitionList(hiveTblMetadata, hiveTblMetadata.getFullyQualifiedName() + ";",
+              Sets.newHashSet(new Partition(hiveTblMetadata)), null, false);
+          partitionCache.put(partitionList.getKey(), partitionList);
+        } catch (HiveException e) {
+          String logMsg = "Collecting stats for table: " + hiveTblMetadata.getTableName() + " failed.";
+          LOG.error(logMsg, e);
+          throw new RuntimeException(logMsg, e);
+        }
+      }
+
+      // Add to stats cache
+      ColumnStatsList colStatsCached = colStatsCache.get(partitionList.getKey());
+      if (colStatsCached == null) {
+        colStatsCached = new ColumnStatsList();
+        colStatsCache.put(partitionList.getKey(), colStatsCached);
+      }
+      for (int i = 0; i < neededColumns.size(); i++) {
+        colStatsCached.put(neededColumns.get(i), hiveColStatsMap.get(colIndexesRequired.get(i)));
+      }
+      colStatsCached.updateState(State.COMPLETE);
+
+      return;
+    }
+
+    List<String> nonPartColNamesThatRqrStats = new ArrayList<>();
+    List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<>();
+    List<String> partColNamesThatRqrStats = new ArrayList<>();
+    List<Integer> partColIndxsThatRqrStats = new ArrayList<>();
     Set<String> colNamesFailedStats = new HashSet<String>();
 
     // 1. Separate required columns to Non Partition and Partition Cols
@@ -764,4 +832,10 @@ public String getPartitionListKey() {
     return partitionList != null ? partitionList.getKey() : null;
   }
 
+  private boolean needsSimulatedStats() {
+    // If we are in simulation mode and this is a materialized view, we just use
+    // estimated stats
+    return HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_SIMULATION_MODE_ENABLED) &&
+        hiveTblMetadata.isMaterializedView();
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index fb31254bbf..a6163039eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -1482,6 +1482,7 @@ private boolean createDynPartSpec(ASTNode ast) {
       }
       return false;
     }
+
     public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec,
         boolean allowPartialPartitionsSpec) throws SemanticException {
       assert (ast.getToken().getType() == HiveParser.TOK_TAB
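In simulation mode, RelOptHiveTable never consults real statistics: the row count is parsed from the materialization.simulated.rowCount property, and column statistics are synthesized from column types alone. A condensed, illustrative version of that path (the standalone method is not part of the patch):

    // Estimate stats for the requested columns of a simulated materialized view.
    static List<ColStatistics> simulatedStats(Table mv, List<ColumnInfo> schema,
        List<String> neededColumns, HiveConf conf) {
      long rows = (long) Double.parseDouble(
          mv.getProperty(Constants.MATERIALIZED_VIEW_SIMULATED_ROW_COUNT));
      // estimateStats derives sizes and NDVs from column types and the row
      // count, since there is no data to scan.
      return StatsUtils.estimateStats(mv, schema, neededColumns, conf, rows);
    }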
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index aacf9e3403..bd2a102b41 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -439,7 +439,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
     List<ASTNode> oldHints = new ArrayList<>();
     // Cache the hints before CBO runs and removes them.
     // Use the hints later in top level QB.
-    getHintsFromQB(getQB(), oldHints); 
+    getHintsFromQB(getQB(), oldHints);
 
     // Note: for now, we don't actually pass the queryForCbo to CBO, because
     // it accepts qb, not AST, and can also access all the private stuff in
@@ -499,6 +499,17 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
           } catch (SemanticException e) {
             throw new CalciteViewSemanticException(e.getMessage());
           }
+
+          // If the materialized view is simulated, just add estimations about row count
+          if (createVwDesc.isSimulated()) {
+            Map<String, String> tblProps = createVwDesc.getTblProps();
+            if (tblProps == null) {
+              tblProps = new HashMap<>();
+              createVwDesc.setTblProps(tblProps);
+            }
+            tblProps.put(Constants.MATERIALIZED_VIEW_SIMULATED_ROW_COUNT,
+                Double.toString(RelMetadataQuery.instance().getRowCount(newPlan)));
+          }
         } else if (cboCtx.type == PreCboCtx.Type.CTAS) {
           // CTAS
           init(false);
@@ -2183,10 +2194,15 @@ private RelNode applyMaterializedViewRewriting(RelOptPlanner planner, RelNode basePlan,
         materializations = db.getValidMaterializedView(mvRebuildDbName, mvRebuildName,
             getTablesUsed(basePlan), true, getTxnMgr());
       } else {
-        // This is not a rebuild, we retrieve all the materializations. In turn, we do not need
-        // to force the materialization contents to be up-to-date, as this is not a rebuild, and
-        // we apply the user parameters (HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW) instead.
-        materializations = db.getAllValidMaterializedViews(getTablesUsed(basePlan), false, getTxnMgr());
+        // This is not a rebuild
+        if (ctx.isExplainPlan() && HiveConf.getBoolVar(conf, ConfVars.HIVE_SIMULATION_MODE_ENABLED)) {
+          materializations = SessionState.get().getSimulatedMaterializedViews();
+        } else {
+          // We retrieve all the materializations. In turn, we do not need to force the materialization
+          // contents to be up-to-date, as this is not a rebuild, and we apply the user parameters
+          // (HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW) instead.
+          materializations = db.getAllValidMaterializedViews(getTablesUsed(basePlan), false, getTxnMgr());
+        }
       }
       // We need to use the current cluster for the scan operator on views,
       // otherwise the planner will throw an Exception (different planners)
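Simulated materializations are only offered to the rewriting algorithm when the query is an EXPLAIN and hive.simulation.enable is set, so regular execution can never be rewritten against a view that holds no data. Continuing the Driver sketch (assuming this branch supports the EXPLAIN CBO variant, per Context.getExplainCbo above):

    conf.setBoolVar(HiveConf.ConfVars.HIVE_SIMULATION_MODE_ENABLED, true);
    // If rewriting applies, the printed CBO plan scans mv_sales instead of sales.
    driver.run("EXPLAIN CBO SELECT store_id, SUM(amount) FROM sales GROUP BY store_id");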
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index b477480e0a..6e9ba7ef9a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -40,6 +40,8 @@
 
 import org.antlr.runtime.tree.CommonTree;
 import org.antlr.runtime.tree.Tree;
+import org.apache.calcite.plan.RelOptMaterialization;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.StatsSetupConst;
@@ -1441,21 +1443,44 @@ private void analyzeSwitchDatabase(ASTNode ast) throws SemanticException {
 
   private void analyzeDropTable(ASTNode ast, TableType expectedType)
       throws SemanticException {
     String tableName = getUnescapedName((ASTNode) ast.getChild(0));
-    boolean ifExists = (ast.getFirstChildWithType(HiveParser.TOK_IFEXISTS) != null);
     // we want to signal an error if the table/view doesn't exist and we're
     // configured not to fail silently
+    boolean ifExists = (ast.getFirstChildWithType(HiveParser.TOK_IFEXISTS) != null);
     boolean throwException =
         !ifExists && !HiveConf.getBoolVar(conf, ConfVars.DROPIGNORESNONEXISTENT);
-    ReplicationSpec replicationSpec = new ReplicationSpec(ast);
 
+    boolean simulated = false;
+    if (expectedType == TableType.MATERIALIZED_VIEW) {
+      simulated = (ast.getFirstChildWithType(HiveParser.TOK_SIMULATED) != null);
+    }
+
+    Table tab;
+    if (simulated) {
+      // This materialized view lives in the user session, just do the proper
+      // checks and delete
+      String[] names = Utilities.getDbTableName(tableName);
+      Pair<Table, RelOptMaterialization> simulatedMaterializedView =
+          SessionState.get().getSimulatedMaterializedView(names[0], names[1]);
+      tab = simulatedMaterializedView != null ? simulatedMaterializedView.getLeft() : null;
+      if (tab == null && throwException) {
+        throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
+      }
+
+      inputs.add(new ReadEntity(tab));
+      outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_EXCLUSIVE));
+      SessionState.get().dropSimulatedMaterializedView(tab);
+      return;
+    } else {
+      tab = getTable(tableName, throwException);
+    }
 
-    Table tab = getTable(tableName, throwException);
     if (tab != null) {
       inputs.add(new ReadEntity(tab));
       outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_EXCLUSIVE));
     }
 
     boolean ifPurge = (ast.getFirstChildWithType(HiveParser.KW_PURGE) != null);
+    ReplicationSpec replicationSpec = new ReplicationSpec(ast);
     DropTableDesc dropTblDesc = new DropTableDesc(tableName, expectedType, ifExists, ifPurge,
         replicationSpec);
     rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), dropTblDesc)));
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 253633cfb3..e1738dae42 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -381,6 +381,7 @@
 KW_UNMANAGED: 'UNMANAGED';
 KW_APPLICATION: 'APPLICATION';
 KW_SYNC: 'SYNC';
 KW_AST: 'AST';
+KW_SIMULATED: 'SIMULATED';
 
 // Operators
 // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 1ffdec0fe1..dcb9410802 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -428,6 +428,7 @@ TOK_REPLACE;
 TOK_LIKERP;
 TOK_UNMANAGED;
 TOK_INPUTFORMAT;
+TOK_SIMULATED;
 }
@@ -619,6 +620,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
     xlateMap.put("KW_PATH", "PATH");
     xlateMap.put("KW_AST", "AST");
     xlateMap.put("KW_TRANSACTIONAL", "TRANSACTIONAL");
+    xlateMap.put("KW_SIMULATED", "SIMULATED");
 
     // Operators
     xlateMap.put("DOT", ".");
@@ -1958,10 +1960,11 @@ createMaterializedViewStatement
 @init { pushMsg("create materialized view statement", state); }
 @after { popMsg(state); }
-    : KW_CREATE KW_MATERIALIZED KW_VIEW (ifNotExists)? name=tableName
+    : KW_CREATE simulated? KW_MATERIALIZED KW_VIEW (ifNotExists)? name=tableName
         rewriteDisabled? tableComment? viewPartition? tableRowFormat? tableFileFormat? tableLocation?
         tablePropertiesPrefixed? KW_AS selectStatementWithCTE
     -> ^(TOK_CREATE_MATERIALIZED_VIEW $name
+         simulated?
          ifNotExists?
          rewriteDisabled?
          tableComment?
@@ -1974,10 +1977,16 @@ createMaterializedViewStatement
         )
     ;
 
+simulated
+@init { pushMsg("simulated def", state); }
+@after { popMsg(state); }
+    : KW_SIMULATED -> ^(TOK_SIMULATED)
+    ;
+
 dropMaterializedViewStatement
 @init { pushMsg("drop materialized view statement", state); }
 @after { popMsg(state); }
-    : KW_DROP KW_MATERIALIZED KW_VIEW ifExists? viewName -> ^(TOK_DROP_MATERIALIZED_VIEW viewName ifExists?)
+    : KW_DROP simulated? KW_MATERIALIZED KW_VIEW ifExists? viewName -> ^(TOK_DROP_MATERIALIZED_VIEW viewName simulated? ifExists?)
     ;
 
 showFunctionIdentifier
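Together with the lexer change, SIMULATED becomes an optional modifier in both statements (and a non-reserved word, per the IdentifiersParser change below), so the following forms parse; the names are invented:

    // Accepted by the extended grammar:
    String create = "CREATE SIMULATED MATERIALIZED VIEW db1.mv_sales AS SELECT ...";
    String drop = "DROP SIMULATED MATERIALIZED VIEW IF EXISTS db1.mv_sales";
    // DISABLE REWRITE is rejected for simulated views during analysis:
    // "Cannot add simulated materialized view without rewriting enabled."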
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 417955cde7..5a89241791 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -806,7 +806,7 @@ nonReserved
     | KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRINCIPALS
     | KW_PROTECTION | KW_PURGE | KW_QUERY | KW_QUARTER | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER
     | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE
-    | KW_ROLE | KW_ROLES | KW_SCHEMA | KW_SCHEMAS | KW_SECOND | KW_SEMI | KW_SERDE | KW_SERDEPROPERTIES | KW_SERVER | KW_SETS | KW_SHARED
+    | KW_ROLE | KW_ROLES | KW_SCHEMA | KW_SCHEMAS | KW_SECOND | KW_SEMI | KW_SERDE | KW_SERDEPROPERTIES | KW_SERVER | KW_SETS | KW_SIMULATED | KW_SHARED
     | KW_SHOW | KW_SHOW_DATABASE | KW_SKEWED | KW_SORT | KW_SORTED | KW_SSL | KW_STATISTICS | KW_STORED | KW_AST
     | KW_STREAMTABLE | KW_STRING | KW_STRUCT | KW_TABLES | KW_TBLPROPERTIES | KW_TEMPORARY | KW_TERMINATED
     | KW_TINYINT | KW_TOUCH | KW_TRANSACTIONAL | KW_TRANSACTIONS | KW_UNARCHIVE | KW_UNDO | KW_UNIONTYPE | KW_UNLOCK | KW_UNSET
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 54f34f6b0f..2a0291c42e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -54,6 +54,7 @@
 import org.antlr.runtime.tree.TreeVisitorAction;
 import org.antlr.runtime.tree.TreeWizard;
 import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
+import org.apache.calcite.plan.RelOptMaterialization;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.commons.collections.CollectionUtils;
@@ -72,6 +73,7 @@
 import org.apache.hadoop.hive.common.ValidTxnList;
 import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
 import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
+import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
@@ -163,6 +165,7 @@
 import org.apache.hadoop.hive.ql.optimizer.Transform;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
 import org.apache.hadoop.hive.ql.optimizer.lineage.Generator;
@@ -2108,6 +2111,7 @@ private void getMetaData(QB qb, ReadEntity parentInput)
           // do a deep copy, in case downstream changes it.
           tab = new Table(tab.getTTable().deepCopy());
         }
+
         if (tab == null ||
             tab.getDbName().equals(SessionState.get().getCurrentDatabase())) {
           Table materializedTab = ctx.getMaterializedTable(cteName);
@@ -2127,10 +2131,18 @@ private void getMetaData(QB qb, ReadEntity parentInput)
         }
       }
 
+      if (tab == null && tabName.equals(DUMMY_DATABASE + "." + DUMMY_TABLE)) {
+        continue;
+      }
+
+      if (tab == null && ctx.isExplainPlan()) {
+        String[] names = Utilities.getDbTableName(tabName);
+        Pair<Table, RelOptMaterialization> simulatedMaterializedView =
+            SessionState.get().getSimulatedMaterializedView(names[0], names[1]);
+        tab = simulatedMaterializedView != null ? simulatedMaterializedView.getLeft() : null;
+      }
+
       if (tab == null) {
-        if(tabName.equals(DUMMY_DATABASE + "." + DUMMY_TABLE)) {
-          continue;
-        }
         ASTNode src = qb.getParseInfo().getSrcForAlias(alias);
         if (null != src) {
           throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(src));
@@ -12389,6 +12401,14 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticException {
 
     if (createVwDesc.isMaterialized()) {
       createVwDesc.setTablesUsed(getTablesUsed(pCtx));
+
+      if (createVwDesc.isSimulated()) {
+        // If this is a simulated materialized view, we do not need to optimize
+        // or translate the physical plan.
+        ctx.setResDir(null);
+        ctx.setResFile(null);
+        return;
+      }
     } else {
       // Since we're only creating a view (not executing it), we don't need to
       // optimize or translate the plan (and in fact, those procedures can
@@ -12460,7 +12480,7 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticException {
 
     // 9. Optimize Physical op tree & Translate to target execution engine (MR,
     // TEZ..)
-    if (!ctx.getExplainLogical()) {
+    if (!ctx.getExplainCbo() && !ctx.getExplainLogical()) {
       TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx);
       compiler.init(queryState, console, db);
       compiler.compile(pCtx, rootTasks, inputs, outputs);
@@ -13578,6 +13598,7 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCtx)
     List<FieldSchema> cols = null;
     boolean ifNotExists = false;
     boolean rewriteEnabled = true;
+    boolean simulated = false;
     boolean orReplace = false;
     boolean isAlterViewAs = false;
     String comment = null;
@@ -13605,6 +13626,9 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCtx)
       case HiveParser.TOK_REWRITE_DISABLED:
         rewriteEnabled = false;
         break;
+      case HiveParser.TOK_SIMULATED:
+        simulated = true;
+        break;
       case HiveParser.TOK_ORREPLACE:
         orReplace = true;
         break;
@@ -13654,6 +13678,10 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCtx)
       throw new SemanticException("Can't combine IF NOT EXISTS and OR REPLACE.");
     }
 
+    if (!rewriteEnabled && simulated) {
+      throw new SemanticException("Cannot add simulated materialized view without rewriting enabled.");
+    }
+
     if (isMaterialized && !ifNotExists) {
       // Verify that the table does not already exist
       // dumpTable is only used to check the conflict for non-temporary tables
@@ -13678,10 +13706,14 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCtx)
     if (isMaterialized) {
       createVwDesc = new CreateViewDesc(
           dbDotTable, cols, comment, tblProps, partColNames,
-          ifNotExists, isRebuild, rewriteEnabled, isAlterViewAs,
+          ifNotExists, isRebuild, rewriteEnabled, simulated, isAlterViewAs,
          storageFormat.getInputFormat(), storageFormat.getOutputFormat(),
          location, storageFormat.getSerde(), storageFormat.getStorageHandler(),
          storageFormat.getSerdeProps());
+      if (simulated) {
+        rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
+            createVwDesc)));
+      }
       addDbAndTabToOutputs(qualTabName, TableType.MATERIALIZED_VIEW, false, tblProps);
       queryState.setCommandType(HiveOperation.CREATE_MATERIALIZED_VIEW);
     } else {
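A quick way to verify the analyzer path is a session-level check: after the CREATE statement runs, the view must be resolvable from SessionState, while a metastore lookup would find nothing. A JUnit-style sketch, reusing the Driver setup from earlier:

    // After driver.run("CREATE SIMULATED MATERIALIZED VIEW db1.mv_sales AS ...")
    assertNotNull(SessionState.get().getSimulatedMaterializedView("db1", "mv_sales"));

    // After driver.run("DROP SIMULATED MATERIALIZED VIEW db1.mv_sales")
    assertNull(SessionState.get().getSimulatedMaterializedView("db1", "mv_sales"));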
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
index 7130aba597..3e7b6cb474 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
@@ -53,6 +53,7 @@
   private String originalText;
   private String expandedText;
   private boolean rewriteEnabled;
+  private boolean simulated;
   private List<FieldSchema> schema;
   private Map<String, String> tblProps;
   private List<String> partColNames;
@@ -97,8 +98,8 @@ public CreateViewDesc() {
    */
   public CreateViewDesc(String viewName, List<FieldSchema> schema, String comment,
       Map<String, String> tblProps, List<String> partColNames,
-      boolean ifNotExists, boolean replace, boolean rewriteEnabled, boolean isAlterViewAs,
-      String inputFormat, String outputFormat, String location,
+      boolean ifNotExists, boolean replace, boolean rewriteEnabled, boolean simulated,
+      boolean isAlterViewAs, String inputFormat, String outputFormat, String location,
       String serde, String storageHandler, Map<String, String> serdeProps) {
     this.viewName = viewName;
     this.schema = schema;
@@ -109,6 +110,7 @@ public CreateViewDesc(String viewName, List<FieldSchema> schema, String comment,
     this.replace = replace;
     this.isMaterialized = true;
     this.rewriteEnabled = rewriteEnabled;
+    this.simulated = simulated;
     this.isAlterViewAs = isAlterViewAs;
     this.inputFormat = inputFormat;
     this.outputFormat = outputFormat;
@@ -146,6 +148,7 @@ public CreateViewDesc(String viewName, List<FieldSchema> schema, String comment,
     this.isAlterViewAs = isAlterViewAs;
     this.isMaterialized = false;
     this.rewriteEnabled = false;
+    this.simulated = false;
     this.inputFormat = inputFormat;
     this.outputFormat = outputFormat;
     this.serde = serde;
@@ -256,6 +259,10 @@ public void setTablesUsed(Set<String> tablesUsed) {
     this.tablesUsed = tablesUsed;
   }
 
+  public boolean isSimulated() {
+    return simulated;
+  }
+
   @Explain(displayName = "replace", displayOnlyOnTrue = true)
   public boolean isReplace() {
     return replace;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java
index 50b43bad31..3eb077e943 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java
@@ -96,7 +96,7 @@ public ImportTableDesc(String dbName, Table table) throws Exception {
             null, // comment passed as table params
             table.getParameters(),
             table.getPartColNames(),
-            false,false,false,false,
+            false,false,false,false,false,
             table.getSd().getInputFormat(),
             table.getSd().getOutputFormat(),
             null, // location: set to null here, can be overwritten by the IMPORT stmt
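The materialized-view constructor now takes five adjacent booleans, which makes call sites easy to misread. Spelled out with comments, the ImportTableDesc call above binds as follows (surrounding variables are placeholders):

    new CreateViewDesc(viewName, schema, /* comment */ null, tblProps, partColNames,
        /* ifNotExists */ false,
        /* replace */ false,
        /* rewriteEnabled */ false,
        /* simulated */ false,
        /* isAlterViewAs */ false,
        inputFormat, outputFormat, /* location */ null, serde, storageHandler, serdeProps);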
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index de5cd8b992..4758050c41 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -42,12 +42,15 @@
 import java.util.UUID;
 import java.util.concurrent.CancellationException;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.locks.ReentrantLock;
 
+import org.apache.calcite.plan.RelOptMaterialization;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang3.ArrayUtils;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -55,6 +58,7 @@
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.common.TableName;
 import org.apache.hadoop.hive.common.io.SessionStream;
 import org.apache.hadoop.hive.common.log.ProgressMonitor;
 import org.apache.hadoop.hive.common.type.Timestamp;
@@ -88,6 +92,7 @@
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
 import org.apache.hadoop.hive.ql.metadata.Table;
@@ -131,12 +136,17 @@
   /**
    * Concurrent since SessionState is often propagated to workers in thread pools
    */
-  private final Map<String, Map<String, Table>> tempTables = new ConcurrentHashMap<>();
+  private final Map<String, Map<String, Table>> tempTables =
+      new ConcurrentHashMap<>();
   private final Map<String, Map<String, ColumnStatisticsObj>> tempTableColStats =
       new ConcurrentHashMap<>();
   private final Map tempPartitions =
       new ConcurrentHashMap<>();
 
+  /* Key is the database name. Value a map from the name to the view object. */
+  private final Map<String, Map<String, Pair<Table, RelOptMaterialization>>> simulatedMaterializedViews =
+      new ConcurrentHashMap<>();
+
   protected ClassLoader parentLoader;
 
   // Session-scope compile lock.
@@ -1921,6 +1931,7 @@ public void applyAuthorizationPolicy() throws HiveException {
   public Map<String, Map<String, Table>> getTempTables() {
     return tempTables;
   }
+
   public Map getTempPartitions() {
     return tempPartitions;
   }
@@ -1929,6 +1940,57 @@ public void applyAuthorizationPolicy() throws HiveException {
     return tempTableColStats;
   }
 
+  public void createSimulatedMaterializedView(Table materializedViewTable) throws HiveException {
+    RelOptMaterialization materialization =
+        HiveMaterializedViewsRegistry.generateRelOptMaterialization(sessionConf, materializedViewTable);
+    // We are going to create the map for each view in the given database
+    Map<String, Pair<Table, RelOptMaterialization>> cq =
+        new ConcurrentHashMap<>();
+    final Map<String, Pair<Table, RelOptMaterialization>> prevCq = simulatedMaterializedViews.putIfAbsent(
+        materializedViewTable.getDbName(), cq);
+    if (prevCq != null) {
+      cq = prevCq;
+    }
+    // Now we add the new simulated materialization
+    Pair<Table, RelOptMaterialization> prevMaterialization =
+        cq.putIfAbsent(materializedViewTable.getTableName(), Pair.of(materializedViewTable, materialization));
+    if (prevMaterialization != null) {
+      throw new HiveException("Simulated materialization with the same name already exists");
+    }
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Created simulated materialized view: " +
+          TableName.getDbTable(materializedViewTable.getDbName(), materializedViewTable.getTableName()));
+    }
+  }
+
+  public Pair<Table, RelOptMaterialization> getSimulatedMaterializedView(String dbName, String tblName) {
+    final Map<String, Pair<Table, RelOptMaterialization>> cq = simulatedMaterializedViews.get(dbName);
+    if (cq != null) {
+      return cq.get(tblName);
+    }
+    return null;
+  }
+
+  public List<RelOptMaterialization> getSimulatedMaterializedViews() {
+    List<RelOptMaterialization> result = new ArrayList<>();
+    for (Map<String, Pair<Table, RelOptMaterialization>> v : simulatedMaterializedViews.values()) {
+      for (Pair<Table, RelOptMaterialization> p : v.values()) {
+        result.add(p.getRight());
+      }
+    }
+    return result;
+  }
+
+  public void dropSimulatedMaterializedView(Table materializedViewTable) {
+    simulatedMaterializedViews.get(materializedViewTable.getDbName()).remove(materializedViewTable.getTableName());
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Removed simulated materialized view: " +
+          TableName.getDbTable(materializedViewTable.getDbName(), materializedViewTable.getTableName()));
+    }
+  }
+
   /**
    * @return ip address for user running the query
    */
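The session registry mirrors HiveMaterializedViewsRegistry but is per-session and purely in-memory: putIfAbsent keeps concurrent creates safe, and a duplicate name raises HiveException rather than silently replacing the definition. Lifecycle sketch (mvTable is assumed to be a db1.mv_sales Table object):

    SessionState ss = SessionState.get();
    ss.createSimulatedMaterializedView(mvTable);        // register; throws if db1.mv_sales already exists
    ss.getSimulatedMaterializedViews();                 // all materializations, handed to the planner
    ss.getSimulatedMaterializedView("db1", "mv_sales"); // (Table, RelOptMaterialization) pair, or null
    ss.dropSimulatedMaterializedView(mvTable);          // remove; assumes the database entry exists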
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 2a7cf8c897..be69fae8d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -209,8 +209,7 @@ public static long getNumRows(HiveConf conf, List<ColumnInfo> schema, Table table,
   }
 
   private static void estimateStatsForMissingCols(List<String> neededColumns, List<ColStatistics> columnStats,
-      Table table, HiveConf conf, long nr, List<ColumnInfo> schema) {
-
+      Table table, HiveConf conf, long nr, List<ColumnInfo> schema) {
     Set<String> neededCols = new HashSet<>(neededColumns);
     Set<String> colsWithStats = new HashSet<>();
 
@@ -974,11 +973,9 @@ else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){
     return cs;
   }
 
-  private static List<ColStatistics> estimateStats(Table table, List<ColumnInfo> schema,
+  public static List<ColStatistics> estimateStats(Table table, List<ColumnInfo> schema,
       List<String> neededColumns, HiveConf conf, long nr) {
-
     List<ColStatistics> stats = new ArrayList<ColStatistics>(neededColumns.size());
-
     for (int i = 0; i < neededColumns.size(); i++) {
       ColStatistics cs = estimateColStats(nr, neededColumns.get(i), conf, schema);
       stats.add(cs);
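Taken together, the patch enables what-if analysis: a tool can propose a view, request the CBO plan, and test whether the rewrite fired, without materializing any data. A closing sketch of that loop (getResults usage and exception handling simplified; continues the Driver example above):

    driver.run("EXPLAIN CBO SELECT store_id, SUM(amount) FROM sales GROUP BY store_id");
    List<String> plan = new ArrayList<>();
    driver.getResults(plan);
    boolean rewriteFired = plan.stream().anyMatch(line -> line.contains("mv_sales"));

    // Clean up the session when done experimenting.
    driver.run("DROP SIMULATED MATERIALIZED VIEW db1.mv_sales");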