diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index a1a5c47412..3a8cbc56f6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -5101,7 +5101,7 @@ private int createTableLike(Hive db, CreateTableLikeDesc crtTbl) throws Exceptio * @throws HiveException * Throws this exception if an unexpected error occurs. */ - private int createView(Hive db, CreateViewDesc crtView) throws HiveException { + private int createView(Hive db, CreateViewDesc crtView) throws HiveException, MetaException { Table oldview = db.getTable(crtView.getViewName(), false); if (oldview != null) { // Check whether we are replicating @@ -5135,11 +5135,27 @@ private int createView(Hive db, CreateViewDesc crtView) throws HiveException { crtView.getTblProps().get(Constants.MATERIALIZED_VIEW_VERSION)); final Path prevDataLocation = oldview.getDataLocation(); oldview.getTTable().getSd().setLocation(crtView.getLocation()); - // We update metastore - db.alterTable(crtView.getViewName(), oldview, null); // As table object is modified in this method, we need to update // the subsequent stats tasks (if any) updateChildrenStatsTask(oldview); + // We commit changes to the metastore + boolean failed = true; + HiveMetaHook hook = oldview.getStorageHandler() != null ? 
+ oldview.getStorageHandler().getMetaHook() : null; + DefaultHiveMetaHook hiveMetaHook = hook instanceof DefaultHiveMetaHook ? (DefaultHiveMetaHook) hook : null; + try { + if (hiveMetaHook != null) { + // We execute the OVERWRITE hook; only storage handlers exposing one need it + hiveMetaHook.commitInsertTable(oldview.getTTable(), true); + } + // We update metastore, whether or not a hook is present + db.alterTable(crtView.getViewName(), oldview, null); + failed = false; + } finally { + if (failed && hiveMetaHook != null) { + hiveMetaHook.rollbackInsertTable(oldview.getTTable(), true); + } + } // We need to delete the previous location for the materialized view deleteDir(prevDataLocation); addIfAbsentByName(new WriteEntity(oldview, WriteEntity.WriteType.DDL_EXCLUSIVE)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index a671331d89..02453c3973 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12550,19 +12550,6 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCt unparseTranslator.enable(); if (isMaterialized) { - createVwDesc = new CreateViewDesc( - dbDotTable, cols, comment, tblProps, partColNames, - ifNotExists, isRebuild, rewriteEnabled, isAlterViewAs, - storageFormat.getInputFormat(), storageFormat.getOutputFormat(), - location, storageFormat.getSerde(), storageFormat.getStorageHandler(), - storageFormat.getSerdeProps()); - queryState.setCommandType(HiveOperation.CREATE_MATERIALIZED_VIEW); - - // For materialized views, properties should exist - if (createVwDesc.getTblProps() == null) { - createVwDesc.setTblProps(new HashMap<>()); - } - Path dataLocation; String mvVersion; if (isRebuild) { @@ -12585,6 +12572,9 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCt } catch (Exception e) { throw new SemanticException(e); } + // Create view descriptor + createVwDesc = CreateViewDesc.fromTable(tab); + 
createVwDesc.setReplace(true); // Generate the new directory and increase the version dataLocation = tab.getDataLocation().getParent(); mvVersion = String.valueOf(Integer.parseInt( @@ -12593,6 +12583,17 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCt addDbAndTabToOutputs(qualTabName, TableType.MATERIALIZED_VIEW, WriteEntity.WriteType.DDL_EXCLUSIVE); outputs.add(BaseSemanticAnalyzer.toWriteEntity(tab.getDataLocation(), conf)); } else { + // Create view descriptor + createVwDesc = new CreateViewDesc( + dbDotTable, cols, comment, tblProps, partColNames, + ifNotExists, false, rewriteEnabled, + storageFormat.getInputFormat(), storageFormat.getOutputFormat(), + location, storageFormat.getSerde(), storageFormat.getStorageHandler(), + storageFormat.getSerdeProps()); + // For materialized views, properties should exist + if (createVwDesc.getTblProps() == null) { + createVwDesc.setTblProps(new HashMap<>()); + } // Add version property ('0') and set up location correctly if (createVwDesc.getLocation() == null) { try { @@ -12612,6 +12613,7 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCt // Set up the new directory and version in tblProps createVwDesc.setLocation(new Path(dataLocation, mvVersion).toString()); createVwDesc.getTblProps().put(Constants.MATERIALIZED_VIEW_VERSION, mvVersion); + queryState.setCommandType(HiveOperation.CREATE_MATERIALIZED_VIEW); } else { createVwDesc = new CreateViewDesc( dbDotTable, cols, comment, tblProps, partColNames, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java index 97baf25ea8..d0c1e80c81 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java @@ -82,19 +82,18 @@ public CreateViewDesc() { * @param tblProps * @param partColNames * @param ifNotExists - * @param orReplace - * @param isAlterViewAs 
+ * @param replace + * @param rewriteEnabled * @param inputFormat * @param outputFormat * @param location - * @param serName * @param serde * @param storageHandler * @param serdeProps */ public CreateViewDesc(String viewName, List schema, String comment, Map tblProps, List partColNames, - boolean ifNotExists, boolean replace, boolean rewriteEnabled, boolean isAlterViewAs, + boolean ifNotExists, boolean replace, boolean rewriteEnabled, String inputFormat, String outputFormat, String location, String serde, String storageHandler, Map serdeProps) { this.viewName = viewName; @@ -106,7 +105,7 @@ public CreateViewDesc(String viewName, List schema, String comment, this.replace = replace; this.isMaterialized = true; this.rewriteEnabled = rewriteEnabled; - this.isAlterViewAs = isAlterViewAs; + this.isAlterViewAs = false; this.inputFormat = inputFormat; this.outputFormat = outputFormat; this.location = location; @@ -115,6 +114,33 @@ public CreateViewDesc(String viewName, List schema, String comment, this.serdeProps = serdeProps; } + /** + * Generates a descriptor from a metastore table object, copying + * its properties. + */ + public static CreateViewDesc fromTable(Table tab) { + CreateViewDesc cvd = new CreateViewDesc(); + cvd.setViewName(tab.getFullyQualifiedName()); + cvd.setViewOriginalText(tab.getViewOriginalText()); + cvd.setViewExpandedText(tab.getViewExpandedText()); + cvd.setSchema(tab.getAllCols()); + cvd.setTblProps(tab.getParameters()); + cvd.setPartColNames(tab.getPartColNames()); + cvd.setComment(tab.getProperty("comment")); + cvd.setMaterialized(tab.isMaterializedView()); + cvd.setRewriteEnabled(tab.isRewriteEnabled()); + cvd.setInputFormat(tab.getSd().getInputFormat()); + cvd.setOutputFormat(tab.getSd().getOutputFormat()); + cvd.setLocation(tab.getSd().getLocation()); + cvd.setSerde(tab.getSerializationLib()); + cvd.setStorageHandler( + tab.getStorageHandler() == null ? 
+ null : tab.getStorageHandler().getClass().getName()); + cvd.setSerdeProps( + tab.getSd().getSerdeInfo() == null ? + null : tab.getSd().getSerdeInfo().getParameters()); + return cvd; + } + /** * Used to create a view descriptor * @param viewName @@ -287,6 +313,10 @@ public void setOutputFormat(String outputFormat) { this.outputFormat = outputFormat; } + public void setMaterialized(boolean isMaterialized) { + this.isMaterialized = isMaterialized; + } + public boolean isMaterialized() { return isMaterialized; } @@ -294,18 +324,31 @@ public boolean isMaterialized() { public void setLocation(String location) { this.location = location; } + public String getLocation() { return location; } + public void setSerde(String serde) { + this.serde = serde; + } + public String getSerde() { return serde; } + public void setStorageHandler(String storageHandler) { + this.storageHandler = storageHandler; + } + public String getStorageHandler() { return storageHandler; } + public void setSerdeProps(Map serdeProps) { + this.serdeProps = serdeProps; + } + public Map getSerdeProps() { return serdeProps; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java index 3535fa4d02..5c245ac1d7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java @@ -93,7 +93,7 @@ public ImportTableDesc(String dbName, Table table) throws Exception { null, // comment passed as table params table.getParameters(), table.getPartColNames(), - false,false,false,false, + false,false,false, table.getSd().getInputFormat(), table.getSd().getOutputFormat(), null, // location: set to null here, can be overwritten by the IMPORT stmt diff --git a/ql/src/test/queries/clientpositive/druidmini_mv.q b/ql/src/test/queries/clientpositive/druidmini_mv.q index e059357602..3acbadf9a7 100644 --- a/ql/src/test/queries/clientpositive/druidmini_mv.q +++ 
b/ql/src/test/queries/clientpositive/druidmini_mv.q @@ -5,20 +5,28 @@ set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.strict.checks.cartesian.product=false; set hive.materializedview.rewriting=true; -create table cmv_basetable (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true'); - -insert into cmv_basetable values - (1, 'alfred', 10.30, 2), - (2, 'bob', 3.14, 3), - (2, 'bonnie', 172342.2, 3), - (3, 'calvin', 978.76, 3), - (3, 'charlie', 9.8, 1); +CREATE TABLE cmv_basetable +STORED AS orc +TBLPROPERTIES ('transactional'='true') +AS +SELECT cast(unix_timestamp() AS timestamp) AS t, + cast(a AS int) AS a, + cast(b AS varchar(256)) AS b, + cast(c AS decimal(10,2)) AS c, + cast(d AS int) AS d +FROM TABLE ( + VALUES + (1, 'alfred', 10.30, 2), + (2, 'bob', 3.14, 3), + (2, 'bonnie', 172342.2, 3), + (3, 'calvin', 978.76, 3), + (3, 'charlie', 9.8, 1)) as q (a, b, c, d); CREATE MATERIALIZED VIEW cmv_mat_view ENABLE REWRITE STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' TBLPROPERTIES ("druid.segment.granularity" = "HOUR") AS -SELECT cast(current_timestamp() as timestamp with local time zone) as `__time`, a, b, c +SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c FROM cmv_basetable WHERE a = 2; @@ -30,7 +38,7 @@ CREATE MATERIALIZED VIEW IF NOT EXISTS cmv_mat_view2 ENABLE REWRITE STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' TBLPROPERTIES ("druid.segment.granularity" = "HOUR") AS -SELECT cast(current_timestamp() as timestamp with local time zone) as `__time`, a, c +SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c FROM cmv_basetable WHERE a = 3; @@ -61,7 +69,7 @@ SELECT * FROM ( ON table1.a = table2.a); INSERT INTO cmv_basetable VALUES - (3, 'charlie', 15.8, 1); + (cast(unix_timestamp() AS timestamp), 3, 'charlie', 15.8, 1); -- TODO: CANNOT USE THE VIEW, IT IS OUTDATED EXPLAIN @@ -77,8 +85,8 @@ SELECT * FROM ( (SELECT a, c FROM 
cmv_basetable WHERE d = 3) table2 ON table1.a = table2.a); --- REBUILD: TODO FOR MVS USING CUSTOM STORAGE HANDLERS --- ALTER MATERIALIZED VIEW cmv_mat_view REBUILD; +-- REBUILD +ALTER MATERIALIZED VIEW cmv_mat_view2 REBUILD; -- NOW IT CAN BE USED AGAIN EXPLAIN diff --git a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out index 5a0b885f77..09d45dd019 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out @@ -1,38 +1,55 @@ -PREHOOK: query: create table cmv_basetable (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@cmv_basetable -POSTHOOK: query: create table cmv_basetable (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@cmv_basetable -PREHOOK: query: insert into cmv_basetable values - (1, 'alfred', 10.30, 2), - (2, 'bob', 3.14, 3), - (2, 'bonnie', 172342.2, 3), - (3, 'calvin', 978.76, 3), - (3, 'charlie', 9.8, 1) -PREHOOK: type: QUERY +unix_timestamp(void) is deprecated. Use current_timestamp instead. +unix_timestamp(void) is deprecated. Use current_timestamp instead. 
+PREHOOK: query: CREATE TABLE cmv_basetable +STORED AS orc +TBLPROPERTIES ('transactional'='true') +AS +SELECT cast(unix_timestamp() AS timestamp) AS t, + cast(a AS int) AS a, + cast(b AS varchar(256)) AS b, + cast(c AS decimal(10,2)) AS c, + cast(d AS int) AS d +FROM TABLE ( + VALUES + (1, 'alfred', 10.30, 2), + (2, 'bob', 3.14, 3), + (2, 'bonnie', 172342.2, 3), + (3, 'calvin', 978.76, 3), + (3, 'charlie', 9.8, 1)) as q (a, b, c, d) +PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default PREHOOK: Output: default@cmv_basetable -POSTHOOK: query: insert into cmv_basetable values - (1, 'alfred', 10.30, 2), - (2, 'bob', 3.14, 3), - (2, 'bonnie', 172342.2, 3), - (3, 'calvin', 978.76, 3), - (3, 'charlie', 9.8, 1) -POSTHOOK: type: QUERY +POSTHOOK: query: CREATE TABLE cmv_basetable +STORED AS orc +TBLPROPERTIES ('transactional'='true') +AS +SELECT cast(unix_timestamp() AS timestamp) AS t, + cast(a AS int) AS a, + cast(b AS varchar(256)) AS b, + cast(c AS decimal(10,2)) AS c, + cast(d AS int) AS d +FROM TABLE ( + VALUES + (1, 'alfred', 10.30, 2), + (2, 'bob', 3.14, 3), + (2, 'bonnie', 172342.2, 3), + (3, 'calvin', 978.76, 3), + (3, 'charlie', 9.8, 1)) as q (a, b, c, d) +POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default POSTHOOK: Output: default@cmv_basetable POSTHOOK: Lineage: cmv_basetable.a SCRIPT [] POSTHOOK: Lineage: cmv_basetable.b SCRIPT [] POSTHOOK: Lineage: cmv_basetable.c SCRIPT [] POSTHOOK: Lineage: cmv_basetable.d SCRIPT [] +POSTHOOK: Lineage: cmv_basetable.t SIMPLE [] PREHOOK: query: CREATE MATERIALIZED VIEW cmv_mat_view ENABLE REWRITE STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' TBLPROPERTIES ("druid.segment.granularity" = "HOUR") AS -SELECT cast(current_timestamp() as timestamp with local time zone) as `__time`, a, b, c +SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c FROM cmv_basetable WHERE 
a = 2 PREHOOK: type: CREATE_MATERIALIZED_VIEW @@ -43,7 +60,7 @@ POSTHOOK: query: CREATE MATERIALIZED VIEW cmv_mat_view ENABLE REWRITE STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' TBLPROPERTIES ("druid.segment.granularity" = "HOUR") AS -SELECT cast(current_timestamp() as timestamp with local time zone) as `__time`, a, b, c +SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c FROM cmv_basetable WHERE a = 2 POSTHOOK: type: CREATE_MATERIALIZED_VIEW @@ -67,6 +84,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} druid.datasource default.cmv_mat_view druid.segment.granularity HOUR +materialized.view.version 0 numFiles 0 numRows 2 rawDataSize 0 @@ -77,7 +95,7 @@ PREHOOK: query: CREATE MATERIALIZED VIEW IF NOT EXISTS cmv_mat_view2 ENABLE REWR STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' TBLPROPERTIES ("druid.segment.granularity" = "HOUR") AS -SELECT cast(current_timestamp() as timestamp with local time zone) as `__time`, a, c +SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c FROM cmv_basetable WHERE a = 3 PREHOOK: type: CREATE_MATERIALIZED_VIEW @@ -88,7 +106,7 @@ POSTHOOK: query: CREATE MATERIALIZED VIEW IF NOT EXISTS cmv_mat_view2 ENABLE REW STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' TBLPROPERTIES ("druid.segment.granularity" = "HOUR") AS -SELECT cast(current_timestamp() as timestamp with local time zone) as `__time`, a, c +SELECT cast(t AS timestamp with local time zone) as `__time`, a, b, c FROM cmv_basetable WHERE a = 3 POSTHOOK: type: CREATE_MATERIALIZED_VIEW @@ -103,7 +121,8 @@ POSTHOOK: query: SELECT a, c FROM cmv_mat_view2 POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_mat_view2 #### A masked pattern was here #### -6 988.56 +3 9.80 +3 978.76 PREHOOK: query: SHOW TBLPROPERTIES cmv_mat_view2 PREHOOK: type: SHOW_TBLPROPERTIES POSTHOOK: query: SHOW TBLPROPERTIES cmv_mat_view2 @@ -111,6 +130,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} druid.datasource default.cmv_mat_view2 druid.segment.granularity HOUR +materialized.view.version 0 numFiles 0 numRows 2 rawDataSize 0 @@ -161,7 +181,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_basetable POSTHOOK: Input: default@cmv_mat_view2 #### A masked pattern was here #### -6 988.56 +3 9.80 +3 978.76 Warning: Shuffle Join JOIN[6][tables = [cmv_mat_view2, $hdt$_0]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM ( @@ -187,17 +208,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable - Statistics: Num rows: 1 Data size: 9310 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 10350 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((3 = a) and (d = 3)) (type: boolean) - Statistics: Num rows: 1 Data size: 9310 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2070 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: decimal(10,2)) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 9310 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2070 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 9310 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2070 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,2)) TableScan alias: cmv_mat_view2 @@ -217,14 +238,14 @@ STAGE PLANS: 0 1 outputColumnNames: _col1, _col5 - Statistics: Num rows: 2 Data size: 18622 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 4142 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: 3 (type: int), _col1 (type: decimal(10,2)), 3 (type: int), _col5 (type: decimal(10,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 18622 Basic stats: PARTIAL Column stats: NONE 
+ Statistics: Num rows: 2 Data size: 4142 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 18622 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 2 Data size: 4142 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -255,14 +276,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_basetable POSTHOOK: Input: default@cmv_mat_view2 #### A masked pattern was here #### -3 988.56 3 978.76 +3 9.80 3 978.76 +3 978.76 3 978.76 +unix_timestamp(void) is deprecated. Use current_timestamp instead. +unix_timestamp(void) is deprecated. Use current_timestamp instead. PREHOOK: query: INSERT INTO cmv_basetable VALUES - (3, 'charlie', 15.8, 1) + (cast(unix_timestamp() AS timestamp), 3, 'charlie', 15.8, 1) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@cmv_basetable POSTHOOK: query: INSERT INTO cmv_basetable VALUES - (3, 'charlie', 15.8, 1) + (cast(unix_timestamp() AS timestamp), 3, 'charlie', 15.8, 1) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@cmv_basetable @@ -270,6 +294,7 @@ POSTHOOK: Lineage: cmv_basetable.a SCRIPT [] POSTHOOK: Lineage: cmv_basetable.b SCRIPT [] POSTHOOK: Lineage: cmv_basetable.c SCRIPT [] POSTHOOK: Lineage: cmv_basetable.d SCRIPT [] +POSTHOOK: Lineage: cmv_basetable.t SCRIPT [] Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM ( @@ -295,31 +320,31 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 19600 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (a = 3) (type: boolean) - Statistics: Num rows: 1 Data size: 
17540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 7840 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: decimal(10,2)) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 7840 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 7840 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,2)) TableScan alias: cmv_basetable - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 19600 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((3 = a) and (d = 3)) (type: boolean) - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3920 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: decimal(10,2)) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3920 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3920 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,2)) Reduce Operator Tree: Join Operator @@ -329,14 +354,14 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 35081 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 15682 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 3 (type: int), _col0 (type: decimal(10,2)), 3 (type: int), _col1 (type: decimal(10,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: 
Num rows: 1 Data size: 35081 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 15682 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 35081 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 15682 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -368,7 +393,17 @@ POSTHOOK: Input: default@cmv_basetable 3 15.80 3 978.76 3 9.80 3 978.76 3 978.76 3 978.76 -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: ALTER MATERIALIZED VIEW cmv_mat_view2 REBUILD +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@cmv_basetable +PREHOOK: Output: database:default +PREHOOK: Output: default@cmv_mat_view2 +POSTHOOK: query: ALTER MATERIALIZED VIEW cmv_mat_view2 REBUILD +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@cmv_basetable +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cmv_mat_view2 +Warning: Shuffle Join JOIN[6][tables = [cmv_mat_view2, $hdt$_0]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 @@ -393,32 +428,28 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a = 3) (type: boolean) - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: decimal(10,2)) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: 
decimal(10,2)) - TableScan - alias: cmv_basetable - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 19600 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((3 = a) and (d = 3)) (type: boolean) - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3920 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: decimal(10,2)) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3920 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 17540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3920 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(10,2)) + TableScan + alias: cmv_mat_view2 + properties: + druid.query.json {"queryType":"select","dataSource":"default.cmv_mat_view2","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":["c"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} + druid.query.type select + Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: c (type: decimal(10,2)) Reduce Operator Tree: Join Operator condition map: @@ -426,15 +457,15 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 35081 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col5 + Statistics: Num rows: 3 Data size: 11763 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: 3 (type: int), _col0 (type: decimal(10,2)), 3 (type: int), _col1 (type: 
decimal(10,2)) + expressions: 3 (type: int), _col1 (type: decimal(10,2)), 3 (type: int), _col5 (type: decimal(10,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 35081 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 11763 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 35081 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 11763 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -446,7 +477,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[6][tables = [cmv_mat_view2, $hdt$_0]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 JOIN @@ -454,6 +485,7 @@ PREHOOK: query: SELECT * FROM ( ON table1.a = table2.a) PREHOOK: type: QUERY PREHOOK: Input: default@cmv_basetable +PREHOOK: Input: default@cmv_mat_view2 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 @@ -462,6 +494,7 @@ POSTHOOK: query: SELECT * FROM ( ON table1.a = table2.a) POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_basetable +POSTHOOK: Input: default@cmv_mat_view2 #### A masked pattern was here #### 3 15.80 3 978.76 3 9.80 3 978.76 diff --git a/ql/src/test/results/clientpositive/materialized_view_create_rewrite_3.q.out b/ql/src/test/results/clientpositive/materialized_view_create_rewrite_3.q.out index cc26f5ac74..5b28669e34 100644 --- a/ql/src/test/results/clientpositive/materialized_view_create_rewrite_3.q.out +++ b/ql/src/test/results/clientpositive/materialized_view_create_rewrite_3.q.out @@ -542,9 +542,24 @@ STAGE PLANS: Create View 
columns: a int, c decimal(10,2) table properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} materialized.view.version 1 + numFiles 1 + numRows 2 + rawDataSize 232 + totalSize 325 +#### A masked pattern was here #### + expanded text: SELECT `cmv_basetable`.`a`, `cmv_basetable_2`.`c` + FROM `default`.`cmv_basetable` JOIN `default`.`cmv_basetable_2` ON (`cmv_basetable`.`a` = `cmv_basetable_2`.`a`) + WHERE `cmv_basetable_2`.`c` > 10.0 + GROUP BY `cmv_basetable`.`a`, `cmv_basetable_2`.`c` name: default.cmv_mat_view + original text: SELECT cmv_basetable.a, cmv_basetable_2.c + FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a) + WHERE cmv_basetable_2.c > 10.0 + GROUP BY cmv_basetable.a, cmv_basetable_2.c replace: true + rewrite enabled: true Stage: Stage-3 Stats Work diff --git a/ql/src/test/results/clientpositive/materialized_view_create_rewrite_4.q.out b/ql/src/test/results/clientpositive/materialized_view_create_rewrite_4.q.out index 68a82a8481..94628182c6 100644 --- a/ql/src/test/results/clientpositive/materialized_view_create_rewrite_4.q.out +++ b/ql/src/test/results/clientpositive/materialized_view_create_rewrite_4.q.out @@ -629,9 +629,24 @@ STAGE PLANS: Create View columns: a int, c decimal(10,2) table properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} materialized.view.version 1 + numFiles 1 + numRows 2 + rawDataSize 232 + totalSize 325 +#### A masked pattern was here #### + expanded text: SELECT `cmv_basetable`.`a`, `cmv_basetable_2`.`c` + FROM `default`.`cmv_basetable` JOIN `default`.`cmv_basetable_2` ON (`cmv_basetable`.`a` = `cmv_basetable_2`.`a`) + WHERE `cmv_basetable_2`.`c` > 10.0 + GROUP BY `cmv_basetable`.`a`, `cmv_basetable_2`.`c` name: default.cmv_mat_view + original text: SELECT cmv_basetable.a, cmv_basetable_2.c + FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a) + WHERE cmv_basetable_2.c > 10.0 + GROUP BY cmv_basetable.a, cmv_basetable_2.c replace: true + rewrite enabled: true 
Stage: Stage-3 Stats Work