diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index d406f51443..77d9c7fdbd 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1648,6 +1648,10 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "tries to modify the original materialization contents to reflect the latest changes to the\n" + "materialized view source tables, instead of rebuilding the contents fully. Incremental rebuild\n" + "is based on the materialized view algebraic incremental rewriting."), + HIVE_MATERIALIZED_VIEW_REBUILD_INCREMENTAL_FACTOR("hive.materializedview.rebuild.incremental.factor", 0.1f, + "The estimated cost of the resulting plan for incremental maintenance of materialization\n" + + "with aggregations will be multiplied by this value. Reducing the value can be useful to\n" + + "favour incremental rebuild over full rebuild."), HIVE_MATERIALIZED_VIEW_FILE_FORMAT("hive.materializedview.fileformat", "ORC", new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC"), "Default file format for CREATE MATERIALIZED VIEW statement"), diff --git a/pom.xml b/pom.xml index 7503cff532..54df091159 100644 --- a/pom.xml +++ b/pom.xml @@ -125,7 +125,7 @@ 1.12.0 1.8.2 0.8.0.RELEASE - 1.17.0 + 1.18.0-SNAPSHOT 4.2.4 4.1.17 4.1.19 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java index fb9672ae88..bfc157cbff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewRule.java @@ -50,7 +50,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; import java.util.List; -import java.util.Map; /** * Enable join and aggregate materialized view rewriting @@ -65,7 +64,8 @@ * the root of the plan. */ private static final HepProgram PROGRAM = new HepProgramBuilder() - .addRuleInstance(HiveExtractRelNodeRule.INSTANCE) + .addRuleInstance(HiveHepExtractRelNodeRule.INSTANCE) + .addRuleInstance(HiveVolcanoExtractRelNodeRule.INSTANCE) .addRuleInstance(HiveTableScanProjectInsert.INSTANCE) .addRuleCollection( ImmutableList.of( @@ -102,6 +102,15 @@ new HiveMaterializedViewOnlyAggregateRule(HiveRelFactories.HIVE_BUILDER, true, PROGRAM); + public static final RelOptRule[] MATERIALIZED_VIEW_REWRITING_RULES = + new RelOptRule[] { + HiveMaterializedViewRule.INSTANCE_PROJECT_FILTER, + HiveMaterializedViewRule.INSTANCE_FILTER, + HiveMaterializedViewRule.INSTANCE_PROJECT_JOIN, + HiveMaterializedViewRule.INSTANCE_JOIN, + HiveMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE, + HiveMaterializedViewRule.INSTANCE_AGGREGATE }; + protected static class HiveMaterializedViewProjectAggregateRule extends MaterializedViewProjectAggregateRule { public HiveMaterializedViewProjectAggregateRule( @@ -137,18 +146,40 @@ public SqlAggFunction getRollup(SqlAggFunction aggregation) { } } + /** + * This rule is used within the PROGRAM that rewrites the query for + * partial rewritings. Its goal is to extract the RelNode from the + * HepRelVertex node so the rest of the rules in the PROGRAM can be + * applied correctly. + */ + private static class HiveHepExtractRelNodeRule extends RelOptRule { + + private static final HiveHepExtractRelNodeRule INSTANCE = + new HiveHepExtractRelNodeRule(); + + private HiveHepExtractRelNodeRule() { + super(operand(HepRelVertex.class, any())); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final HepRelVertex rel = call.rel(0); + call.transformTo(rel.getCurrentRel()); + } + } + /** * This rule is used within the PROGRAM that rewrites the query for * partial rewritings. Its goal is to extract the RelNode from the * RelSubset node so the rest of the rules in the PROGRAM can be * applied correctly. */ - private static class HiveExtractRelNodeRule extends RelOptRule { + private static class HiveVolcanoExtractRelNodeRule extends RelOptRule { - private static final HiveExtractRelNodeRule INSTANCE = - new HiveExtractRelNodeRule(); + private static final HiveVolcanoExtractRelNodeRule INSTANCE = + new HiveVolcanoExtractRelNodeRule(); - private HiveExtractRelNodeRule() { + private HiveVolcanoExtractRelNodeRule() { super(operand(RelSubset.class, any())); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 361f150193..bd549db490 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -44,9 +44,11 @@ import org.apache.calcite.config.NullCollation; import org.apache.calcite.interpreter.BindableConvention; import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.plan.RelOptMaterialization; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptSchema; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelTraitSet; @@ -2160,11 +2162,12 @@ private RelNode applyMaterializedViewRewriting(RelOptPlanner planner, RelNode ba final PerfLogger perfLogger = SessionState.getPerfLogger(); final RelNode calcitePreMVRewritingPlan = basePlan; + final boolean mvRebuild = mvRebuildMode != MaterializationRebuildMode.NONE; // Add views to planner List materializations = new ArrayList<>(); try { - if (mvRebuildMode != MaterializationRebuildMode.NONE) { + if (mvRebuild) { // We only retrieve the materialization corresponding to the rebuild. In turn, // we pass 'true' for the forceMVContentsUpToDate parameter, as we cannot allow the // materialization contents to be stale for a rebuild if we want to use it. @@ -2224,40 +2227,51 @@ private RelNode copyNodeScan(RelNode scan) { if (!materializations.isEmpty()) { perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - // Use Calcite cost model for view rewriting - optCluster.invalidateMetadataQuery(); - RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE)); - - // Add materializations to planner - for (RelOptMaterialization materialization : materializations) { - planner.addMaterialization(materialization); + if (mvRebuild) { + // If it is a materialized view rebuild, we use the HepPlanner, since we only have + // one MV and we would like to use it to create incremental maintenance plans + HepPlanner hepPlanner = createHepPlanner(basePlan.getCluster(), true, mdProvider, null, + HepMatchOrder.TOP_DOWN, HiveMaterializedViewRule.MATERIALIZED_VIEW_REWRITING_RULES); + // Add materialization for rebuild to planner + assert materializations.size() == 1; + hepPlanner.addMaterialization(materializations.get(0)); + // Optimize plan + hepPlanner.setRoot(basePlan); + basePlan = hepPlanner.findBestExp(); + } else { + // If this is not a rebuild, we use Volcano planner as the decision + // on whether to use MVs or not and which MVs to use should be cost-based + optCluster.invalidateMetadataQuery(); + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE)); + + // Add materializations to planner + for (RelOptMaterialization materialization : materializations) { + planner.addMaterialization(materialization); + } + // Add view-based rewriting rules to planner + for (RelOptRule rule : HiveMaterializedViewRule.MATERIALIZED_VIEW_REWRITING_RULES) { + planner.addRule(rule); + } + // Partition pruner rule + planner.addRule(HiveFilterProjectTSTransposeRule.INSTANCE); + planner.addRule(new HivePartitionPruneRule(conf)); + + // Optimize plan + planner.setRoot(basePlan); + basePlan = planner.findBestExp(); + // Remove view-based rewriting rules from planner + planner.clear(); + + // Restore default cost model + optCluster.invalidateMetadataQuery(); + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider)); } - // Add view-based rewriting rules to planner - planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_FILTER); - planner.addRule(HiveMaterializedViewRule.INSTANCE_FILTER); - planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_JOIN); - planner.addRule(HiveMaterializedViewRule.INSTANCE_JOIN); - planner.addRule(HiveMaterializedViewRule.INSTANCE_PROJECT_AGGREGATE); - planner.addRule(HiveMaterializedViewRule.INSTANCE_AGGREGATE); - // Partition pruner rule - planner.addRule(HiveFilterProjectTSTransposeRule.INSTANCE); - planner.addRule(new HivePartitionPruneRule(conf)); - - // Optimize plan - planner.setRoot(basePlan); - basePlan = planner.findBestExp(); - // Remove view-based rewriting rules from planner - planner.clear(); - - // Restore default cost model - optCluster.invalidateMetadataQuery(); - RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider)); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: View-based rewriting"); if (calcitePreMVRewritingPlan != basePlan) { // A rewriting was produced, we will check whether it was part of an incremental rebuild - // to try to replace INSERT OVERWRITE by INSERT + // to try to replace INSERT OVERWRITE by INSERT or MERGE if (mvRebuildMode == MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REBUILD_INCREMENTAL)) { // First we need to check if it is valid to convert to MERGE/INSERT INTO. @@ -2282,6 +2296,24 @@ private RelNode copyNodeScan(RelNode scan) { basePlan = applyPreJoinOrderingTransforms(basePlan, mdProvider, executorProvider); } } + + if (mvRebuildMode == MaterializationRebuildMode.AGGREGATE_REBUILD) { + // Make a cost-based decision factoring the configuration property + optCluster.invalidateMetadataQuery(); + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE)); + RelMetadataQuery mq = RelMetadataQuery.instance(); + RelOptCost costOriginalPlan = mq.getCumulativeCost(calcitePreMVRewritingPlan); + final double factorSelectivity = (double) HiveConf.getFloatVar( + conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REBUILD_INCREMENTAL_FACTOR); + RelOptCost costRebuildPlan = mq.getCumulativeCost(basePlan).multiplyBy(factorSelectivity); + if (costOriginalPlan.isLe(costRebuildPlan)) { + basePlan = calcitePreMVRewritingPlan; + mvRebuildMode = MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD; + } + optCluster.invalidateMetadataQuery(); + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider)); + } + return basePlan; } @@ -2328,10 +2360,17 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, * @return optimized RelNode */ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, + RelMetadataProvider mdProvider, RexExecutor executorProvider, + HepMatchOrder order, RelOptRule... rules) { + HepPlanner planner = createHepPlanner(basePlan.getCluster(), followPlanChanges, + mdProvider, executorProvider, order, rules); + planner.setRoot(basePlan); + return planner.findBestExp(); + } + + private HepPlanner createHepPlanner(RelOptCluster cluster, boolean followPlanChanges, RelMetadataProvider mdProvider, RexExecutor executorProvider, HepMatchOrder order, RelOptRule... rules) { - - RelNode optimizedRelNode = basePlan; HepProgramBuilder programBuilder = new HepProgramBuilder(); if (followPlanChanges) { programBuilder.addMatchOrder(order); @@ -2344,26 +2383,23 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, // Create planner and copy context HepPlanner planner = new HepPlanner(programBuilder.build(), - basePlan.getCluster().getPlanner().getContext()); + cluster.getPlanner().getContext()); List list = Lists.newArrayList(); list.add(mdProvider); planner.registerMetadataProviders(list); RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); - basePlan.getCluster().setMetadataProvider( + cluster.setMetadataProvider( new CachingRelMetadataProvider(chainedProvider, planner)); if (executorProvider != null) { // basePlan.getCluster.getPlanner is the VolcanoPlanner from apply() // both planners need to use the correct executor - basePlan.getCluster().getPlanner().setExecutor(executorProvider); + cluster.getPlanner().setExecutor(executorProvider); planner.setExecutor(executorProvider); } - planner.setRoot(basePlan); - optimizedRelNode = planner.findBestExp(); - - return optimizedRelNode; + return planner; } @SuppressWarnings("nls") diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out index b12df11a98..6c8668ed0b 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned.q.out @@ -487,18 +487,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src_txn - filterExpr: ((UDFToDouble(key) > 200.0D) and (UDFToDouble(key) < 250.0D)) (type: boolean) + filterExpr: ((ROW__ID.writeid > 1) and (UDFToDouble(key) > 200.0D) and (UDFToDouble(key) < 250.0D)) (type: boolean) Statistics: Num rows: 501 Data size: 175536 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 250.0D) and (UDFToDouble(key) > 200.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 19270 Basic stats: COMPLETE Column stats: NONE + predicate: ((ROW__ID.writeid > 1) and (UDFToDouble(key) < 250.0D) and (UDFToDouble(key) > 200.0D)) (type: boolean) + Statistics: Num rows: 18 Data size: 6306 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string), key (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 19270 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 6306 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 55 Data size: 19270 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 6306 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -507,18 +507,18 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: value, key - Statistics: Num rows: 55 Data size: 19270 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 6306 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(value, 'hll') keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 19270 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 6306 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 19270 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 6306 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: may be used (ACID table) @@ -530,14 +530,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 9459 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 3153 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: struct), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 9459 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 3153 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 9459 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 3153 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -551,7 +551,7 @@ STAGE PLANS: tables: partition: key - replace: true + replace: false table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -576,70 +576,8 @@ PREHOOK: Output: default@partition_mv_1 POSTHOOK: query: ALTER MATERIALIZED VIEW partition_mv_1 REBUILD POSTHOOK: type: QUERY POSTHOOK: Input: default@src_txn -POSTHOOK: Output: default@partition_mv_1@key=201 -POSTHOOK: Output: default@partition_mv_1@key=202 -POSTHOOK: Output: default@partition_mv_1@key=203 -POSTHOOK: Output: default@partition_mv_1@key=205 -POSTHOOK: Output: default@partition_mv_1@key=207 -POSTHOOK: Output: default@partition_mv_1@key=208 -POSTHOOK: Output: default@partition_mv_1@key=209 -POSTHOOK: Output: default@partition_mv_1@key=213 -POSTHOOK: Output: default@partition_mv_1@key=214 -POSTHOOK: Output: default@partition_mv_1@key=216 -POSTHOOK: Output: default@partition_mv_1@key=217 -POSTHOOK: Output: default@partition_mv_1@key=218 -POSTHOOK: Output: default@partition_mv_1@key=219 -POSTHOOK: Output: default@partition_mv_1@key=221 -POSTHOOK: Output: default@partition_mv_1@key=222 -POSTHOOK: Output: default@partition_mv_1@key=223 -POSTHOOK: Output: default@partition_mv_1@key=224 -POSTHOOK: Output: default@partition_mv_1@key=226 -POSTHOOK: Output: default@partition_mv_1@key=228 -POSTHOOK: Output: default@partition_mv_1@key=229 -POSTHOOK: Output: default@partition_mv_1@key=230 -POSTHOOK: Output: default@partition_mv_1@key=233 -POSTHOOK: Output: default@partition_mv_1@key=235 -POSTHOOK: Output: default@partition_mv_1@key=237 POSTHOOK: Output: default@partition_mv_1@key=238 -POSTHOOK: Output: default@partition_mv_1@key=239 -POSTHOOK: Output: default@partition_mv_1@key=241 -POSTHOOK: Output: default@partition_mv_1@key=242 -POSTHOOK: Output: default@partition_mv_1@key=244 -POSTHOOK: Output: default@partition_mv_1@key=247 -POSTHOOK: Output: default@partition_mv_1@key=248 -POSTHOOK: Output: default@partition_mv_1@key=249 -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=201).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=202).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=203).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=205).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=207).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=208).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=209).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=213).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=214).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=216).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=217).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=218).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=219).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=221).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=222).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=223).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=224).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=226).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=228).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=229).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=230).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=233).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=235).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=237).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: partition_mv_1 PARTITION(key=238).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=239).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=241).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=242).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=244).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=247).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=248).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_1 PARTITION(key=249).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: SELECT * FROM partition_mv_1 where key = 238 PREHOOK: type: QUERY PREHOOK: Input: default@partition_mv_1 @@ -860,20 +798,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src_txn - filterExpr: ((UDFToDouble(key) > 200.0D) and (UDFToDouble(key) < 250.0D)) (type: boolean) + filterExpr: ((UDFToDouble(key) > 200.0D) and (UDFToDouble(key) < 250.0D) and (ROW__ID.writeid > 2)) (type: boolean) Statistics: Num rows: 502 Data size: 175904 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 250.0D) and (UDFToDouble(key) > 200.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 19272 Basic stats: COMPLETE Column stats: NONE + predicate: ((ROW__ID.writeid > 2) and (UDFToDouble(key) < 250.0D) and (UDFToDouble(key) > 200.0D)) (type: boolean) + Statistics: Num rows: 18 Data size: 6307 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 19272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 6307 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 19272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 6307 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -907,14 +845,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 21199 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 10597 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 21199 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 10597 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 21199 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 10597 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -923,18 +861,18 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: value, key - Statistics: Num rows: 60 Data size: 21199 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 10597 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(value, 'hll') keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 21199 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 10597 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 60 Data size: 21199 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 10597 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct) Reducer 3 Execution mode: llap @@ -944,14 +882,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 10599 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 5298 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: struct), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 10599 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 5298 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 10599 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 5298 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -965,7 +903,7 @@ STAGE PLANS: tables: partition: key - replace: true + replace: false table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -992,70 +930,8 @@ POSTHOOK: query: ALTER MATERIALIZED VIEW partition_mv_3 REBUILD POSTHOOK: type: QUERY POSTHOOK: Input: default@src_txn POSTHOOK: Input: default@src_txn_2 -POSTHOOK: Output: default@partition_mv_3@key=201 -POSTHOOK: Output: default@partition_mv_3@key=202 -POSTHOOK: Output: default@partition_mv_3@key=203 -POSTHOOK: Output: default@partition_mv_3@key=205 -POSTHOOK: Output: default@partition_mv_3@key=207 -POSTHOOK: Output: default@partition_mv_3@key=208 -POSTHOOK: Output: default@partition_mv_3@key=209 -POSTHOOK: Output: default@partition_mv_3@key=213 -POSTHOOK: Output: default@partition_mv_3@key=214 -POSTHOOK: Output: default@partition_mv_3@key=216 -POSTHOOK: Output: default@partition_mv_3@key=217 -POSTHOOK: Output: default@partition_mv_3@key=218 -POSTHOOK: Output: default@partition_mv_3@key=219 -POSTHOOK: Output: default@partition_mv_3@key=221 -POSTHOOK: Output: default@partition_mv_3@key=222 -POSTHOOK: Output: default@partition_mv_3@key=223 -POSTHOOK: Output: default@partition_mv_3@key=224 -POSTHOOK: Output: default@partition_mv_3@key=226 -POSTHOOK: Output: default@partition_mv_3@key=228 -POSTHOOK: Output: default@partition_mv_3@key=229 -POSTHOOK: Output: default@partition_mv_3@key=230 -POSTHOOK: Output: default@partition_mv_3@key=233 -POSTHOOK: Output: default@partition_mv_3@key=235 -POSTHOOK: Output: default@partition_mv_3@key=237 POSTHOOK: Output: default@partition_mv_3@key=238 -POSTHOOK: Output: default@partition_mv_3@key=239 -POSTHOOK: Output: default@partition_mv_3@key=241 -POSTHOOK: Output: default@partition_mv_3@key=242 -POSTHOOK: Output: default@partition_mv_3@key=244 -POSTHOOK: Output: default@partition_mv_3@key=247 -POSTHOOK: Output: default@partition_mv_3@key=248 -POSTHOOK: Output: default@partition_mv_3@key=249 -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=201).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=202).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=203).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=205).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=207).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=208).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=209).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=213).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=214).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=216).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=217).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=218).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=219).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=221).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=222).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=223).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=224).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=226).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=228).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=229).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=230).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=233).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=235).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=237).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: partition_mv_3 PARTITION(key=238).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=239).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=241).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=242).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=244).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=247).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=248).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: partition_mv_3 PARTITION(key=249).value SIMPLE [(src_txn)src_txn.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: SELECT * FROM partition_mv_3 where key = 238 PREHOOK: type: QUERY PREHOOK: Input: default@partition_mv_3 @@ -1070,7 +946,7 @@ val_238 238 val_238 238 val_238 238 val_238 238 -val_238_n2 238 -val_238_n2 238 val_238_n 238 val_238_n 238 +val_238_n2 238 +val_238_n2 238 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out index 4d37d82b6e..a4de1a055f 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out @@ -77,14 +77,16 @@ POSTHOOK: Input: default@mv_rebuild PREHOOK: query: alter materialized view mv_rebuild rebuild PREHOOK: type: QUERY PREHOOK: Input: default@basetable_rebuild +PREHOOK: Input: default@mv_rebuild PREHOOK: Output: default@mv_rebuild POSTHOOK: query: alter materialized view mv_rebuild rebuild POSTHOOK: type: QUERY POSTHOOK: Input: default@basetable_rebuild +POSTHOOK: Input: default@mv_rebuild POSTHOOK: Output: default@mv_rebuild -POSTHOOK: Lineage: mv_rebuild._c2 EXPRESSION [(basetable_rebuild)basetable_rebuild.FieldSchema(name:a, type:int, comment:null), ] -POSTHOOK: Lineage: mv_rebuild.a SIMPLE [(basetable_rebuild)basetable_rebuild.FieldSchema(name:a, type:int, comment:null), ] -POSTHOOK: Lineage: mv_rebuild.b SIMPLE [(basetable_rebuild)basetable_rebuild.FieldSchema(name:b, type:varchar(256), comment:null), ] +POSTHOOK: Lineage: mv_rebuild._c2 EXPRESSION [(basetable_rebuild)basetable_rebuild.FieldSchema(name:a, type:int, comment:null), (mv_rebuild)default.mv_rebuild.FieldSchema(name:_c2, type:bigint, comment:null), ] +POSTHOOK: Lineage: mv_rebuild.a EXPRESSION [(basetable_rebuild)basetable_rebuild.FieldSchema(name:a, type:int, comment:null), (mv_rebuild)default.mv_rebuild.FieldSchema(name:a, type:int, comment:null), ] +POSTHOOK: Lineage: mv_rebuild.b EXPRESSION [(basetable_rebuild)basetable_rebuild.FieldSchema(name:b, type:varchar(256), comment:null), (mv_rebuild)default.mv_rebuild.FieldSchema(name:b, type:varchar(256), comment:null), ] PREHOOK: query: select * from mv_rebuild PREHOOK: type: QUERY PREHOOK: Input: default@mv_rebuild