diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 01ecf0adf8..43f38566c4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -692,7 +692,8 @@ public void run() {
       schema = getSchema(sem, conf);
       plan = new QueryPlan(queryStr, sem, queryDisplay.getQueryStartTime(), queryId,
         queryState.getHiveOperation(), schema);
-      // save the optimized sql for the explain
+      // save the optimized plan and sql for the explain
+      plan.setOptimizedCBOPlan(ctx.getCalcitePlan());
       plan.setOptimizedQueryString(ctx.getOptimizedSql());
 
       conf.set("mapreduce.workflow.id", "hive_" + queryId);
@@ -1046,7 +1047,7 @@ private String getExplainOutput(BaseSemanticAnalyzer sem, QueryPlan plan,
       if (conf.getBoolVar(ConfVars.HIVE_SERVER2_WEBUI_SHOW_GRAPH)) {
         JSONObject jsonPlan = task.getJSONPlan(
             null, rootTasks, sem.getFetchTask(), true, true, true, sem.getCboInfo(),
-            plan.getOptimizedQueryString());
+            plan.getOptimizedCBOPlan(), plan.getOptimizedQueryString());
         if (jsonPlan.getJSONObject(ExplainTask.STAGE_DEPENDENCIES) != null &&
             jsonPlan.getJSONObject(ExplainTask.STAGE_DEPENDENCIES).length() <=
                 conf.getIntVar(ConfVars.HIVE_SERVER2_WEBUI_MAX_GRAPH_SIZE)) {
@@ -1056,7 +1057,7 @@ private String getExplainOutput(BaseSemanticAnalyzer sem, QueryPlan plan,
         }
       } else {
         task.getJSONPlan(ps, rootTasks, sem.getFetchTask(), false, true, true, sem.getCboInfo(),
-            plan.getOptimizedQueryString());
+            plan.getOptimizedCBOPlan(), plan.getOptimizedQueryString());
         ret = baos.toString();
       }
     } catch (Exception e) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
index ac03efe308..7636019770 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
@@ -75,6 +75,7 @@
   private String cboInfo;
   private String queryString;
+  private String optimizedCBOPlan;
   private String optimizedQueryString;
 
   private ArrayList<Task<?>> rootTasks;
@@ -761,6 +762,14 @@ public void setOptimizedQueryString(String optimizedQueryString) {
     this.optimizedQueryString = optimizedQueryString;
   }
 
+  public String getOptimizedCBOPlan() {
+    return this.optimizedCBOPlan;
+  }
+
+  public void setOptimizedCBOPlan(String optimizedCBOPlan) {
+    this.optimizedCBOPlan = optimizedCBOPlan;
+  }
+
   public org.apache.hadoop.hive.ql.plan.api.Query getQuery() {
     return query;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 7c4efab2b5..2d21b16f6c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -232,11 +232,12 @@ public JSONObject getJSONPlan(PrintStream out, ExplainWork work)
       throws Exception {
     return getJSONPlan(out, work.getRootTasks(), work.getFetchTask(),
         work.isFormatted(), work.getExtended(), work.isAppendTaskType(), work.getCboInfo(),
-        work.getOptimizedSQL());
+        work.getCboPlan(), work.getOptimizedSQL());
   }
 
   public JSONObject getJSONPlan(PrintStream out, List<Task<?>> tasks, Task<?> fetchTask,
-      boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo, String optimizedSQL) throws Exception {
+      boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo,
+      String optimizedCBOPlan, String optimizedSQL) throws Exception {
 
     // If the user asked for a formatted output, dump the json output
     // in the output stream
@@ -246,6 +247,15 @@ public JSONObject getJSONPlan(PrintStream out, List<Task<?>> tasks, Task<?> fetc
       out = null;
     }
 
+    if (optimizedCBOPlan != null) {
+      if (jsonOutput) {
+        outJSONObject.put("optimizedCBOPlan", optimizedCBOPlan);
+      } else {
+        out.print("OPTIMIZED CBO PLAN: ");
+        out.println(optimizedCBOPlan);
+      }
+    }
+
     if (optimizedSQL != null) {
       if (jsonOutput) {
         outJSONObject.put("optimizedSQL", optimizedSQL);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
index 8b10823b37..9cb4d8c4b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
@@ -269,7 +269,7 @@ public void run() {
                     config, //explainConfig
                     null, // cboInfo
                     plan.getOptimizedQueryString(), // optimizedSQL
-                    null
+                    plan.getOptimizedCBOPlan()
                 );
             @SuppressWarnings("unchecked")
             ExplainTask explain = (ExplainTask) TaskFactory.get(work);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
index 0a096757f7..3d51c075bc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
@@ -512,7 +512,7 @@ private JSONObject getExplainPlan(QueryPlan plan, HiveConf conf, HookContext hoo
         config, // explainConfig
         plan.getCboInfo(), // cboInfo,
         plan.getOptimizedQueryString(),
-        null
+        plan.getOptimizedCBOPlan()
     );
     ExplainTask explain = (ExplainTask) TaskFactory.get(work, conf);
     explain.initialize(hookContext.getQueryState(), plan, null, null);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index e99e6d3fe1..d4e5712b31 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -20,6 +20,8 @@
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.Multimap;
 import com.google.common.collect.Sets;
+import java.io.PrintWriter;
+import java.io.StringWriter;
 import java.util.AbstractList;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -37,6 +39,7 @@
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelReferentialConstraint;
+import org.apache.calcite.rel.RelWriter;
 import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.Aggregate.Group;
 import org.apache.calcite.rel.core.AggregateCall;
@@ -47,6 +50,7 @@
 import org.apache.calcite.rel.core.RelFactories;
 import org.apache.calcite.rel.core.Sort;
 import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.externalize.RelWriterImpl;
 import org.apache.calcite.rel.metadata.RelColumnOrigin;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
@@ -60,6 +64,7 @@
 import org.apache.calcite.rex.RexTableInputRef;
 import org.apache.calcite.rex.RexTableInputRef.RelTableRef;
 import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlExplainLevel;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
@@ -1034,4 +1039,20 @@ protected static EquivalenceClasses copy(EquivalenceClasses ec) {
     }
     return null;
   }
+
+  /**
+   * Converts a relational expression to a string, showing information that will aid
+   * in parsing the string back.
+   */
+  public static String toParseableString(final RelNode rel) {
+    if (rel == null) {
+      return null;
+    }
+
+    final StringWriter sw = new StringWriter();
+    final RelWriter planWriter = new HiveRelWriterImpl(new PrintWriter(sw));
+    rel.explain(planWriter);
+    return sw.toString();
+  }
+
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelWriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelWriterImpl.java
new file mode 100644
index 0000000000..c6a65e823f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelWriterImpl.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import com.google.common.collect.ImmutableList;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.calcite.avatica.util.Spacer;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelWriter;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.sql.SqlExplainLevel;
+import org.apache.calcite.util.Pair;
+
+/**
+ * Writer implementation for rel nodes that produces an output that is easily
+ * parseable back into rel nodes.
+ */
+public class HiveRelWriterImpl implements RelWriter {
+
+  private static final SqlExplainLevel DETAIL_LEVEL = SqlExplainLevel.ALL_ATTRIBUTES;
+
+  //~ Instance fields --------------------------------------------------------
+
+  protected final PrintWriter pw;
+  protected final Spacer spacer = new Spacer();
+  private final List<Pair<String, Object>> values = new ArrayList<>();
+
+  //~ Constructors -----------------------------------------------------------
+
+  public HiveRelWriterImpl(PrintWriter pw) {
+    this.pw = pw;
+  }
+
+  //~ Methods ----------------------------------------------------------------
+
+  protected void explain_(RelNode rel, List<Pair<String, Object>> values) {
+    List<RelNode> inputs = rel.getInputs();
+    final RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+    if (!mq.isVisibleInExplain(rel, DETAIL_LEVEL)) {
+      // render children in place of this, at same level
+      explainInputs(inputs);
+      return;
+    }
+
+    StringBuilder s = new StringBuilder();
+    spacer.spaces(s);
+    s.append(rel.getRelTypeName());
+    int j = 0;
+    for (Pair<String, Object> value : values) {
+      if (value.right instanceof RelNode) {
+        continue;
+      }
+      if (j++ == 0) {
+        s.append("(");
+      } else {
+        s.append(", ");
+      }
+      s.append(value.left)
+          .append("=[")
+          .append(value.right)
+          .append("]");
+    }
+    if (j > 0) {
+      s.append(")");
+    }
+    s.append(": [row count = ")
+        .append(mq.getRowCount(rel));
+    if (rel.getInputs().size() == 0) {
+      // This is a leaf; print the average row size and schema as well
+      s.append(", avg row size = ")
+          .append(mq.getAverageRowSize(rel))
+          .append(", row type = ")
+          .append(rel.getRowType());
+    }
+    s.append(", cost = ")
+        .append(mq.getNonCumulativeCost(rel))
+        .append("]");
+    pw.println(s);
+    spacer.add(2);
+    explainInputs(inputs);
+    spacer.subtract(2);
+  }
+
+  private void explainInputs(List<RelNode> inputs) {
+    for (RelNode input : inputs) {
+      input.explain(this);
+    }
+  }
+
+  public final void explain(RelNode rel, List<Pair<String, Object>> valueList) {
+    explain_(rel, valueList);
+  }
+
+  public SqlExplainLevel getDetailLevel() {
+    return DETAIL_LEVEL;
+  }
+
+  public RelWriter input(String term, RelNode input) {
+    values.add(Pair.of(term, (Object) input));
+    return this;
+  }
+
+  public RelWriter item(String term, Object value) {
+    values.add(Pair.of(term, value));
+    return this;
+  }
+
+  public RelWriter itemIf(String term, Object value, boolean condition) {
+    if (condition) {
+      item(term, value);
+    }
+    return this;
+  }
+
+  public RelWriter done(RelNode node) {
+    assert checkInputsPresentInExplain(node);
+    final List<Pair<String, Object>> valuesCopy =
+        ImmutableList.copyOf(values);
+    values.clear();
+    explain_(node, valuesCopy);
+    pw.flush();
+    return this;
+  }
+
+  private boolean checkInputsPresentInExplain(RelNode node) {
+    int i = 0;
+    if (values.size() > 0 && values.get(0).left.equals("subset")) {
+      ++i;
+    }
+    for (RelNode input : node.getInputs()) {
+      assert values.get(i).right == input;
+      ++i;
+    }
+    return true;
+  }
+
+  public boolean nest() {
+    return false;
+  }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 646ce09524..bed15be615 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -150,6 +150,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptMaterializationValidator;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -541,6 +542,9 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
           this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan));
         }
       } else if (explainConfig.isExtended() || explainConfig.isFormatted()) {
+        newPlan.getCluster().invalidateMetadataQuery();
+        RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE));
+        this.ctx.setCalcitePlan(HiveRelOptUtil.toParseableString(newPlan));
         this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
       }
     }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
index a0c0413431..8ba612ebce 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
@@ -218,7 +218,7 @@ public void testGetJSONPlan() throws Exception {
 
     JsonNode result = objectMapper.readTree(uut.getJSONPlan(null, tasks, null, true,
-            false, false, "Plan Optimized by CBO", null).toString());
+            false, false, "Plan Optimized by CBO", null, null).toString());
     JsonNode expected = objectMapper.readTree("{\"cboInfo\":\"Plan Optimized by CBO\", \"STAGE DEPENDENCIES\":{\"mockTaskId\":" +
             "{\"ROOT STAGE\":\"TRUE\",\"BACKUP STAGE\":\"backup-id-mock\"}},\"STAGE PLANS\":" +
             "{\"mockTaskId\":{}}}");
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
index f449c6b408..9b6827ef0d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
@@ -300,7 +300,7 @@ private String explain(SemanticAnalyzer sem, QueryPlan plan) throws
     ExplainConfiguration config = new ExplainConfiguration();
     config.setExtended(true);
     ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(),
-        sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString(), null);
+        sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString(), plan.getOptimizedCBOPlan());
     ExplainTask task = new ExplainTask();
     task.setWork(work);
     task.initialize(queryState, plan, null, null);
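
Reviewer note, not part of the patch: a minimal sketch of how the new pieces compose end to end. CalcitePlanner serializes the optimized RelNode with the new writer and stashes the string on the Context; Driver copies it onto QueryPlan via setOptimizedCBOPlan, and ExplainTask plus the ATS and proto-logging hooks surface it. Only HiveRelOptUtil.toParseableString and HiveRelWriterImpl below come from the patch; the class name and the sample operator lines in the comments are invented.

import org.apache.calcite.rel.RelNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;

public final class CboPlanDumpSketch {

  private CboPlanDumpSketch() {
  }

  // Serializes the optimized plan the same way CalcitePlanner does before
  // Driver stores it with plan.setOptimizedCBOPlan(ctx.getCalcitePlan()).
  public static String dump(RelNode optimizedPlan) {
    // Null-safe: toParseableString returns null for a null plan, which makes
    // ExplainTask skip the "optimizedCBOPlan" section altogether.
    String plan = HiveRelOptUtil.toParseableString(optimizedPlan);
    // HiveRelWriterImpl emits one line per visible operator, indented two
    // spaces per input level, annotated with row count (plus average row size
    // and row type on leaves) and non-cumulative cost. Shape only; the
    // operator names and values below are made up:
    //   HiveProject(x=[$0]): [row count = 100.0, cost = {...}]
    //     HiveTableScan(table=[[db, t]]): [row count = 100.0, avg row size = 8.0, row type = ...]
    return plan;
  }
}

This also suggests why the two added CalcitePlanner lines reset the metadata machinery first: explain_ issues fresh RelMetadataQuery calls (getRowCount, getAverageRowSize, getNonCumulativeCost) over the final plan, so the cluster's cached metadata query is invalidated and the default Janino provider installed before serialization, presumably to avoid a stale provider left over from an earlier planning phase.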
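
A second sketch, also not part of the patch: how the new field surfaces through ExplainTask's widened getJSONPlan, mirroring the updated TestExplainTask call. The test class name, the mocked conf wiring, the empty task list, and the plan literal are all assumptions for illustration.

package org.apache.hadoop.hive.ql.exec;

import static org.mockito.Mockito.mock;

import java.util.Collections;
import org.apache.hadoop.hive.conf.HiveConf;
import org.json.JSONObject;
import org.junit.Assert;
import org.junit.Test;

public class TestOptimizedCboPlanKey {

  @Test
  public void testPlanStringSurfacesInJson() throws Exception {
    ExplainTask task = new ExplainTask();
    task.conf = mock(HiveConf.class); // assumes the package-visible conf field is reachable here, as in TestExplainTask

    JSONObject json = task.getJSONPlan(
        null,                      // out: unused because jsonOutput == true
        Collections.emptyList(),   // no stage tasks needed for this check
        null,                      // no fetch task
        true,                      // jsonOutput
        false,                     // isExtended
        false,                     // appendTaskType
        "Plan optimized by CBO.",  // cboInfo
        "HiveProject(...)",        // optimizedCBOPlan (made-up literal)
        null);                     // optimizedSQL

    // With jsonOutput the patch stores the string directly under this key;
    // in text mode it would instead print "OPTIMIZED CBO PLAN: " followed by the plan.
    Assert.assertEquals("HiveProject(...)", json.getString("optimizedCBOPlan"));
  }
}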