diff --git ql/src/java/org/apache/hadoop/hive/ql/Context.java ql/src/java/org/apache/hadoop/hive/ql/Context.java
index 0fedf0e..1303e7f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -74,6 +74,7 @@
  * each query should call clear() at end of use to remove temporary folders
  */
 public class Context {
+
   private boolean isHDFSCleanup;
   private Path resFile;
   private Path resDir;
@@ -103,6 +104,7 @@
   protected ExplainConfiguration explainConfig = null;
   protected String cboInfo;
   protected boolean cboSucceeded;
+  protected String optimizedSql;
   protected String cmd = "";
   private TokenRewriteStream tokenRewriteStream;
   // Holds the qualified name to tokenRewriteStream for the views
@@ -957,6 +959,14 @@ public void setCboInfo(String cboInfo) {
     this.cboInfo = cboInfo;
   }
 
+  public String getOptimizedSql() {
+    return this.optimizedSql;
+  }
+
+  public void setOptimizedSql(String newSql) {
+    this.optimizedSql = newSql;
+  }
+
   public boolean isCboSucceeded() {
     return cboSucceeded;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 41ad002..917c6c4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -668,7 +668,8 @@ public void run() {
       schema = getSchema(sem, conf);
       plan = new QueryPlan(queryStr, sem, queryDisplay.getQueryStartTime(), queryId,
           queryState.getHiveOperation(), schema);
-
+      // save the optimized sql for the explain
+      plan.setOptimizedQueryString(ctx.getOptimizedSql());
       conf.set("mapreduce.workflow.id", "hive_" + queryId);
       conf.set("mapreduce.workflow.name", queryStr);
 
@@ -901,7 +902,7 @@ private String getExplainOutput(BaseSemanticAnalyzer sem, QueryPlan plan,
     PrintStream ps = new PrintStream(baos);
     try {
       List> rootTasks = sem.getAllRootTasks();
-      task.getJSONPlan(ps, rootTasks, sem.getFetchTask(), false, true, true);
+      task.getJSONPlan(ps, rootTasks, sem.getFetchTask(), false, true, true, plan.getOptimizedQueryString());
       ret = baos.toString();
     } catch (Exception e) {
       LOG.warn("Exception generating explain output: " + e, e);
diff --git ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
index 79e938a..d0872a7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
+++ ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
@@ -74,6 +74,7 @@
 
   private String queryString;
+  private String optimizedQueryString;
 
   private ArrayList> rootTasks;
   private FetchTask fetchTask;
 
@@ -740,6 +741,14 @@ public String getQueryString() {
   public void setQueryString(String queryString) {
     this.queryString = queryString;
   }
+
+  public String getOptimizedQueryString() {
+    return this.optimizedQueryString;
+  }
+
+  public void setOptimizedQueryString(String optimizedQueryString) {
+    this.optimizedQueryString = optimizedQueryString;
+  }
   public org.apache.hadoop.hive.ql.plan.api.Query getQuery() {
     return query;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 34da025..752c3f3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -217,11 +217,11 @@ public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws E
 
   public JSONObject getJSONPlan(PrintStream out, ExplainWork work) throws Exception {
     return getJSONPlan(out, work.getRootTasks(), work.getFetchTask(),
-                       work.isFormatted(), work.getExtended(), work.isAppendTaskType());
+                       work.isFormatted(), work.getExtended(), work.isAppendTaskType(), work.getOptimizedSQL());
   }
 
   public JSONObject getJSONPlan(PrintStream out, List> tasks, Task fetchTask,
-      boolean jsonOutput, boolean isExtended, boolean appendTaskType) throws Exception {
+      boolean jsonOutput, boolean isExtended, boolean appendTaskType, String optimizedSQL) throws Exception {
 
     // If the user asked for a formatted output, dump the json output
     // in the output stream
@@ -231,6 +231,15 @@ public JSONObject getJSONPlan(PrintStream out, List> tasks, Task fetc
       out = null;
     }
 
+    if (optimizedSQL != null) {
+      if (jsonOutput) {
+        outJSONObject.put("optimizedSQL", optimizedSQL);
+      } else {
+        out.print("OPTIMIZED SQL: ");
+        out.println(optimizedSQL);
+      }
+    }
+
     List ordered = StageIDsRearranger.getExplainOrder(conf, tasks);
 
     if (fetchTask != null) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
index 0ae60b5..fa69f13 100644
--- ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
+++ ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
@@ -266,7 +266,8 @@ public void run() {
                     plan.getFetchTask(),// FetchTask
                     null,// analyzer
                     config, //explainConfig
-                    null// cboInfo
+                    null, // cboInfo
+                    plan.getOptimizedQueryString() // optimizedSQL
                 );
                 @SuppressWarnings("unchecked")
                 ExplainTask explain = (ExplainTask) TaskFactory.get(work);
diff --git ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
index 1ae8194..d1deac4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
+++ ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
@@ -438,7 +438,8 @@ private JSONObject getExplainPlan(QueryPlan plan, HiveConf conf, HookContext hoo
           plan.getFetchTask(), // FetchTask
           null, // analyzer
           config, // explainConfig
-          null // cboInfo
+          null, // cboInfo
+          plan.getOptimizedQueryString()
       );
       ExplainTask explain = (ExplainTask) TaskFactory.get(work, conf);
       explain.initialize(hookContext.getQueryState(), plan, null, null);
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 0bc9d23..fb44381 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -44,7 +44,9 @@
 import java.util.concurrent.atomic.AtomicInteger;
 
 import javax.sql.DataSource;
+
 import com.google.common.collect.Iterables;
+
 import org.antlr.runtime.ClassicToken;
 import org.antlr.runtime.CommonToken;
 import org.antlr.runtime.tree.Tree;
@@ -53,7 +55,9 @@
 import org.apache.calcite.adapter.druid.DruidQuery;
 import org.apache.calcite.adapter.druid.DruidSchema;
 import org.apache.calcite.adapter.druid.DruidTable;
+import org.apache.calcite.adapter.java.JavaTypeFactory;
 import org.apache.calcite.adapter.jdbc.JdbcConvention;
+import org.apache.calcite.adapter.jdbc.JdbcImplementor;
 import org.apache.calcite.adapter.jdbc.JdbcSchema;
 import org.apache.calcite.adapter.jdbc.JdbcTable;
 import org.apache.calcite.config.CalciteConnectionConfig;
@@ -117,7 +121,9 @@
 import org.apache.calcite.sql.SqlLiteral;
 import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.SqlSampleSpec;
 import org.apache.calcite.sql.SqlWindow;
+import org.apache.calcite.sql.dialect.HiveSqlDialect;
 import org.apache.calcite.sql.parser.SqlParserPos;
 import org.apache.calcite.sql.type.ArraySqlType;
 import org.apache.calcite.sql.type.SqlTypeName;
@@ -234,7 +240,6 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionMergeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
-
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCAggregationPushDownRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCFilterJoinRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCFilterPushDownRule;
@@ -244,12 +249,10 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCSortPushDownRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCUnionPushDownRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc.JDBCAbstractSplitFilterRule;
-
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAggregateIncrementalRewritingRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveNoAggregateIncrementalRewritingRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.MaterializedViewRewritingRelVisitor;
-
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter;
@@ -450,8 +453,10 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
         this.ctx.setCboInfo("Plan optimized by CBO.");
         this.ctx.setCboSucceeded(true);
       } else {
-        // 1. Gen Optimized AST
-        ASTNode newAST = getOptimizedAST();
+        // 0. Gen Optimized Plan
+        final RelNode newPlan = logicalPlan();
+        // 1. Convert Plan to AST
+        ASTNode newAST = getOptimizedAST(newPlan);
 
         // 1.1. Fix up the query for insert/ctas/materialized views
         newAST = fixUpAfterCbo(ast, newAST, cboCtx);
@@ -522,7 +527,14 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
         LOG.info("CBO Succeeded; optimized logical plan.");
         this.ctx.setCboInfo("Plan optimized by CBO.");
         this.ctx.setCboSucceeded(true);
+        if (this.ctx.isExplainPlan()) {
+          ExplainConfiguration explainConfig = this.ctx.getExplainConfig();
+          if (explainConfig.isExtended() || explainConfig.isFormatted()) {
+            this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
+          }
+        }
         if (LOG.isTraceEnabled()) {
+          LOG.trace(getOptimizedSql(newPlan));
           LOG.trace(newAST.dump());
         }
       }
@@ -1423,6 +1435,32 @@ RelNode logicalPlan() throws SemanticException {
     }
     return optimizedOptiqPlan;
   }
+
+  /**
+   * Get SQL rewrite for a Calcite logical plan.
+   *
+   * @return Optimized SQL text (or null, if failed)
+   *
+   */
+  public String getOptimizedSql(RelNode optimizedOptiqPlan) {
+    SqlDialect dialect = new HiveSqlDialect(SqlDialect.EMPTY_CONTEXT) {
+      @Override
+      public boolean supportsCharSet() {
+        return false;
+      }
+    };
+    try {
+      final JdbcImplementor jdbcImplementor =
+          new JdbcImplementor(dialect, (JavaTypeFactory) optimizedOptiqPlan.getCluster()
+              .getTypeFactory());
+      final JdbcImplementor.Result result = jdbcImplementor.visitChild(0, optimizedOptiqPlan);
+      String sql = result.asStatement().toSqlString(dialect).getSql();
+      return sql.replaceAll("VARCHAR\\(2147483647\\)", "STRING");
+    } catch (Exception ex) {
+      LOG.warn("Rel2SQL Rewrite threw error", ex);
+    }
+    return null;
+  }
 
   /**
    * Get Optimized AST for the given QB tree in the semAnalyzer.
@@ -1431,7 +1469,16 @@ RelNode logicalPlan() throws SemanticException {
    * @throws SemanticException
    */
   ASTNode getOptimizedAST() throws SemanticException {
-    RelNode optimizedOptiqPlan = logicalPlan();
+    return getOptimizedAST(logicalPlan());
+  }
+
+  /**
+   * Get Optimized AST for the given QB tree in the semAnalyzer.
+   *
+   * @return Optimized operator tree translated into Hive AST
+   * @throws SemanticException
+   */
+  ASTNode getOptimizedAST(RelNode optimizedOptiqPlan) throws SemanticException {
     ASTNode optiqOptimizedAST = ASTConverter.convert(optimizedOptiqPlan, resultSchema,
         HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_COLUMN_ALIGNMENT));
     return optiqOptimizedAST;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
index 918cc5a..1bed755 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
@@ -203,7 +203,8 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
         fetchTask,
         sem,
         config,
-        ctx.getCboInfo());
+        ctx.getCboInfo(),
+        ctx.getOptimizedSql());
 
     work.setAppendTaskType(
       HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES));
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
index cde7852..3a42a68 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
@@ -50,6 +50,8 @@
 
   String cboInfo;
 
+  private String optimizedSQL;
+
   private transient BaseSemanticAnalyzer analyzer;
 
   public ExplainWork() {
@@ -61,7 +63,8 @@ public ExplainWork(Path resFile,
       Task fetchTask,
       BaseSemanticAnalyzer analyzer,
       ExplainConfiguration config,
-      String cboInfo) {
+      String cboInfo,
+      String optimizedSQL) {
     this.resFile = resFile;
     this.rootTasks = new ArrayList>(rootTasks);
     this.fetchTask = fetchTask;
@@ -71,6 +74,7 @@ public ExplainWork(Path resFile,
     }
     this.pCtx = pCtx;
     this.cboInfo = cboInfo;
+    this.optimizedSQL = optimizedSQL;
     this.config = config;
   }
 
@@ -170,6 +174,14 @@ public void setCboInfo(String cboInfo) {
     this.cboInfo = cboInfo;
   }
 
+  public String getOptimizedSQL() {
+    return optimizedSQL;
+  }
+
+  public void setOptimizedSQL(String optimizedSQL) {
+    this.optimizedSQL = optimizedSQL;
+  }
+
   public ExplainConfiguration getConfig() {
     return config;
   }
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
index 5a77722..244bc73 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
@@ -218,7 +218,7 @@ public void testGetJSONPlan() throws Exception {
 
     JsonNode result = objectMapper.readTree(uut.getJSONPlan(null, tasks, null, true,
-        false, false).toString());
+        false, false, null).toString());
     JsonNode expected = objectMapper.readTree("{\"STAGE DEPENDENCIES\":{\"mockTaskId\":" +
         "{\"ROOT STAGE\":\"TRUE\",\"BACKUP STAGE\":\"backup-id-mock\"}},\"STAGE PLANS\":" +
         "{\"mockTaskId\":{}}}");
 
diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
index ffd0445..470263b 100644
--- ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
+++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
@@ -299,7 +299,7 @@ private String explain(SemanticAnalyzer sem, QueryPlan plan) throws
     ExplainConfiguration config = new ExplainConfiguration();
     config.setExtended(true);
     ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(),
-      sem.getFetchTask(), sem, config, null);
+      sem.getFetchTask(), sem, config, null, plan.getOptimizedQueryString());
     ExplainTask task = new ExplainTask();
     task.setWork(work);
     task.initialize(queryState, plan, null, null);
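
Note on the type-name rewrite in CalcitePlanner.getOptimizedSql() above: replaceAll() treats its first argument as a regular expression, so the parentheses in VARCHAR(2147483647) must be escaped (as done in the patched line) or the literal quoted for the replacement to take effect. Below is a minimal, self-contained sketch of that rewrite; it is not part of the patch, and the sample query string and class name are invented for illustration only.

import java.util.regex.Pattern;

public class TypeNameRewriteDemo {
  public static void main(String[] args) {
    // Hypothetical output of the Calcite Rel-to-SQL conversion.
    String sql = "SELECT CAST(`name` AS VARCHAR(2147483647)) FROM `default`.`src`";

    // Pattern.quote() makes the whole literal match verbatim, equivalent to
    // escaping the parentheses by hand as in getOptimizedSql().
    String rewritten = sql.replaceAll(Pattern.quote("VARCHAR(2147483647)"), "STRING");

    // Prints: SELECT CAST(`name` AS STRING) FROM `default`.`src`
    System.out.println(rewritten);
  }
}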