diff --git ql/src/java/org/apache/hadoop/hive/ql/Context.java ql/src/java/org/apache/hadoop/hive/ql/Context.java
index 5340e99..f0d8185 100644
--- ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -79,6 +79,7 @@
   private final Configuration conf;
   protected int pathid = 10000;
   protected boolean explain = false;
+  protected boolean explainPhysical = false;
   protected String cmd = "";
   // number of previous attempts
   protected int tryCount = 0;
@@ -140,11 +141,26 @@ public void setExplain(boolean value) {
    * Find whether the current query is an explain query
    * @return true if the query is an explain query, false if not
    */
-  public boolean getExplain () {
+  public boolean getExplain() {
    return explain;
  }
 
  /**
+   * Find whether the current query is a physical explain query
+   */
+  public boolean getExplainPhysical() {
+    return explainPhysical;
+  }
+
+  /**
+   * Set the context on whether the current query is a physical
+   * explain query.
+   */
+  public void setExplainPhysical(boolean explainPhysical) {
+    this.explainPhysical = explainPhysical;
+  }
+
+  /**
   * Set the original query command.
   * @param cmd the original query command string
   */
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 5a00c2d..254e462 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -39,7 +39,6 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.metadata.Table;
@@ -60,6 +59,9 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
   private static final long serialVersionUID = 1L;
   public static final String EXPL_COLUMN_NAME = "Explain";
 
+  private Set<Operator<?>> visitedOps = new HashSet<Operator<?>>();
+  private boolean isPhysical = false;
+
   public ExplainTask() {
     super();
   }
@@ -109,7 +111,32 @@ private static JSONObject getJSONDependencies(ExplainWork work)
     return outJSONObject;
   }
 
-  static public JSONObject getJSONPlan(PrintStream out, ExplainWork work)
+  public JSONObject getJSONPhysicalPlan(PrintStream out, ExplainWork work) throws Exception {
+    isPhysical = true;
+
+    JSONObject outJSONObject = new JSONObject();
+    boolean jsonOutput = work.isFormatted();
+    if (jsonOutput) {
+      out = null;
+    }
+
+    if (work.getParseContext() != null) {
+      JSONObject jsonPlan = outputMap(work.getParseContext().getTopOps(),
+          "PHYSICAL PLAN", out, work.getExtended(), jsonOutput, 0);
+      if (out != null) {
+        out.println();
+      }
+
+      if (jsonOutput) {
+        outJSONObject.put("PHYSICAL PLAN", jsonPlan);
+      }
+    } else {
+      System.err.println("No parse context!");
+    }
+    return outJSONObject;
+  }
+
+  public JSONObject getJSONPlan(PrintStream out, ExplainWork work)
       throws Exception {
     // If the user asked for a formatted output, dump the json output
     // in the output stream
@@ -161,13 +188,20 @@ public int execute(DriverContext driverContext) {
       OutputStream outS = resFile.getFileSystem(conf).create(resFile);
       out = new PrintStream(outS);
 
-      if (work.getDependency()) {
-        JSONObject jsonDependencies = getJSONDependencies(work);
-        out.print(jsonDependencies);
-      } else {
-        JSONObject jsonPlan = getJSONPlan(out, work);
+      if (work.isPhysical()) {
+        JSONObject jsonPhysicalPlan = getJSONPhysicalPlan(out, work);
         if (work.isFormatted()) {
-          out.print(jsonPlan);
+          out.print(jsonPhysicalPlan);
+        }
+      } else {
+        if (work.getDependency()) {
+          JSONObject jsonDependencies = getJSONDependencies(work);
+          out.print(jsonDependencies);
+        } else {
+          JSONObject jsonPlan = getJSONPlan(out, work);
+          if (work.isFormatted()) {
+            out.print(jsonPlan);
+          }
         }
       }
@@ -185,7 +219,7 @@ public int execute(DriverContext driverContext) {
     }
   }
 
-  private static String indentString(int indent) {
+  private String indentString(int indent) {
     StringBuilder sb = new StringBuilder();
     for (int i = 0; i < indent; ++i) {
       sb.append(" ");
@@ -194,7 +228,7 @@ private static String indentString(int indent) {
     return sb.toString();
   }
 
-  private static JSONObject outputMap(Map<?, ?> mp, String header, PrintStream out,
+  private JSONObject outputMap(Map<?, ?> mp, String header, PrintStream out,
       boolean extended, boolean jsonOutput, int indent) throws Exception {
 
     boolean first_el = true;
@@ -252,7 +286,7 @@ else if (ent.getValue() instanceof Serializable) {
     return jsonOutput ? json : null;
   }
 
-  private static JSONArray outputList(List<?> l, String header, PrintStream out,
+  private JSONArray outputList(List<?> l, String header, PrintStream out,
       boolean extended, boolean jsonOutput, int indent) throws Exception {
 
     boolean first_el = true;
@@ -297,7 +331,7 @@ else if (o instanceof Serializable) {
     return jsonOutput ? outputArray : null;
   }
 
-  private static boolean isPrintable(Object val) {
+  private boolean isPrintable(Object val) {
     if (val instanceof Boolean || val instanceof String
         || val instanceof Integer || val instanceof Long || val instanceof Byte
         || val instanceof Float || val instanceof Double) {
@@ -311,8 +345,13 @@ private static boolean isPrintable(Object val) {
     return false;
   }
 
-  private static JSONObject outputPlan(Serializable work, PrintStream out,
-      boolean extended, boolean jsonOutput, int indent) throws Exception {
+  private JSONObject outputPlan(Serializable work,
+      PrintStream out, boolean extended, boolean jsonOutput, int indent) throws Exception {
+    return outputPlan(work, out, extended, jsonOutput, indent, "");
+  }
+
+  private JSONObject outputPlan(Serializable work, PrintStream out,
+      boolean extended, boolean jsonOutput, int indent, String appendToHeader) throws Exception {
     // Check if work has an explain annotation
     Annotation note = work.getClass().getAnnotation(Explain.class);
@@ -324,7 +363,7 @@ private static JSONObject outputPlan(Serializable work, PrintStream out,
       keyJSONObject = xpl_note.displayName();
       if (out != null) {
         out.print(indentString(indent));
-        out.println(xpl_note.displayName());
+        out.println(xpl_note.displayName() + appendToHeader);
       }
     }
   }
@@ -337,17 +376,20 @@ private static JSONObject outputPlan(Serializable work, PrintStream out,
         (Operator<? extends OperatorDesc>) work;
     if (operator.getConf() != null) {
       JSONObject jsonOut = outputPlan(operator.getConf(), out, extended,
-          jsonOutput, jsonOutput ? 0 : indent);
+          jsonOutput, jsonOutput ? 0 : indent, isPhysical ? " (" + operator.getOperatorId() + ")" : "");
       if (jsonOutput) {
         json.put(operator.getOperatorId(), jsonOut);
       }
     }
-    if (operator.getChildOperators() != null) {
-      for (Operator<? extends OperatorDesc> op : operator.getChildOperators()) {
-        JSONObject jsonOut = outputPlan(op, out, extended, jsonOutput, jsonOutput ? 0 : indent + 2);
-        if (jsonOutput) {
-          json.put(operator.getOperatorId(), jsonOut);
+    if (!visitedOps.contains(operator) || !isPhysical) {
+      visitedOps.add(operator);
+      if (operator.getChildOperators() != null) {
+        for (Operator<? extends OperatorDesc> op : operator.getChildOperators()) {
+          JSONObject jsonOut = outputPlan(op, out, extended, jsonOutput, jsonOutput ? 0 : indent + 2);
+          if (jsonOutput) {
+            json.put(operator.getOperatorId(), jsonOut);
+          }
        }
      }
    }
@@ -483,14 +525,14 @@ private static JSONObject outputPlan(Serializable work, PrintStream out,
    * @param val
    * @return
    */
-  private static boolean shouldPrint(Explain exp, Object val) {
+  private boolean shouldPrint(Explain exp, Object val) {
     if (exp.displayOnlyOnTrue() && (val instanceof Boolean) & !((Boolean)val)) {
       return false;
     }
 
     return true;
   }
 
-  private static JSONObject outputPlan(Task<? extends Serializable> task,
+  private JSONObject outputPlan(Task<? extends Serializable> task,
       PrintStream out, JSONObject parentJSON, boolean extended,
       boolean jsonOutput, HashSet<Task<? extends Serializable>> displayedSet, int indent)
       throws Exception {
@@ -534,7 +576,7 @@ private static JSONObject outputPlan(Task<? extends Serializable> task,
     return null;
   }
 
-  private static JSONObject outputDependencies(Task<? extends Serializable> task,
+  private JSONObject outputDependencies(Task<? extends Serializable> task,
       Set<Task<? extends Serializable>> dependeciesTaskSet, PrintStream out,
       JSONObject parentJson, boolean jsonOutput, int indent, boolean rootTskCandidate)
       throws Exception {
@@ -640,7 +682,7 @@ private static JSONObject outputDependencies(Task<? extends Serializable> task,
     return jsonOutput ? json : null;
   }
 
-  public static String outputAST(String treeString, PrintStream out,
+  public String outputAST(String treeString, PrintStream out,
       boolean jsonOutput, int indent) throws JSONException {
     if (out != null) {
       out.print(indentString(indent));
@@ -652,7 +694,7 @@ public static String outputAST(String treeString, PrintStream out,
     return jsonOutput ? treeString : null;
   }
 
-  public static JSONObject outputDependencies(PrintStream out, boolean jsonOutput,
+  public JSONObject outputDependencies(PrintStream out, boolean jsonOutput,
       List<Task<? extends Serializable>> rootTasks, int indent)
       throws Exception {
     if (out != null) {
@@ -676,7 +718,7 @@ public static JSONObject outputDependencies(PrintStream out, boolean jsonOutput,
     return jsonOutput ? json : null;
   }
 
-  public static JSONObject outputStagePlans(PrintStream out, ExplainWork work,
+  public JSONObject outputStagePlans(PrintStream out, ExplainWork work,
       List<Task<? extends Serializable>> rootTasks, int indent)
       throws Exception {
     boolean jsonOutput = work.isFormatted();
@@ -698,7 +740,7 @@ public static JSONObject outputStagePlans(PrintStream out, ExplainWork work,
    * MethodComparator.
    *
    */
-  public static class MethodComparator implements Comparator {
+  public class MethodComparator implements Comparator {
     public int compare(Object o1, Object o2) {
       Method m1 = (Method) o1;
       Method m2 = (Method) o2;
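A note on the two fields added to ExplainTask above: after joins and unions the operator graph is a DAG, so the same child operator can be reached from several parents. The `visitedOps` set makes the physical rendering print a shared operator under each parent but expand its subtree only once; plain EXPLAIN keeps the old unconditional walk because the guard is or-ed with `!isPhysical`. A standalone sketch of the pattern, using a hypothetical `Node` type instead of Hive's `Operator` (illustration only, not patch code):

```java
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class DagPrinter {

  // Hypothetical stand-in for Operator; no equals/hashCode, so the
  // visited set uses object identity, as the operator walk does.
  static final class Node {
    final String id;
    final List<Node> children = new ArrayList<Node>();
    Node(String id) { this.id = id; }
  }

  private final Set<Node> visited = new HashSet<Node>();

  void print(Node n, int indent) {
    StringBuilder pad = new StringBuilder();
    for (int i = 0; i < indent; i++) {
      pad.append(' ');
    }
    // A shared child (e.g. a Join fed by two scans) is printed under
    // every parent...
    System.out.println(pad + n.id);
    // ...but its subtree is expanded only on the first visit, which is
    // what the !visitedOps.contains(operator) guard achieves.
    if (visited.add(n)) {
      for (Node c : n.children) {
        print(c, indent + 2);
      }
    }
  }

  public static void main(String[] args) {
    Node ts0 = new Node("TS_0");
    Node ts1 = new Node("TS_1");
    Node join = new Node("JOIN_2");
    Node sel = new Node("SEL_3");
    ts0.children.add(join);
    ts1.children.add(join); // JOIN_2 has two parents
    join.children.add(sel);

    DagPrinter p = new DagPrinter();
    p.print(ts0, 0); // expands JOIN_2 and SEL_3
    p.print(ts1, 0); // stops at JOIN_2 the second time
  }
}
```

The effect is visible in the golden output further down: JOIN_4 appears under both s1 and s2, but only its first occurrence carries the subtree.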
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
index a19a087..d431dd2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
@@ -40,26 +40,31 @@ public ExplainSemanticAnalyzer(HiveConf conf) throws SemanticException {
     super(conf);
   }
 
+  @SuppressWarnings("unchecked")
   @Override
   public void analyzeInternal(ASTNode ast) throws SemanticException {
-    ctx.setExplain(true);
-
-    // Create a semantic analyzer for the query
-    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, (ASTNode) ast
-        .getChild(0));
-    sem.analyze((ASTNode) ast.getChild(0), ctx);
-    sem.validate();
 
     boolean extended = false;
     boolean formatted = false;
     boolean dependency = false;
+    boolean physical = false;
     if (ast.getChildCount() == 2) {
       int explainOptions = ast.getChild(1).getType();
       formatted = (explainOptions == HiveParser.KW_FORMATTED);
       extended = (explainOptions == HiveParser.KW_EXTENDED);
       dependency = (explainOptions == HiveParser.KW_DEPENDENCY);
+      physical = (explainOptions == HiveParser.KW_PHYSICAL);
     }
 
+    ctx.setExplain(true);
+    ctx.setExplainPhysical(physical);
+
+    // Create a semantic analyzer for the query
+    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, (ASTNode) ast
+        .getChild(0));
+    sem.analyze((ASTNode) ast.getChild(0), ctx);
+    sem.validate();
+
     ctx.setResFile(new Path(ctx.getLocalTmpFileURI()));
     List<Task<? extends Serializable>> tasks = sem.getRootTasks();
     Task<? extends Serializable> fetchTask = sem.getFetchTask();
@@ -72,14 +77,21 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
       tasks.add(fetchTask);
     }
 
+    ParseContext pCtx = null;
+    if (sem instanceof SemanticAnalyzer) {
+      pCtx = ((SemanticAnalyzer)sem).getParseContext();
+    }
+
     Task<? extends Serializable> explTask = TaskFactory.get(new ExplainWork(ctx.getResFile().toString(),
+      pCtx,
       tasks,
       ((ASTNode) ast.getChild(0)).toStringTree(),
       sem.getInputs(),
       extended,
       formatted,
-      dependency),
+      dependency,
+      physical),
      conf);
 
     fieldList = explTask.getResultSchema();
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 7191b14..6824b53 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -161,6 +161,7 @@ KW_EXTENDED: 'EXTENDED';
 KW_FORMATTED: 'FORMATTED';
 KW_PRETTY: 'PRETTY';
 KW_DEPENDENCY: 'DEPENDENCY';
+KW_PHYSICAL: 'PHYSICAL';
 KW_SERDE: 'SERDE';
 KW_WITH: 'WITH';
 KW_DEFERRED: 'DEFERRED';
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 6fce3da..8790fed 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -558,7 +558,7 @@ statement
 explainStatement
 @init { msgs.push("explain statement"); }
 @after { msgs.pop(); }
-    : KW_EXPLAIN (explainOptions=KW_EXTENDED|explainOptions=KW_FORMATTED|explainOptions=KW_DEPENDENCY)? execStatement
+    : KW_EXPLAIN (explainOptions=KW_EXTENDED|explainOptions=KW_FORMATTED|explainOptions=KW_DEPENDENCY|explainOptions=KW_PHYSICAL)? execStatement
       -> ^(TOK_EXPLAIN execStatement $explainOptions?)
     ;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 8a5ef0e..b2bcf8e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -512,5 +512,5 @@ identifier
 nonReserved
     :
-    KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_INNER
+    KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_PHYSICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_INNER
     ;
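Two properties of the grammar change are worth checking: the option is matched as a single optional token, so PHYSICAL cannot be combined with EXTENDED or FORMATTED in one statement, and adding KW_PHYSICAL to nonReserved keeps "physical" usable as an ordinary identifier. A rough check through Hive's ParseDriver; the expected tree shape in the comment is an assumption, not copied from test output:

```java
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ParseDriver;

public class PhysicalGrammarCheck {
  public static void main(String[] args) throws Exception {
    ParseDriver pd = new ParseDriver();

    // The option token is kept as a child of TOK_EXPLAIN by the
    // rewrite rule, so it survives into the AST for the analyzer.
    ASTNode explain = pd.parse("EXPLAIN PHYSICAL SELECT key FROM src");
    System.out.println(explain.toStringTree());

    // PHYSICAL stays in nonReserved, so it is still a legal identifier.
    pd.parse("SELECT physical FROM src");

    // Note: explainOptions matches one alternative at most, so
    // combinations such as "EXPLAIN PHYSICAL EXTENDED ..." are
    // rejected by the parser.
  }
}
```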
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 142c974..b62d706 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8317,12 +8317,14 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
       setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess());
     }
 
-    // At this point we have the complete operator tree
-    // from which we want to create the map-reduce plan
-    MapReduceCompiler compiler = new MapReduceCompiler();
-    compiler.init(conf, console, db);
-    compiler.compile(pCtx, rootTasks, inputs, outputs);
-    fetchTask = pCtx.getFetchTask();
+    if (!ctx.getExplainPhysical()) {
+      // At this point we have the complete operator tree
+      // from which we want to create the map-reduce plan
+      MapReduceCompiler compiler = new MapReduceCompiler();
+      compiler.init(conf, console, db);
+      compiler.compile(pCtx, rootTasks, inputs, outputs);
+      fetchTask = pCtx.getFetchTask();
+    }
 
     LOG.info("Completed plan generation");
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
index 166b5da..bdbb043 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
@@ -25,6 +25,7 @@
 
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
 
 /**
  * ExplainWork.
@@ -37,20 +38,26 @@
   private ArrayList<Task<? extends Serializable>> rootTasks;
   private String astStringTree;
   private HashSet<ReadEntity> inputs;
+  private ParseContext pCtx;
+
   boolean extended;
   boolean formatted;
   boolean dependency;
+  boolean physical;
+
   public ExplainWork() {
   }
 
   public ExplainWork(String resFile,
+      ParseContext pCtx,
       List<Task<? extends Serializable>> rootTasks,
       String astStringTree,
       HashSet<ReadEntity> inputs,
       boolean extended,
       boolean formatted,
-      boolean dependency) {
+      boolean dependency,
+      boolean physical) {
     this.resFile = resFile;
     this.rootTasks = new ArrayList<Task<? extends Serializable>>(rootTasks);
     this.astStringTree = astStringTree;
@@ -58,6 +65,8 @@ public ExplainWork(String resFile,
     this.extended = extended;
     this.formatted = formatted;
     this.dependency = dependency;
+    this.physical = physical;
+    this.pCtx = pCtx;
   }
 
   public String getResFile() {
@@ -115,4 +124,21 @@ public boolean isFormatted() {
   public void setFormatted(boolean formatted) {
     this.formatted = formatted;
   }
+
+  public ParseContext getParseContext() {
+    return pCtx;
+  }
+
+  public void setParseContext(ParseContext pCtx) {
+    this.pCtx = pCtx;
+  }
+
+  public boolean isPhysical() {
+    return physical;
+  }
+
+  public void setPhysical(boolean physical) {
+    this.physical = physical;
+  }
+
 }
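One hedged review note on ExplainWork: unlike the other fields, ParseContext is a compile-time structure rather than a serializable plan object. If ExplainWork is ever serialized out of process, the new field may need to be excluded from serialization, for example:

```java
// Suggestion only, not part of the patch: keep the compile-time
// ParseContext out of any serialized form of the work.
private transient ParseContext pCtx;
```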
diff --git ql/src/test/queries/clientpositive/explain_physical.q ql/src/test/queries/clientpositive/explain_physical.q
new file mode 100644
index 0000000..3bfca84
--- /dev/null
+++ ql/src/test/queries/clientpositive/explain_physical.q
@@ -0,0 +1,36 @@
+-- This test is used for testing EXPLAIN PHYSICAL command
+
+-- Create some views
+CREATE VIEW V1 AS SELECT key, value from src;
+CREATE VIEW V2 AS SELECT ds, key, value FROM srcpart WHERE ds IS NOT NULL;
+CREATE VIEW V3 AS
+  SELECT src1.key, src2.value FROM V2 src1
+  JOIN src src2 ON src1.key = src2.key WHERE src1.ds IS NOT NULL;
+CREATE VIEW V4 AS
+  SELECT src1.key, src2.value as value1, src3.value as value2
+  FROM V1 src1 JOIN V2 src2 on src1.key = src2.key JOIN src src3 ON src2.key = src3.key;
+
+-- Simple select queries, union queries and join queries
+EXPLAIN PHYSICAL
+  SELECT key, count(1) FROM srcpart WHERE ds IS NOT NULL GROUP BY key;
+EXPLAIN PHYSICAL
+  SELECT key, count(1) FROM (SELECT key, value FROM src) subq1 GROUP BY key;
+EXPLAIN PHYSICAL
+  SELECT * FROM (
+    SELECT key, value FROM src UNION ALL SELECT key, value FROM srcpart WHERE ds IS NOT NULL
+  ) S1;
+EXPLAIN PHYSICAL
+  SELECT S1.key, S2.value FROM src S1 JOIN srcpart S2 ON S1.key = S2.key WHERE ds IS NOT NULL;
+
+-- With views
+EXPLAIN PHYSICAL SELECT * FROM V1;
+EXPLAIN PHYSICAL SELECT * FROM V2;
+EXPLAIN PHYSICAL SELECT * FROM V3;
+EXPLAIN PHYSICAL SELECT * FROM V4;
+
+-- The table should show up in the explain physical even if none
+-- of the partitions are selected.
+CREATE VIEW V5 as SELECT * FROM srcpart where ds = '10';
+EXPLAIN PHYSICAL SELECT * FROM V5;
+
+EXPLAIN PHYSICAL SELECT s1.key, s1.cnt, s2.value FROM (SELECT key, count(value) as cnt FROM src GROUP BY key) s1 JOIN src s2 ON (s1.key = s2.key) ORDER BY s1.key;
\ No newline at end of file
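The matching golden file follows. Assuming the ant-based build of this era, it can be (re)generated with the usual single-test idiom; the exact flags below are from memory and worth double-checking against build.xml:

```
ant test -Dtestcase=TestCliDriver -Dqfile=explain_physical.q -Doverwrite=true
```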
diff --git ql/src/test/results/clientpositive/explain_physical.q.out ql/src/test/results/clientpositive/explain_physical.q.out
new file mode 100644
index 0000000..bfac512
--- /dev/null
+++ ql/src/test/results/clientpositive/explain_physical.q.out
@@ -0,0 +1,676 @@
+PREHOOK: query: -- This test is used for testing EXPLAIN PHYSICAL command
+
+-- Create some views
+CREATE VIEW V1 AS SELECT key, value from src
+PREHOOK: type: CREATEVIEW
+POSTHOOK: query: -- This test is used for testing EXPLAIN PHYSICAL command
+
+-- Create some views
+CREATE VIEW V1 AS SELECT key, value from src
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Output: default@V1
+PREHOOK: query: CREATE VIEW V2 AS SELECT ds, key, value FROM srcpart WHERE ds IS NOT NULL
+PREHOOK: type: CREATEVIEW
+POSTHOOK: query: CREATE VIEW V2 AS SELECT ds, key, value FROM srcpart WHERE ds IS NOT NULL
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Output: default@V2
+PREHOOK: query: CREATE VIEW V3 AS
+  SELECT src1.key, src2.value FROM V2 src1
+  JOIN src src2 ON src1.key = src2.key WHERE src1.ds IS NOT NULL
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@v2
+POSTHOOK: query: CREATE VIEW V3 AS
+  SELECT src1.key, src2.value FROM V2 src1
+  JOIN src src2 ON src1.key = src2.key WHERE src1.ds IS NOT NULL
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@v2
+POSTHOOK: Output: default@V3
+PREHOOK: query: CREATE VIEW V4 AS
+  SELECT src1.key, src2.value as value1, src3.value as value2
+  FROM V1 src1 JOIN V2 src2 on src1.key = src2.key JOIN src src3 ON src2.key = src3.key
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@v1
+PREHOOK: Input: default@v2
+POSTHOOK: query: CREATE VIEW V4 AS
+  SELECT src1.key, src2.value as value1, src3.value as value2
+  FROM V1 src1 JOIN V2 src2 on src1.key = src2.key JOIN src src3 ON src2.key = src3.key
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@v1
+POSTHOOK: Input: default@v2
+POSTHOOK: Output: default@V4
+PREHOOK: query: -- Simple select queries, union queries and join queries
+EXPLAIN PHYSICAL
+  SELECT key, count(1) FROM srcpart WHERE ds IS NOT NULL GROUP BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Simple select queries, union queries and join queries
+EXPLAIN PHYSICAL
+  SELECT key, count(1) FROM srcpart WHERE ds IS NOT NULL GROUP BY key
+POSTHOOK: type: QUERY
+PHYSICAL PLAN
+srcpart
+  TableScan (TS_0)
+    alias: srcpart
+    Select Operator (SEL_2)
+      expressions:
+            expr: key
+            type: string
+      outputColumnNames: key
+      Group By Operator (GBY_3)
+        aggregations:
+              expr: count(1)
+        bucketGroup: false
+        keys:
+              expr: key
+              type: string
+        mode: hash
+        outputColumnNames: _col0, _col1
+        Reduce Output Operator (RS_4)
+          key expressions:
+                expr: _col0
+                type: string
+          sort order: +
+          Map-reduce partition columns:
+                expr: _col0
+                type: string
+          tag: -1
+          value expressions:
+                expr: _col1
+                type: bigint
+          Group By Operator (GBY_5)
+            aggregations:
+                  expr: count(VALUE._col0)
+            bucketGroup: false
+            keys:
+                  expr: KEY._col0
+                  type: string
+            mode: mergepartial
+            outputColumnNames: _col0, _col1
+            Select Operator (SEL_6)
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+              outputColumnNames: _col0, _col1
+              File Output Operator (FS_7)
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+
+PREHOOK: query: EXPLAIN PHYSICAL
+  SELECT key, count(1) FROM (SELECT key, value FROM src) subq1 GROUP BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN PHYSICAL
+  SELECT key, count(1) FROM (SELECT key, value FROM src) subq1 GROUP BY key
+POSTHOOK: type: QUERY
+PHYSICAL PLAN
+subq1:src
+  TableScan (TS_0)
+    alias: src
+    Select Operator (SEL_1)
+      expressions:
+            expr: key
+            type: string
+      outputColumnNames: _col0
+      Group By Operator (GBY_3)
+        aggregations:
+              expr: count(1)
+        bucketGroup: false
+        keys:
+              expr: _col0
+              type: string
+        mode: hash
+        outputColumnNames: _col0, _col1
+        Reduce Output Operator (RS_4)
+          key expressions:
+                expr: _col0
+                type: string
+          sort order: +
+          Map-reduce partition columns:
+                expr: _col0
+                type: string
+          tag: -1
+          value expressions:
+                expr: _col1
+                type: bigint
+          Group By Operator (GBY_5)
+            aggregations:
+                  expr: count(VALUE._col0)
+            bucketGroup: false
+            keys:
+                  expr: KEY._col0
+                  type: string
+            mode: mergepartial
+            outputColumnNames: _col0, _col1
+            Select Operator (SEL_6)
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+              outputColumnNames: _col0, _col1
+              File Output Operator (FS_7)
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+
+PREHOOK: query: EXPLAIN PHYSICAL
+  SELECT * FROM (
+    SELECT key, value FROM src UNION ALL SELECT key, value FROM srcpart WHERE ds IS NOT NULL
+  ) S1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN PHYSICAL
+  SELECT * FROM (
+    SELECT key, value FROM src UNION ALL SELECT key, value FROM srcpart WHERE ds IS NOT NULL
+  ) S1
+POSTHOOK: type: QUERY
+PHYSICAL PLAN
+null-subquery1:s1-subquery1:src
+  TableScan (TS_0)
+    alias: src
+    Select Operator (SEL_1)
+      expressions:
+            expr: key
+            type: string
+            expr: value
+            type: string
+      outputColumnNames: _col0, _col1
+      Union (UNION_5)
+        Select Operator (SEL_6)
+          expressions:
+                expr: _col0
+                type: string
+                expr: _col1
+                type: string
+          outputColumnNames: _col0, _col1
+          File Output Operator (FS_7)
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+null-subquery2:s1-subquery2:srcpart
+  TableScan (TS_2)
+    alias: srcpart
+    Select Operator (SEL_4)
+      expressions:
+            expr: key
+            type: string
+            expr: value
+            type: string
+      outputColumnNames: _col0, _col1
+      Union (UNION_5)
+
+
+PREHOOK: query: EXPLAIN PHYSICAL
+  SELECT S1.key, S2.value FROM src S1 JOIN srcpart S2 ON S1.key = S2.key WHERE ds IS NOT NULL
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN PHYSICAL
+  SELECT S1.key, S2.value FROM src S1 JOIN srcpart S2 ON S1.key = S2.key WHERE ds IS NOT NULL
+POSTHOOK: type: QUERY
+PHYSICAL PLAN
+s1
+  TableScan (TS_1)
+    alias: s1
+    Reduce Output Operator (RS_2)
+      key expressions:
+            expr: key
+            type: string
+      sort order: +
+      Map-reduce partition columns:
+            expr: key
+            type: string
+      tag: 0
+      value expressions:
+            expr: key
+            type: string
+      Join Operator (JOIN_4)
+        condition map:
+             Inner Join 0 to 1
+        condition expressions:
+          0 {VALUE._col0}
+          1 {VALUE._col1}
+        handleSkewJoin: false
+        outputColumnNames: _col0, _col5
+        Select Operator (SEL_6)
+          expressions:
+                expr: _col0
+                type: string
+                expr: _col5
+                type: string
+          outputColumnNames: _col0, _col1
+          File Output Operator (FS_7)
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+s2
+  TableScan (TS_0)
+    alias: s2
+    Reduce Output Operator (RS_3)
+      key expressions:
+            expr: key
+            type: string
+      sort order: +
+      Map-reduce partition columns:
+            expr: key
+            type: string
+      tag: 1
+      value expressions:
+            expr: value
+            type: string
+      Join Operator (JOIN_4)
+        condition map:
+             Inner Join 0 to 1
+        condition expressions:
+          0 {VALUE._col0}
+          1 {VALUE._col1}
+        handleSkewJoin: false
+        outputColumnNames: _col0, _col5
+
+
+PREHOOK: query: -- With views
+EXPLAIN PHYSICAL SELECT * FROM V1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- With views
+EXPLAIN PHYSICAL SELECT * FROM V1
+POSTHOOK: type: QUERY
+PHYSICAL PLAN
+v1:src
+  TableScan (TS_0)
+    alias: src
+    Select Operator (SEL_1)
+      expressions:
+            expr: key
+            type: string
+            expr: value
+            type: string
+      outputColumnNames: _col0, _col1
+      File Output Operator (FS_3)
+        compressed: false
+        GlobalTableId: 0
+        table:
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+
+PREHOOK: query: EXPLAIN PHYSICAL SELECT * FROM V2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN PHYSICAL SELECT * FROM V2
+POSTHOOK: type: QUERY
+PHYSICAL PLAN
+v2:srcpart
+  TableScan (TS_0)
+    alias: srcpart
+    Select Operator (SEL_2)
+      expressions:
+            expr: ds
+            type: string
+            expr: key
+            type: string
+            expr: value
+            type: string
+      outputColumnNames: _col0, _col1, _col2
+      File Output Operator (FS_4)
+        compressed: false
+        GlobalTableId: 0
+        table:
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+
+PREHOOK: query: EXPLAIN PHYSICAL SELECT * FROM V3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN PHYSICAL SELECT * FROM V3
+POSTHOOK: type: QUERY
+PHYSICAL PLAN
+v3:src1:srcpart
+  TableScan (TS_0)
+    alias: srcpart
+    Select Operator (SEL_2)
+      expressions:
+            expr: key
+            type: string
+      outputColumnNames: _col1
+      Reduce Output Operator (RS_4)
+        key expressions:
+              expr: _col1
+              type: string
+        sort order: +
+        Map-reduce partition columns:
+              expr: _col1
+              type: string
+        tag: 0
+        value expressions:
+              expr: _col1
+              type: string
+        Join Operator (JOIN_6)
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col1}
+            1 {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col1, _col4
+          Select Operator (SEL_8)
+            expressions:
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator (FS_10)
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+v3:src2
+  TableScan (TS_3)
+    alias: src2
+    Reduce Output Operator (RS_5)
+      key expressions:
+            expr: key
+            type: string
+      sort order: +
+      Map-reduce partition columns:
+            expr: key
+            type: string
+      tag: 1
+      value expressions:
+            expr: value
+            type: string
+      Join Operator (JOIN_6)
+        condition map:
+             Inner Join 0 to 1
+        condition expressions:
+          0 {VALUE._col1}
+          1 {VALUE._col1}
+        handleSkewJoin: false
+        outputColumnNames: _col1, _col4
+
+
+PREHOOK: query: EXPLAIN PHYSICAL SELECT * FROM V4
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN PHYSICAL SELECT * FROM V4
+POSTHOOK: type: QUERY
+PHYSICAL PLAN
+v4:src1:src
+  TableScan (TS_3)
+    alias: src
+    Select Operator (SEL_4)
+      expressions:
+            expr: key
+            type: string
+      outputColumnNames: _col0
+      Reduce Output Operator (RS_6)
+        key expressions:
+              expr: _col0
+              type: string
+        sort order: +
+        Map-reduce partition columns:
+              expr: _col0
+              type: string
+        tag: 0
+        value expressions:
+              expr: _col0
+              type: string
+        Join Operator (JOIN_9)
+          condition map:
+               Inner Join 0 to 1
+               Inner Join 1 to 2
+          condition expressions:
+            0 {VALUE._col0}
+            1 {VALUE._col2}
+            2 {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col4, _col6
+          Select Operator (SEL_10)
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col4
+                  type: string
+                  expr: _col6
+                  type: string
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator (FS_12)
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+v4:src2:srcpart
+  TableScan (TS_0)
+    alias: srcpart
+    Select Operator (SEL_2)
+      expressions:
+            expr: key
+            type: string
+            expr: value
+            type: string
+      outputColumnNames: _col1, _col2
+      Reduce Output Operator (RS_7)
+        key expressions:
+              expr: _col1
+              type: string
+        sort order: +
+        Map-reduce partition columns:
+              expr: _col1
+              type: string
+        tag: 1
+        value expressions:
+              expr: _col2
+              type: string
+        Join Operator (JOIN_9)
+          condition map:
+               Inner Join 0 to 1
+               Inner Join 1 to 2
+          condition expressions:
+            0 {VALUE._col0}
+            1 {VALUE._col2}
+            2 {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col4, _col6
+v4:src3
+  TableScan (TS_5)
+    alias: src3
+    Reduce Output Operator (RS_8)
+      key expressions:
+            expr: key
+            type: string
+      sort order: +
+      Map-reduce partition columns:
+            expr: key
+            type: string
+      tag: 2
+      value expressions:
+            expr: value
+            type: string
+      Join Operator (JOIN_9)
+        condition map:
+             Inner Join 0 to 1
+             Inner Join 1 to 2
+        condition expressions:
+          0 {VALUE._col0}
+          1 {VALUE._col2}
+          2 {VALUE._col1}
+        handleSkewJoin: false
+        outputColumnNames: _col0, _col4, _col6
+
+
+PREHOOK: query: -- The table should show up in the explain physical even if none
+-- of the partitions are selected.
+CREATE VIEW V5 as SELECT * FROM srcpart where ds = '10'
+PREHOOK: type: CREATEVIEW
+POSTHOOK: query: -- The table should show up in the explain physical even if none
+-- of the partitions are selected.
+CREATE VIEW V5 as SELECT * FROM srcpart where ds = '10'
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Output: default@V5
+PREHOOK: query: EXPLAIN PHYSICAL SELECT * FROM V5
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN PHYSICAL SELECT * FROM V5
+POSTHOOK: type: QUERY
+PHYSICAL PLAN
+v5:srcpart
+  TableScan (TS_0)
+    alias: srcpart
+    Filter Operator (FIL_5)
+      predicate:
+          expr: (ds = '10')
+          type: boolean
+      Select Operator (SEL_2)
+        expressions:
+              expr: key
+              type: string
+              expr: value
+              type: string
+              expr: ds
+              type: string
+              expr: hr
+              type: string
+        outputColumnNames: _col0, _col1, _col2, _col3
+        File Output Operator (FS_4)
+          compressed: false
+          GlobalTableId: 0
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+
+PREHOOK: query: EXPLAIN PHYSICAL SELECT s1.key, s1.cnt, s2.value FROM (SELECT key, count(value) as cnt FROM src GROUP BY key) s1 JOIN src s2 ON (s1.key = s2.key) ORDER BY s1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN PHYSICAL SELECT s1.key, s1.cnt, s2.value FROM (SELECT key, count(value) as cnt FROM src GROUP BY key) s1 JOIN src s2 ON (s1.key = s2.key) ORDER BY s1.key
+POSTHOOK: type: QUERY
+PHYSICAL PLAN
+s1:src
+  TableScan (TS_0)
+    alias: src
+    Select Operator (SEL_1)
+      expressions:
+            expr: key
+            type: string
+            expr: value
+            type: string
+      outputColumnNames: key, value
+      Group By Operator (GBY_2)
+        aggregations:
+              expr: count(value)
+        bucketGroup: false
+        keys:
+              expr: key
+              type: string
+        mode: hash
+        outputColumnNames: _col0, _col1
+        Reduce Output Operator (RS_3)
+          key expressions:
+                expr: _col0
+                type: string
+          sort order: +
+          Map-reduce partition columns:
+                expr: _col0
+                type: string
+          tag: -1
+          value expressions:
+                expr: _col1
+                type: bigint
+          Group By Operator (GBY_4)
+            aggregations:
+                  expr: count(VALUE._col0)
+            bucketGroup: false
+            keys:
+                  expr: KEY._col0
+                  type: string
+            mode: mergepartial
+            outputColumnNames: _col0, _col1
+            Select Operator (SEL_5)
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+              outputColumnNames: _col0, _col1
+              Reduce Output Operator (RS_7)
+                key expressions:
+                      expr: _col0
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: _col0
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: bigint
+                Join Operator (JOIN_9)
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {VALUE._col0} {VALUE._col1}
+                    1 {VALUE._col1}
+                  handleSkewJoin: false
+                  outputColumnNames: _col0, _col1, _col3
+                  Select Operator (SEL_10)
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: bigint
+                          expr: _col3
+                          type: string
+                    outputColumnNames: _col0, _col1, _col2
+                    Reduce Output Operator (RS_11)
+                      key expressions:
+                            expr: _col0
+                            type: string
+                      sort order: +
+                      tag: -1
+                      value expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col1
+                            type: bigint
+                            expr: _col2
+                            type: string
+                      Extract (EX_12)
+                        File Output Operator (FS_13)
+                          compressed: false
+                          GlobalTableId: 0
+                          table:
+                              input format: org.apache.hadoop.mapred.TextInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+s2
+  TableScan (TS_6)
+    alias: s2
+    Reduce Output Operator (RS_8)
+      key expressions:
+            expr: key
+            type: string
+      sort order: +
+      Map-reduce partition columns:
+            expr: key
+            type: string
+      tag: 1
+      value expressions:
+            expr: value
+            type: string
+      Join Operator (JOIN_9)
+        condition map:
+             Inner Join 0 to 1
+        condition expressions:
+          0 {VALUE._col0} {VALUE._col1}
+          1 {VALUE._col1}
+        handleSkewJoin: false
+        outputColumnNames: _col0, _col1, _col3
+