diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
index 0038f73..5b2c9c0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
@@ -32,7 +32,7 @@ public enum UnsupportedFeature {
     Distinct_without_an_aggreggation, Duplicates_in_RR,
     Filter_expression_with_non_boolean_return_type, Having_clause_without_any_groupby,
     Hint, Invalid_column_reference, Invalid_decimal,
-    Less_than_equal_greater_than, Multi_insert, Others, Same_name_in_multiple_expressions,
+    Less_than_equal_greater_than, Others, Same_name_in_multiple_expressions,
     Schema_less_table, Select_alias_in_having_clause, Select_transform, Subquery,
     Table_sample_clauses, UDTF, Union_type, Unique_join
   };
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index fdb468d..a529276 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -23,10 +23,11 @@
 import java.lang.reflect.UndeclaredThrowableException;
 import java.math.BigDecimal;
 import java.util.AbstractMap.SimpleEntry;
-import java.util.ArrayList;
 import java.util.ArrayDeque;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.BitSet;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Deque;
 import java.util.EnumSet;
@@ -38,9 +39,11 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.antlr.runtime.ClassicToken;
+import org.antlr.runtime.CommonToken;
 import org.antlr.runtime.tree.TreeVisitor;
 import org.antlr.runtime.tree.TreeVisitorAction;
 import org.apache.calcite.adapter.druid.DruidQuery;
@@ -111,7 +114,6 @@
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.ImmutableIntList;
 import org.apache.calcite.util.Pair;
-import org.apache.commons.lang.mutable.MutableBoolean;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -139,7 +141,6 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -187,6 +188,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsWithStatsRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSemiJoinRule;
@@ -242,11 +244,12 @@ import org.joda.time.Interval;
 import com.google.common.base.Function;
+import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableList.Builder;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
-import com.google.common.math.IntMath;
+import com.google.common.collect.Multimap;
 
 public class CalcitePlanner extends SemanticAnalyzer {
@@ -330,9 +333,13 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
         queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query
       }
       runCBO = canCBOHandleAst(queryForCbo, getQB(), cboCtx);
-      profilesCBO = obtainCBOProfiles(queryProperties);
+      if (queryProperties.hasMultiDestQuery()) {
+        handleMultiDestQuery(ast, cboCtx);
+      }
 
       if (runCBO) {
+        profilesCBO = obtainCBOProfiles(queryProperties);
+        disableJoinMerge = true;
         boolean reAnalyzeAST = false;
         final boolean materializedView = getQB().isMaterializedView();
@@ -456,6 +463,168 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
     return sinkOp;
   }
 
+  /*
+   * Rewrites the multi-insert query before CBO is invoked. If the rewrite
+   * cannot be applied, CBO is disabled for this query (runCBO is set to false).
+   */
+  private void handleMultiDestQuery(ASTNode ast, PreCboCtx cboCtx) throws SemanticException {
+    // Not supported by CBO
+    if (!runCBO) {
+      return;
+    }
+    // Currently, we only optimize the content of the FROM clause for
+    // multi-insert queries. Thus, nodeOfInterest is the FROM clause
+    if (isJoinToken(cboCtx.nodeOfInterest)) {
+      // Join clause: rewriting is needed
+      ASTNode subq = rewriteASTForMultiInsert(ast, cboCtx.nodeOfInterest);
+      if (subq != null) {
+        // We could rewrite into a subquery
+        cboCtx.nodeOfInterest = (ASTNode) subq.getChild(0);
+        QB newQB = new QB(null, "", false);
+        Phase1Ctx ctx_1 = initPhase1Ctx();
+        doPhase1(cboCtx.nodeOfInterest, newQB, ctx_1, null);
+        setQB(newQB);
+        getMetaData(getQB());
+      } else {
+        runCBO = false;
+      }
+    } else if (cboCtx.nodeOfInterest.getToken().getType() == HiveParser.TOK_SUBQUERY) {
+      // Subquery: no rewriting needed
+      ASTNode subq = cboCtx.nodeOfInterest;
+      // First child is the subquery, second child is its alias.
+      // We set the node of interest and QB to the subquery;
+      // we do not need to generate the QB again, we can use it directly
+      cboCtx.nodeOfInterest = (ASTNode) subq.getChild(0);
+      String subQAlias = unescapeIdentifier(subq.getChild(1).getText());
+      final QB newQB = getQB().getSubqForAlias(subQAlias).getQB();
+      newQB.getParseInfo().setAlias("");
+      newQB.getParseInfo().setIsSubQ(false);
+      setQB(newQB);
+    } else {
+      // No need to run CBO (plain table ref or virtual table), or not supported
+      runCBO = false;
+    }
+  }
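For reference, the two FROM-clause shapes this method accepts are exercised by the tests added further down. A multi-insert over a join (from multi_insert_with_join2.q) takes the rewriting path:

FROM T_A a JOIN T_B b ON a.id = b.id
INSERT OVERWRITE TABLE join_result_1
SELECT a.id, a.val, b.id, b.val
WHERE b.id = 'Id_1' AND b.val = 'val_103'
INSERT OVERWRITE TABLE join_result_3
SELECT a.id, a.val, b.id, b.val
WHERE b.val = 'val_104' AND b.id = 'Id_2';

while a multi-insert whose FROM clause is already a subquery (from multi_insert_gby4.q) is reused as-is, with no rewrite:

FROM (SELECT key, value FROM src) a
INSERT OVERWRITE TABLE e1
    SELECT key, COUNT(*) WHERE key>450 GROUP BY key
INSERT OVERWRITE TABLE e2
    SELECT key, COUNT(*) WHERE key>500 GROUP BY key
INSERT OVERWRITE TABLE e3
    SELECT key, COUNT(*) WHERE key>490 GROUP BY key;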
+
+  private ASTNode rewriteASTForMultiInsert(ASTNode query, ASTNode nodeOfInterest) {
+    // 1. Gather references from the original query.
+    // These are the references in the insert clauses: we keep them as we will
+    // need to modify them after creating the subquery.
+    final List<Object> nodes = new ArrayList<Object>();
+    // This is a map from aliases to references
+    final Multimap<String, Object> aliasNodes = ArrayListMultimap.create();
+    // To know if we need to bail out
+    final AtomicBoolean notSupported = new AtomicBoolean(false);
+    TreeVisitorAction action = new TreeVisitorAction() {
+      @Override
+      public Object pre(Object t) {
+        if (!notSupported.get()) {
+          if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_ALLCOLREF) {
+            // TODO: this is a limitation of the AST rewriting approach that we will
+            // not be able to overcome until proper integration of full multi-insert
+            // queries with Calcite is implemented.
+            // The current rewriting gathers references from the insert clauses and
+            // then updates them with the new subquery references. However, if insert
+            // clauses use * or tab.*, we cannot resolve the columns that we are
+            // referring to. Thus, we just bail out and those queries will not be
+            // currently optimized by Calcite.
+            // An example of such query is:
+            // FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+            // INSERT OVERWRITE TABLE join_result_1
+            // SELECT a.*, b.*
+            // INSERT OVERWRITE TABLE join_result_3
+            // SELECT a.*, b.*;
+            notSupported.set(true);
+          } else if (ParseDriver.adaptor.getType(t) == HiveParser.DOT) {
+            Object c = ParseDriver.adaptor.getChild(t, 0);
+            if (c != null && ParseDriver.adaptor.getType(c) == HiveParser.TOK_TABLE_OR_COL) {
+              nodes.add(t);
+              aliasNodes.put(((ASTNode) t).toStringTree(), t);
+            }
+          } else if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_TABLE_OR_COL) {
+            Object p = ParseDriver.adaptor.getParent(t);
+            if (p == null || ParseDriver.adaptor.getType(p) != HiveParser.DOT) {
+              nodes.add(t);
+              aliasNodes.put(((ASTNode) t).toStringTree(), t);
+            }
+          }
+        }
+        return t;
+      }
+      @Override
+      public Object post(Object t) {
+        return t;
+      }
+    };
+    TreeVisitor tv = new TreeVisitor(ParseDriver.adaptor);
+    // We will iterate through the children: if it is an INSERT, we will traverse
+    // the subtree to gather the references
+    for (int i = 0; i < query.getChildCount(); i++) {
+      ASTNode child = (ASTNode) query.getChild(i);
+      if (ParseDriver.adaptor.getType(child) != HiveParser.TOK_INSERT) {
+        // If it is not an INSERT, we do not need to do anything
+        continue;
+      }
+      tv.visit(child, action);
+    }
+    if (notSupported.get()) {
+      // Bail out
+      return null;
+    }
+    // 2. Rewrite the FROM clause content into a new query:
+    // TOK_QUERY
+    //   TOK_FROM
+    //     join
+    //   TOK_INSERT
+    //     TOK_DESTINATION
+    //       TOK_DIR
+    //         TOK_TMP_FILE
+    //     TOK_SELECT
+    //       refs
+    ASTNode from = new ASTNode(new CommonToken(HiveParser.TOK_FROM, "TOK_FROM"));
+    from.addChild((ASTNode) ParseDriver.adaptor.dupTree(nodeOfInterest));
+    ASTNode destination = new ASTNode(new CommonToken(HiveParser.TOK_DESTINATION, "TOK_DESTINATION"));
+    ASTNode dir = new ASTNode(new CommonToken(HiveParser.TOK_DIR, "TOK_DIR"));
+    ASTNode tmpFile = new ASTNode(new CommonToken(HiveParser.TOK_TMP_FILE, "TOK_TMP_FILE"));
+    dir.addChild(tmpFile);
+    destination.addChild(dir);
+    ASTNode select = new ASTNode(new CommonToken(HiveParser.TOK_SELECT, "TOK_SELECT"));
+    int num = 0;
+    for (Collection<Object> selectIdentifier : aliasNodes.asMap().values()) {
+      Iterator<Object> it = selectIdentifier.iterator();
+      ASTNode node = (ASTNode) it.next();
+      // Add select expression
+      ASTNode selectExpr = new ASTNode(new CommonToken(HiveParser.TOK_SELEXPR, "TOK_SELEXPR"));
+      selectExpr.addChild((ASTNode) ParseDriver.adaptor.dupTree(node)); // Identifier
+      String colAlias = "col" + num;
+      selectExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias))); // Alias
+      select.addChild(selectExpr);
+      // Rewrite INSERT references
+      ASTNode colExpr = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"));
+      colExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias)));
+      replaceASTChild(node, colExpr);
+      while (it.hasNext()) {
+        node = (ASTNode) it.next();
+        colExpr = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"));
+        colExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias)));
+        replaceASTChild(node, colExpr);
+      }
+      num++;
+    }
+    ASTNode insert = new ASTNode(new CommonToken(HiveParser.TOK_INSERT, "TOK_INSERT"));
+    insert.addChild(destination);
+    insert.addChild(select);
+    ASTNode newQuery = new ASTNode(new CommonToken(HiveParser.TOK_QUERY, "TOK_QUERY"));
+    newQuery.addChild(from);
+    newQuery.addChild(insert);
+    // 3. Create the subquery that wraps the new query
+    ASTNode subq = new ASTNode(new CommonToken(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY"));
+    subq.addChild(newQuery);
+    subq.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, "subq")));
+    replaceASTChild(nodeOfInterest, subq);
+    // 4. Return the subquery
+    return subq;
+  }
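In SQL terms, rewriteASTForMultiInsert turns the join example above into roughly the following. The col0..col3 aliases and the subq alias come from the code; the exact numbering depends on the order in which the references were gathered, and the transformation itself happens on the AST rather than on SQL text:

FROM (SELECT a.id AS col0, a.val AS col1, b.id AS col2, b.val AS col3
      FROM T_A a JOIN T_B b ON a.id = b.id) subq
INSERT OVERWRITE TABLE join_result_1
SELECT col0, col1, col2, col3
WHERE col2 = 'Id_1' AND col3 = 'val_103'
INSERT OVERWRITE TABLE join_result_3
SELECT col0, col1, col2, col3
WHERE col3 = 'val_104' AND col2 = 'Id_2';

Only the new TOK_QUERY under the TOK_SUBQUERY node is handed to Calcite; fixUpAfterCbo (below) later patches the optimized plan back into the original FROM clause.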
+
   /**
    * Can CBO handle the given AST?
    *
@@ -478,7 +647,8 @@ boolean canCBOHandleAst(ASTNode ast, QB qb, PreCboCtx cboCtx) {
         || qb.isCTAS() || qb.isMaterializedView();
     // Queries without a source table currently are not supported by CBO
     boolean isSupportedType = (qb.getIsQuery() && !qb.containsQueryWithoutSourceTable())
-        || qb.isCTAS() || qb.isMaterializedView() || cboCtx.type == PreCboCtx.Type.INSERT;
+        || qb.isCTAS() || qb.isMaterializedView() || cboCtx.type == PreCboCtx.Type.INSERT
+        || cboCtx.type == PreCboCtx.Type.MULTI_INSERT;
     boolean noBadTokens = HiveCalciteUtil.validateASTForUnsupportedTokens(ast);
     boolean result = isSupportedRoot && isSupportedType
         && (getCreateViewDesc() == null || getCreateViewDesc().isMaterialized())
@@ -544,7 +714,7 @@ static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf,
     if (!queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy()
         && !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript()
-        && !queryProperties.hasMultiDestQuery() && !queryProperties.hasLateralViews()) {
+        && !queryProperties.hasLateralViews()) {
       // Ok to run CBO.
       return null;
     }
 
@@ -562,8 +732,6 @@ static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf,
       msg += "has PTF; ";
     if (queryProperties.usesScript())
       msg += "uses scripts; ";
-    if (queryProperties.hasMultiDestQuery())
-      msg += "is a multi-destination query; ";
     if (queryProperties.hasLateralViews())
       msg += "has lateral views; ";
 
@@ -666,7 +834,7 @@ String fixCtasColumnName(String colName) {
    */
   static class PreCboCtx extends PlannerContext {
     enum Type {
-      NONE, INSERT, CTAS_OR_MV, UNEXPECTED
+      NONE, INSERT, MULTI_INSERT, CTAS_OR_MV, UNEXPECTED
     }
 
     private ASTNode nodeOfInterest;
@@ -694,6 +862,17 @@ void setInsertToken(ASTNode ast, boolean isTmpFileDest) {
         set(PreCboCtx.Type.INSERT, ast);
       }
     }
+
+    @Override
+    void setMultiInsertToken(ASTNode child) {
+      set(PreCboCtx.Type.MULTI_INSERT, child);
+    }
+
+    @Override
+    void resetToken() {
+      this.type = Type.NONE;
+      this.nodeOfInterest = null;
+    }
   }
 
   ASTNode fixUpAfterCbo(ASTNode originalAst, ASTNode newAst, PreCboCtx cboCtx)
@@ -724,6 +903,12 @@ ASTNode fixUpAfterCbo(ASTNode originalAst, ASTNode newAst, PreCboCtx cboCtx)
       return newAst;
     }
 
+    case MULTI_INSERT: {
+      // Patch the optimized query back into the original FROM clause.
+      replaceASTChild(cboCtx.nodeOfInterest, newAst);
+      return originalAst;
+    }
+
     default:
       throw new AssertionError("Unexpected type " + cboCtx.type);
     }
@@ -3817,14 +4002,7 @@ public Object post(Object t) {
     }
 
     private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException {
-      QBParseInfo qbp = qb.getParseInfo();
-      if (qbp.getClauseNames().size() > 1) {
-        String msg = String.format("Multi Insert is currently not supported in CBO,"
-            + " turn off cbo to use Multi Insert.");
-        LOG.debug(msg);
-        throw new CalciteSemanticException(msg, UnsupportedFeature.Multi_insert);
-      }
-      return qbp;
+      return qb.getParseInfo();
     }
 
     private List<String> getTabAliases(RowResolver inputRR) {
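The parser-side changes below supply the token that the MULTI_INSERT machinery above consumes. In AST terms, a multi-insert query has a single TOK_FROM and one TOK_INSERT per destination; a sketch in the same notation as the tree comment in rewriteASTForMultiInsert (illustrative only, not parser output):

TOK_QUERY
  TOK_FROM
    join / TOK_SUBQUERY      <- nodeOfInterest: qbp.getQueryFrom().getChild(0)
  TOK_INSERT                 <- destination 1 (TOK_DESTINATION, TOK_SELECT, ...)
  TOK_INSERT                 <- destination 2

This is why doPhase1 records the TOK_FROM node once via setQueryFromExpr, flags the query as multi-destination when the second insert clause is registered, and at that point swaps the single-insert token for the multi-insert one (resetToken followed by setMultiInsertToken).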
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
index f549dff..7bf1c59 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
@@ -40,8 +40,8 @@
  **/
 public class QBParseInfo {
 
-  private final boolean isSubQ;
-  private final String alias;
+  private boolean isSubQ;
+  private String alias;
   private ASTNode joinExpr;
   private ASTNode hints;
   private final HashMap<String, ASTNode> aliasToSrc;
@@ -66,6 +66,7 @@
   // insertIntoTables/insertOverwriteTables map a table's fullName to its ast;
   private final Map<String, ASTNode> insertIntoTables;
   private final Map<String, ASTNode> insertOverwriteTables;
+  private ASTNode queryFromExpr;
 
   private boolean isAnalyzeCommand; // used for the analyze command (statistics)
   private boolean isNoScanAnalyzeCommand; // used for the analyze command (statistics) (noscan)
@@ -235,6 +236,10 @@ public void setSelExprForClause(String clause, ASTNode ast) {
     destToSelExpr.put(clause, ast);
   }
 
+  public void setQueryFromExpr(ASTNode ast) {
+    queryFromExpr = ast;
+  }
+
   public void setWhrExprForClause(String clause, ASTNode ast) {
     destToWhereExpr.put(clause, ast);
   }
@@ -354,6 +359,10 @@ public ASTNode getSelForClause(String clause) {
     return destToSelExpr.get(clause);
   }
 
+  public ASTNode getQueryFrom() {
+    return queryFromExpr;
+  }
+
   /**
    * Get the Cluster By AST for the clause.
    *
@@ -415,10 +424,18 @@ public String getAlias() {
     return alias;
   }
 
+  public void setAlias(String alias) {
+    this.alias = alias;
+  }
+
   public boolean getIsSubQ() {
     return isSubQ;
   }
 
+  public void setIsSubQ(boolean isSubQ) {
+    this.isSubQ = isSubQ;
+  }
+
   public ASTNode getJoinExpr() {
     return joinExpr;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d0131b7..80d6c5c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1482,15 +1482,19 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan
         throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
       }
 
-      if (plannerCtx != null) {
-        plannerCtx.setInsertToken(ast, isTmpFileDest);
-      }
-
       qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
       handleInsertStatementSpecPhase1(ast, qbp, ctx_1);
-      if (qbp.getClauseNamesForDest().size() > 1) {
+
+      if (qbp.getClauseNamesForDest().size() == 2) {
         queryProperties.setMultiDestQuery(true);
       }
+
+      if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) {
+        plannerCtx.setInsertToken(ast, isTmpFileDest);
+      } else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) {
+        plannerCtx.resetToken();
+        plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0));
+      }
       break;
 
     case HiveParser.TOK_FROM:
@@ -1500,6 +1504,10 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan
             "Multiple Children " + child_count));
       }
 
+      if (!qbp.getIsSubQ()) {
+        qbp.setQueryFromExpr(ast);
+      }
+
       // Check if this is a subquery / lateral view
       ASTNode frm = (ASTNode) ast.getChild(0);
       if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
@@ -10662,6 +10670,11 @@ void setCTASOrMVToken(ASTNode child) {
     void setInsertToken(ASTNode ast, boolean isTmpFileDest) {
     }
 
+    void setMultiInsertToken(ASTNode child) {
+    }
+
+    void resetToken() {
+    }
   }
 
   private Table getTableObjectByName(String tableName) throws HiveException {
diff --git ql/src/test/queries/clientpositive/multi_insert_gby4.q ql/src/test/queries/clientpositive/multi_insert_gby4.q
new file mode 100644
index 0000000..2e22096
--- /dev/null
+++ ql/src/test/queries/clientpositive/multi_insert_gby4.q
@@ -0,0 +1,26 @@
+-- SORT_QUERY_RESULTS
+
+create table e1 (key string, count int);
+create table e2 (key string, count int);
+create table e3 (key string, count int);
+
+explain
+FROM (SELECT key, value FROM src) a
+INSERT OVERWRITE TABLE e1
+    SELECT key, COUNT(*) WHERE key>450 GROUP BY key
+INSERT OVERWRITE TABLE e2
+    SELECT key, COUNT(*) WHERE key>500 GROUP BY key
+INSERT OVERWRITE TABLE e3
+    SELECT key, COUNT(*) WHERE key>490 GROUP BY key;
+
+FROM (SELECT key, value FROM src) a
+INSERT OVERWRITE TABLE e1
+    SELECT key, COUNT(*) WHERE key>450 GROUP BY key
+INSERT OVERWRITE TABLE e2
+    SELECT key, COUNT(*) WHERE key>500 GROUP BY key
+INSERT OVERWRITE TABLE e3
+    SELECT key, COUNT(*) WHERE key>490 GROUP BY key;
+
+select * from e1;
+select * from e2;
+select * from e3;
diff --git ql/src/test/queries/clientpositive/multi_insert_with_join2.q ql/src/test/queries/clientpositive/multi_insert_with_join2.q
index 1529fa2..ce66035 100644
--- ql/src/test/queries/clientpositive/multi_insert_with_join2.q
+++ ql/src/test/queries/clientpositive/multi_insert_with_join2.q
@@ -1,4 +1,4 @@
-set hive.cbo.enable=false;
+set hive.strict.checks.cartesian.product=false;
 
 CREATE TABLE T_A ( id STRING, val STRING );
 CREATE TABLE
T_B ( id STRING, val STRING ); @@ -49,3 +49,21 @@ WHERE b.id = 'Id_1' AND b.val = 'val_103' INSERT OVERWRITE TABLE join_result_3 SELECT a.*, b.* WHERE b.val = 'val_104' AND b.id = 'Id_2'; + +explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT * +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT * +WHERE b.val = 'val_104' AND b.id = 'Id_2'; + +explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.id, a.val, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, a.val, b.id, b.val +WHERE b.val = 'val_104' AND b.id = 'Id_2'; diff --git ql/src/test/results/clientpositive/multi_insert_gby4.q.out ql/src/test/results/clientpositive/multi_insert_gby4.q.out new file mode 100644 index 0000000..1536d4a --- /dev/null +++ ql/src/test/results/clientpositive/multi_insert_gby4.q.out @@ -0,0 +1,279 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table e1 (key string, count int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table e1 (key string, count int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e1 +PREHOOK: query: create table e2 (key string, count int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e2 +POSTHOOK: query: create table e2 (key string, count int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e2 +PREHOOK: query: create table e3 (key string, count int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e3 +POSTHOOK: query: create table e3 (key string, count int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e3 +PREHOOK: query: explain +FROM (SELECT key, value FROM src) a +INSERT OVERWRITE TABLE e1 + SELECT key, COUNT(*) WHERE key>450 GROUP BY key +INSERT OVERWRITE TABLE e2 + SELECT key, COUNT(*) WHERE key>500 GROUP BY key +INSERT OVERWRITE TABLE e3 + SELECT key, COUNT(*) WHERE key>490 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM (SELECT key, value FROM src) a +INSERT OVERWRITE TABLE e1 + SELECT key, COUNT(*) WHERE key>450 GROUP BY key +INSERT OVERWRITE TABLE e2 + SELECT key, COUNT(*) WHERE key>500 GROUP BY key +INSERT OVERWRITE TABLE e3 + SELECT key, COUNT(*) WHERE key>490 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > 490) or ((_col0 > 500) or (_col0 > 450))) (type: boolean) + Statistics: Num rows: 498 Data size: 5290 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 498 Data size: 5290 Basic stats: COMPLETE Column 
stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 498 Data size: 5290 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 > 450) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + Filter Operator + predicate: (KEY._col0 > 500) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 + Filter Operator + predicate: (KEY._col0 > 490) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-2 + Move Operator + 
tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e3 + + Stage: Stage-6 + Stats-Aggr Operator + +PREHOOK: query: FROM (SELECT key, value FROM src) a +INSERT OVERWRITE TABLE e1 + SELECT key, COUNT(*) WHERE key>450 GROUP BY key +INSERT OVERWRITE TABLE e2 + SELECT key, COUNT(*) WHERE key>500 GROUP BY key +INSERT OVERWRITE TABLE e3 + SELECT key, COUNT(*) WHERE key>490 GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@e1 +PREHOOK: Output: default@e2 +PREHOOK: Output: default@e3 +POSTHOOK: query: FROM (SELECT key, value FROM src) a +INSERT OVERWRITE TABLE e1 + SELECT key, COUNT(*) WHERE key>450 GROUP BY key +INSERT OVERWRITE TABLE e2 + SELECT key, COUNT(*) WHERE key>500 GROUP BY key +INSERT OVERWRITE TABLE e3 + SELECT key, COUNT(*) WHERE key>490 GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@e1 +POSTHOOK: Output: default@e2 +POSTHOOK: Output: default@e3 +POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: e2.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: e3.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from e1 +PREHOOK: type: QUERY +PREHOOK: Input: default@e1 +#### A masked pattern was here #### +POSTHOOK: query: select * from e1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e1 +#### A masked pattern was here #### +452 1 +453 1 +454 3 +455 1 +457 1 +458 2 +459 2 +460 1 +462 2 +463 2 +466 3 +467 1 +468 4 +469 5 +470 1 +472 1 +475 1 +477 1 +478 2 +479 1 +480 3 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 4 +490 1 +491 1 +492 2 +493 1 +494 1 +495 1 +496 1 +497 1 +498 3 +PREHOOK: query: select * from e2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e2 +#### A masked pattern was here #### +POSTHOOK: query: select * from e2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e2 +#### A masked pattern was here #### +PREHOOK: query: select * from e3 +PREHOOK: type: QUERY +PREHOOK: Input: default@e3 +#### A masked pattern was here #### +POSTHOOK: query: select * from e3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e3 +#### A masked pattern was here #### +491 1 +492 2 +493 1 +494 1 +495 1 +496 1 +497 1 +498 3 diff --git ql/src/test/results/clientpositive/multi_insert_union_src.q.out ql/src/test/results/clientpositive/multi_insert_union_src.q.out index 2036e63..1ff1db5 100644 --- ql/src/test/results/clientpositive/multi_insert_union_src.q.out +++ ql/src/test/results/clientpositive/multi_insert_union_src.q.out @@ -64,7 +64,7 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0) (type: boolean) Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -93,7 +93,7 @@ STAGE PLANS: alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 100) (type: boolean) + predicate: 
(UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/multi_insert_with_join2.q.out ql/src/test/results/clientpositive/multi_insert_with_join2.q.out index 70a044d..f777ee4 100644 --- ql/src/test/results/clientpositive/multi_insert_with_join2.q.out +++ ql/src/test/results/clientpositive/multi_insert_with_join2.q.out @@ -50,6 +50,7 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@t_b POSTHOOK: Lineage: t_b.id SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: t_b.val SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain FROM T_A a LEFT JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -74,45 +75,49 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: string) - sort order: + - Map-reduce partition columns: id (type: string) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Filter Operator + predicate: (id = 'Id_1') (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: val (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: b Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: string) - sort order: + - Map-reduce partition columns: id (type: string) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Filter Operator + predicate: ((id = 'Id_1') and (val = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: - 0 id (type: string) - 1 id (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.join_result_1 + 0 + 1 + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'Id_1' (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 Stage: Stage-0 Move Operator @@ -127,6 +132,7 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain FROM T_A a LEFT JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_3 @@ -151,45 +157,49 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: string) - sort order: + - Map-reduce partition columns: id (type: string) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Filter Operator + predicate: ((id = 'Id_2') and (val <> 'val_104')) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: val (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: b Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: string) - sort order: + - Map-reduce partition columns: id (type: string) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Filter Operator + predicate: ((val = 'val_104') and (id = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: - 0 id (type: string) - 1 id (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 <> 'val_104') and (_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - 
Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.join_result_3 + 0 + 1 + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'Id_2' (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 Stage: Stage-0 Move Operator @@ -549,3 +559,253 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator +PREHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT * +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT * +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT * +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT * +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 
Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.id, a.val, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, a.val, b.id, b.val +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.id, a.val, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, a.val, b.id, b.val +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter 
Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col3 = 'Id_1') and (_col2 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator +