diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java index 0038f73..5b2c9c0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java @@ -32,7 +32,7 @@ public enum UnsupportedFeature { Distinct_without_an_aggreggation, Duplicates_in_RR, Filter_expression_with_non_boolean_return_type, Having_clause_without_any_groupby, Hint, Invalid_column_reference, Invalid_decimal, - Less_than_equal_greater_than, Multi_insert, Others, Same_name_in_multiple_expressions, + Less_than_equal_greater_than, Others, Same_name_in_multiple_expressions, Schema_less_table, Select_alias_in_having_clause, Select_transform, Subquery, Table_sample_clauses, UDTF, Union_type, Unique_join }; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index fdb468d..06303bd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -23,10 +23,11 @@ import java.lang.reflect.UndeclaredThrowableException; import java.math.BigDecimal; import java.util.AbstractMap.SimpleEntry; -import java.util.ArrayList; import java.util.ArrayDeque; +import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; +import java.util.Collection; import java.util.Collections; import java.util.Deque; import java.util.EnumSet; @@ -38,9 +39,11 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import org.antlr.runtime.ClassicToken; +import org.antlr.runtime.CommonToken; import org.antlr.runtime.tree.TreeVisitor; import org.antlr.runtime.tree.TreeVisitorAction; import 
org.apache.calcite.adapter.druid.DruidQuery; @@ -111,7 +114,6 @@ import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.ImmutableIntList; import org.apache.calcite.util.Pair; -import org.apache.commons.lang.mutable.MutableBoolean; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; @@ -139,7 +141,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; @@ -187,6 +188,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsWithStatsRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSemiJoinRule; @@ -242,11 +244,12 @@ import org.joda.time.Interval; import com.google.common.base.Function; +import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import com.google.common.math.IntMath; +import com.google.common.collect.Multimap; public class CalcitePlanner extends SemanticAnalyzer { @@ -330,9 +333,13 
@@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query } runCBO = canCBOHandleAst(queryForCbo, getQB(), cboCtx); - profilesCBO = obtainCBOProfiles(queryProperties); + if (queryProperties.hasMultiDestQuery()) { + handleMultiDestQuery(ast, cboCtx); + } if (runCBO) { + profilesCBO = obtainCBOProfiles(queryProperties); + disableJoinMerge = true; boolean reAnalyzeAST = false; final boolean materializedView = getQB().isMaterializedView(); @@ -456,6 +463,166 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept return sinkOp; } + /* + * Tries to optimize FROM clause of multi-insert. No attempt to optimize insert clauses of the query. + * Returns nothing: if the FROM clause cannot be rewritten or is not supported, runCBO is set to false. + */ + private void handleMultiDestQuery(ASTNode ast, PreCboCtx cboCtx) throws SemanticException { + // Not supported by CBO + if (!runCBO) { + return; + } + // Currently, we only optimize the content of the FROM clause + // for multi-insert queries. 
Thus, nodeOfInterest is the FROM clause + if (isJoinToken(cboCtx.nodeOfInterest)) { + // Join clause: rewriting is needed + ASTNode subq = rewriteASTForMultiInsert(ast, cboCtx.nodeOfInterest); + if (subq != null) { + // We could rewrite into a subquery + cboCtx.nodeOfInterest = (ASTNode) subq.getChild(0); + QB newQB = new QB(null, "", false); + Phase1Ctx ctx_1 = initPhase1Ctx(); + doPhase1(cboCtx.nodeOfInterest, newQB, ctx_1, null); + setQB(newQB); + getMetaData(getQB()); + } else { + runCBO = false; + } + } else if (cboCtx.nodeOfInterest.getToken().getType() == HiveParser.TOK_SUBQUERY) { + // Subquery: no rewriting needed + ASTNode subq = cboCtx.nodeOfInterest; + // First child is subquery, second child is alias + // We set the node of interest and QB to the subquery + // We do not need to generate the QB again, but rather we use it directly + cboCtx.nodeOfInterest = (ASTNode) subq.getChild(0); + String subQAlias = unescapeIdentifier(subq.getChild(1).getText()); + final QB newQB = getQB().getSubqForAlias(subQAlias).getQB(); + newQB.getParseInfo().setAlias(""); + newQB.getParseInfo().setIsSubQ(false); + setQB(newQB); + } else { + // No need to run CBO (table ref or virtual table) or not supported + runCBO = false; + } + } + + private ASTNode rewriteASTForMultiInsert(ASTNode query, ASTNode nodeOfInterest) { + // 1. gather references from original query + // This is a map from aliases to references. 
+ // We keep all references as we will need to modify them after creating + // the subquery + final Multimap aliasNodes = ArrayListMultimap.create(); + // To know if we need to bail out + final AtomicBoolean notSupported = new AtomicBoolean(false); + TreeVisitorAction action = new TreeVisitorAction() { + @Override + public Object pre(Object t) { + if (!notSupported.get()) { + if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_ALLCOLREF) { + // TODO: this is a limitation of the AST rewriting approach that we will + // not be able to overcome till proper integration of full multi-insert + // queries with Calcite is implemented. + // The current rewriting gathers references from insert clauses and then + // updates them with the new subquery references. However, if insert + // clauses use * or tab.*, we cannot resolve the columns that we are + // referring to. Thus, we just bail out and those queries will not be + // currently optimized by Calcite. + // An example of such a query is: + // FROM T_A a LEFT JOIN T_B b ON a.id = b.id + // INSERT OVERWRITE TABLE join_result_1 + // SELECT a.*, b.* + // INSERT OVERWRITE TABLE join_result_3 + // SELECT a.*, b.*; + notSupported.set(true); + } else if (ParseDriver.adaptor.getType(t) == HiveParser.DOT) { + Object c = ParseDriver.adaptor.getChild(t, 0); + if (c != null && ParseDriver.adaptor.getType(c) == HiveParser.TOK_TABLE_OR_COL) { + aliasNodes.put(((ASTNode) t).toStringTree(), t); + } + } else if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_TABLE_OR_COL) { + Object p = ParseDriver.adaptor.getParent(t); + if (p == null || ParseDriver.adaptor.getType(p) != HiveParser.DOT) { + aliasNodes.put(((ASTNode) t).toStringTree(), t); + } + } + } + return t; + } + @Override + public Object post(Object t) { + return t; + } + }; + TreeVisitor tv = new TreeVisitor(ParseDriver.adaptor); + // We will iterate through the children: if it is an INSERT, we will traverse + // the subtree to gather the references + for (int i = 0; i < 
query.getChildCount(); i++) { + ASTNode child = (ASTNode) query.getChild(i); + if (ParseDriver.adaptor.getType(child) != HiveParser.TOK_INSERT) { + // If it is not an INSERT, we do not need to do anything + continue; + } + tv.visit(child, action); + } + if (notSupported.get()) { + // Bail out + return null; + } + // 2. rewrite into query + // TOK_QUERY + // TOK_FROM + // join + // TOK_INSERT + // TOK_DESTINATION + // TOK_DIR + // TOK_TMP_FILE + // TOK_SELECT + // refs + ASTNode from = new ASTNode(new CommonToken(HiveParser.TOK_FROM, "TOK_FROM")); + from.addChild((ASTNode) ParseDriver.adaptor.dupTree(nodeOfInterest)); + ASTNode destination = new ASTNode(new CommonToken(HiveParser.TOK_DESTINATION, "TOK_DESTINATION")); + ASTNode dir = new ASTNode(new CommonToken(HiveParser.TOK_DIR, "TOK_DIR")); + ASTNode tmpFile = new ASTNode(new CommonToken(HiveParser.TOK_TMP_FILE, "TOK_TMP_FILE")); + dir.addChild(tmpFile); + destination.addChild(dir); + ASTNode select = new ASTNode(new CommonToken(HiveParser.TOK_SELECT, "TOK_SELECT")); + int num = 0; + for (Collection selectIdentifier : aliasNodes.asMap().values()) { + Iterator it = selectIdentifier.iterator(); + ASTNode node = (ASTNode) it.next(); + // Add select expression + ASTNode selectExpr = new ASTNode(new CommonToken(HiveParser.TOK_SELEXPR, "TOK_SELEXPR")); + selectExpr.addChild((ASTNode) ParseDriver.adaptor.dupTree(node)); // Identifier + String colAlias = "col" + num; + selectExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias))); // Alias + select.addChild(selectExpr); + // Rewrite all INSERT references (all the node values for this key) + ASTNode colExpr = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL")); + colExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias))); + replaceASTChild(node, colExpr); + while (it.hasNext()) { + node = (ASTNode) it.next(); + colExpr = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL")); + 
colExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias))); + replaceASTChild(node, colExpr); + } + num++; + } + ASTNode insert = new ASTNode(new CommonToken(HiveParser.TOK_INSERT, "TOK_INSERT")); + insert.addChild(destination); + insert.addChild(select); + ASTNode newQuery = new ASTNode(new CommonToken(HiveParser.TOK_QUERY, "TOK_QUERY")); + newQuery.addChild(from); + newQuery.addChild(insert); + // 3. create subquery + ASTNode subq = new ASTNode(new CommonToken(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY")); + subq.addChild(newQuery); + subq.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, "subq"))); + replaceASTChild(nodeOfInterest, subq); + // 4. return subquery + return subq; + } + /** * Can CBO handle the given AST? * @@ -478,7 +645,8 @@ boolean canCBOHandleAst(ASTNode ast, QB qb, PreCboCtx cboCtx) { || qb.isCTAS() || qb.isMaterializedView(); // Queries without a source table currently are not supported by CBO boolean isSupportedType = (qb.getIsQuery() && !qb.containsQueryWithoutSourceTable()) - || qb.isCTAS() || qb.isMaterializedView() || cboCtx.type == PreCboCtx.Type.INSERT; + || qb.isCTAS() || qb.isMaterializedView() || cboCtx.type == PreCboCtx.Type.INSERT + || cboCtx.type == PreCboCtx.Type.MULTI_INSERT; boolean noBadTokens = HiveCalciteUtil.validateASTForUnsupportedTokens(ast); boolean result = isSupportedRoot && isSupportedType && (getCreateViewDesc() == null || getCreateViewDesc().isMaterialized()) @@ -544,7 +712,7 @@ static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf, if (!queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy() && !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript() - && !queryProperties.hasMultiDestQuery() && !queryProperties.hasLateralViews()) { + && !queryProperties.hasLateralViews()) { // Ok to run CBO. 
return null; } @@ -562,8 +730,6 @@ static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf, msg += "has PTF; "; if (queryProperties.usesScript()) msg += "uses scripts; "; - if (queryProperties.hasMultiDestQuery()) - msg += "is a multi-destination query; "; if (queryProperties.hasLateralViews()) msg += "has lateral views; "; @@ -666,7 +832,7 @@ String fixCtasColumnName(String colName) { */ static class PreCboCtx extends PlannerContext { enum Type { - NONE, INSERT, CTAS_OR_MV, UNEXPECTED + NONE, INSERT, MULTI_INSERT, CTAS_OR_MV, UNEXPECTED } private ASTNode nodeOfInterest; @@ -694,6 +860,17 @@ void setInsertToken(ASTNode ast, boolean isTmpFileDest) { set(PreCboCtx.Type.INSERT, ast); } } + + @Override + void setMultiInsertToken(ASTNode child) { + set(PreCboCtx.Type.MULTI_INSERT, child); + } + + @Override + void resetToken() { + this.type = Type.NONE; + this.nodeOfInterest = null; + } } ASTNode fixUpAfterCbo(ASTNode originalAst, ASTNode newAst, PreCboCtx cboCtx) @@ -724,6 +901,12 @@ ASTNode fixUpAfterCbo(ASTNode originalAst, ASTNode newAst, PreCboCtx cboCtx) return newAst; } + case MULTI_INSERT: { + // Patch the optimized query back into original FROM clause. 
+ replaceASTChild(cboCtx.nodeOfInterest, newAst); + return originalAst; + } + default: throw new AssertionError("Unexpected type " + cboCtx.type); } @@ -3817,14 +4000,7 @@ public Object post(Object t) { } private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException { - QBParseInfo qbp = qb.getParseInfo(); - if (qbp.getClauseNames().size() > 1) { - String msg = String.format("Multi Insert is currently not supported in CBO," - + " turn off cbo to use Multi Insert."); - LOG.debug(msg); - throw new CalciteSemanticException(msg, UnsupportedFeature.Multi_insert); - } - return qbp; + return qb.getParseInfo(); } private List getTabAliases(RowResolver inputRR) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java index f549dff..7bf1c59 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java @@ -40,8 +40,8 @@ **/ public class QBParseInfo { - private final boolean isSubQ; - private final String alias; + private boolean isSubQ; + private String alias; private ASTNode joinExpr; private ASTNode hints; private final HashMap aliasToSrc; @@ -66,6 +66,7 @@ // insertIntoTables/insertOverwriteTables map a table's fullName to its ast; private final Map insertIntoTables; private final Map insertOverwriteTables; + private ASTNode queryFromExpr; private boolean isAnalyzeCommand; // used for the analyze command (statistics) private boolean isNoScanAnalyzeCommand; // used for the analyze command (statistics) (noscan) @@ -235,6 +236,10 @@ public void setSelExprForClause(String clause, ASTNode ast) { destToSelExpr.put(clause, ast); } + public void setQueryFromExpr(ASTNode ast) { + queryFromExpr = ast; + } + public void setWhrExprForClause(String clause, ASTNode ast) { destToWhereExpr.put(clause, ast); } @@ -354,6 +359,10 @@ public ASTNode getSelForClause(String clause) { return destToSelExpr.get(clause); } + public 
ASTNode getQueryFrom() { + return queryFromExpr; + } + /** * Get the Cluster By AST for the clause. * @@ -415,10 +424,18 @@ public String getAlias() { return alias; } + public void setAlias(String alias) { + this.alias = alias; + } + public boolean getIsSubQ() { return isSubQ; } + public void setIsSubQ(boolean isSubQ) { + this.isSubQ = isSubQ; + } + public ASTNode getJoinExpr() { return joinExpr; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index d0131b7..3fcafa0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -1482,15 +1482,25 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast)); } - if (plannerCtx != null) { - plannerCtx.setInsertToken(ast, isTmpFileDest); - } - qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0)); handleInsertStatementSpecPhase1(ast, qbp, ctx_1); - if (qbp.getClauseNamesForDest().size() > 1) { + + if (qbp.getClauseNamesForDest().size() == 2) { + // From the moment that we have two destination clauses, + // we know that this is a multi-insert query. + // Thus, set property to right value. queryProperties.setMultiDestQuery(true); } + + if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) { + plannerCtx.setInsertToken(ast, isTmpFileDest); + } else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) { + // For multi-insert query, currently we only optimize the FROM clause. + // Hence, introduce multi-insert token on top of it. + // However, first we need to reset existing token (insert). 
+ plannerCtx.resetToken(); + plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0)); + } break; case HiveParser.TOK_FROM: @@ -1500,6 +1510,10 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan "Multiple Children " + child_count)); } + if (!qbp.getIsSubQ()) { + qbp.setQueryFromExpr(ast); + } + // Check if this is a subquery / lateral view ASTNode frm = (ASTNode) ast.getChild(0); if (frm.getToken().getType() == HiveParser.TOK_TABREF) { @@ -10662,6 +10676,11 @@ void setCTASOrMVToken(ASTNode child) { void setInsertToken(ASTNode ast, boolean isTmpFileDest) { } + void setMultiInsertToken(ASTNode child) { + } + + void resetToken() { + } } private Table getTableObjectByName(String tableName) throws HiveException { diff --git ql/src/test/queries/clientpositive/multi_insert_gby4.q ql/src/test/queries/clientpositive/multi_insert_gby4.q new file mode 100644 index 0000000..2e22096 --- /dev/null +++ ql/src/test/queries/clientpositive/multi_insert_gby4.q @@ -0,0 +1,26 @@ +-- SORT_QUERY_RESULTS + +create table e1 (key string, count int); +create table e2 (key string, count int); +create table e3 (key string, count int); + +explain +FROM (SELECT key, value FROM src) a +INSERT OVERWRITE TABLE e1 + SELECT key, COUNT(*) WHERE key>450 GROUP BY key +INSERT OVERWRITE TABLE e2 + SELECT key, COUNT(*) WHERE key>500 GROUP BY key +INSERT OVERWRITE TABLE e3 + SELECT key, COUNT(*) WHERE key>490 GROUP BY key; + +FROM (SELECT key, value FROM src) a +INSERT OVERWRITE TABLE e1 + SELECT key, COUNT(*) WHERE key>450 GROUP BY key +INSERT OVERWRITE TABLE e2 + SELECT key, COUNT(*) WHERE key>500 GROUP BY key +INSERT OVERWRITE TABLE e3 + SELECT key, COUNT(*) WHERE key>490 GROUP BY key; + +select * from e1; +select * from e2; +select * from e3; diff --git ql/src/test/queries/clientpositive/multi_insert_with_join2.q ql/src/test/queries/clientpositive/multi_insert_with_join2.q index 1529fa2..37d1678 100644 --- 
ql/src/test/queries/clientpositive/multi_insert_with_join2.q +++ ql/src/test/queries/clientpositive/multi_insert_with_join2.q @@ -1,4 +1,4 @@ -set hive.cbo.enable=false; +set hive.strict.checks.cartesian.product=false; CREATE TABLE T_A ( id STRING, val STRING ); CREATE TABLE T_B ( id STRING, val STRING ); @@ -49,3 +49,30 @@ WHERE b.id = 'Id_1' AND b.val = 'val_103' INSERT OVERWRITE TABLE join_result_3 SELECT a.*, b.* WHERE b.val = 'val_104' AND b.id = 'Id_2'; + +explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT * +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT * +WHERE b.val = 'val_104' AND b.id = 'Id_2'; + +explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.id, a.val, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, a.val, b.id, b.val +WHERE b.val = 'val_104' AND b.id = 'Id_2'; + +explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.val, a.id, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, b.val, b.id, a.val +WHERE b.val = 'val_104' AND b.id = 'Id_2'; diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out index b45411c..90493ff 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out @@ -89,15 +89,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), 
_col6 (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -267,15 +268,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -445,15 +447,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted 
Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: _col0 (type: int), _col2 (type: int) diff --git ql/src/test/results/clientpositive/groupby_sort_1_23.q.out ql/src/test/results/clientpositive/groupby_sort_1_23.q.out index 6572f6c..008b796 100644 --- ql/src/test/results/clientpositive/groupby_sort_1_23.q.out +++ ql/src/test/results/clientpositive/groupby_sort_1_23.q.out @@ -5808,7 +5808,7 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 8) (type: boolean) + predicate: (UDFToDouble(key) = 8.0) (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) diff --git ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out index ce71354..35b38ca 100644 --- ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out +++ ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out @@ -6296,7 +6296,7 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 8) (type: boolean) + predicate: (UDFToDouble(key) = 8.0) (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) diff --git ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out index e999077..3083291 100644 --- ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out @@ -1599,6 +1599,10 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) 
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -1606,22 +1610,34 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 File Output Operator compressed: false Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE @@ -1629,15 +1645,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - File Output Operator - compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest2 Execution mode: llap Stage: Stage-3 @@ -1812,6 +1820,10 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -1819,38 +1831,42 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: llap Reducer 2 Execution mode: llap diff --git ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out index a539e03..633abff 100644 
--- ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out +++ ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out @@ -96,19 +96,19 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -144,12 +144,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution 
mode: llap LLAP IO: no inputs @@ -310,19 +314,19 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -358,12 +362,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -524,19 +532,19 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: 
boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -572,12 +580,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/explainuser_2.q.out ql/src/test/results/clientpositive/llap/explainuser_2.q.out index 931f1a2..9bb478c 100644 --- 
ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -2938,19 +2938,18 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Union 2 (CONTAINS) - Map 11 <- Union 9 (CONTAINS) + Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) Map 12 <- Union 9 (CONTAINS) - Map 16 <- Map 20 (BROADCAST_EDGE), Union 4 (CONTAINS) - Map 17 <- Map 20 (BROADCAST_EDGE), Union 4 (CONTAINS) - Map 18 <- Map 20 (BROADCAST_EDGE), Union 4 (CONTAINS) - Map 19 <- Map 20 (BROADCAST_EDGE), Union 4 (CONTAINS) - Map 20 <- Map 21 (BROADCAST_EDGE) - Map 5 <- Union 2 (CONTAINS) - Map 7 <- Map 6 (BROADCAST_EDGE) + Map 13 <- Union 9 (CONTAINS) + Map 16 <- Map 17 (BROADCAST_EDGE) + Map 18 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS) + Map 19 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS) + Map 20 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS) + Map 21 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS) + Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) Map 8 <- Union 9 (CONTAINS) - Reducer 10 <- Reducer 14 (SIMPLE_EDGE), Union 4 (CONTAINS), Union 9 (SIMPLE_EDGE) - Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) + Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE) + Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) #### A masked pattern was here #### Vertices: @@ -2964,20 +2963,26 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 525 Data size: 5503 
Basic stats: COMPLETE Column stats: NONE + input vertices: + 1 Map 6 + Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 525 Data size: 5503 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 11 + Map 12 Map Operator Tree: TableScan alias: src @@ -2987,20 +2992,16 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 12 + Map 13 Map Operator Tree: TableScan alias: src @@ -3010,20 +3011,16 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) 
Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 13 + Map 14 Map Operator Tree: TableScan alias: x @@ -3031,12 +3028,16 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 15 @@ -3047,180 +3048,136 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 16 Map Operator Tree: TableScan - alias: src1 + alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col1 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1525 Data size: 16127 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 0 Map 20 - Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.a - File Output Operator - compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.b - File Output Operator - compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.c + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 17 + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col3 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col3 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col3 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col3 (type: string) Execution mode: llap LLAP IO: no inputs Map 17 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + 
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1525 Data size: 16127 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 0 Map 20 - Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.a - File Output Operator - compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.b - File Output Operator - compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE - table: - input 
format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.c + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 18 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1525 Data size: 16127 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 0 Map 20 + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3 + input vertices: + 0 Map 16 + Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.a - File Output Operator - compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.b - File Output Operator - compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.c + File Output Operator + compressed: false + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + File Output Operator + compressed: false + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.b + File Output Operator + compressed: false + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.c Execution mode: llap LLAP IO: no inputs Map 19 @@ -3233,110 +3190,150 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1525 Data size: 16127 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 0 Map 20 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3 + input vertices: + 0 Map 16 + Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic 
stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.a - File Output Operator - compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.b - File Output Operator - compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.c + File Output Operator + compressed: false + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + File Output Operator + compressed: false + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.b + File Output Operator + compressed: false + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.c Execution mode: llap LLAP IO: no inputs Map 20 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 1 Map 21 - Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col6 (type: string) - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col6 (type: string) - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col6 (type: string) - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col6 (type: string) + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3 + input vertices: + 0 Map 16 + Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + File Output Operator + compressed: false + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.b + File Output Operator + compressed: false + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.c Execution mode: llap LLAP IO: no inputs Map 21 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 
25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3 + input vertices: + 0 Map 16 + Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + File Output Operator + compressed: false + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.b + File Output Operator + compressed: false + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.c Execution mode: llap LLAP IO: no inputs Map 5 @@ -3349,17 +3346,23 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 525 Data size: 5503 Basic stats: COMPLETE Column stats: NONE + input vertices: + 1 Map 6 + Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 525 Data size: 5503 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 6 @@ -3370,12 +3373,22 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: 
NONE + value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 7 @@ -3386,22 +3399,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col6 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 8 @@ -3414,17 +3421,13 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + 
+ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 10 @@ -3434,17 +3437,33 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col1 (type: string) - outputColumnNames: _col0, _col6 + outputColumnNames: _col1 Statistics: Num rows: 1127 Data size: 11896 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1127 Data size: 11896 Basic stats: COMPLETE Column stats: NONE + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col4 + Statistics: Num rows: 1239 Data size: 13085 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col6 (type: string) + expressions: _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1127 Data size: 11896 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1239 Data size: 13085 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3452,7 +3471,7 @@ STAGE PLANS: name: default.a File Output Operator compressed: 
false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3460,29 +3479,12 @@ STAGE PLANS: name: default.b File Output Operator compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.c - Reducer 14 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col6 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3491,16 +3493,16 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col6 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: string) + outputColumnNames: _col1, _col4 + Statistics: Num rows: 634 Data size: 6658 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col6 (type: string) + expressions: _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data 
size: 6427 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 634 Data size: 6658 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3508,7 +3510,7 @@ STAGE PLANS: name: default.a File Output Operator compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3516,7 +3518,7 @@ STAGE PLANS: name: default.b File Output Operator compressed: false - Statistics: Num rows: 3409 Data size: 36062 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3603,28 +3605,27 @@ INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value POSTHOOK: type: QUERY -Plan not optimized by CBO. +Plan optimized by CBO. 
Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) -Map 11 <- Map 10 (BROADCAST_EDGE) Map 12 <- Union 13 (CONTAINS) -Map 18 <- Union 13 (CONTAINS) -Map 19 <- Union 15 (CONTAINS) -Map 23 <- Union 24 (CONTAINS) -Map 30 <- Union 24 (CONTAINS) -Map 31 <- Union 26 (CONTAINS) -Map 32 <- Union 28 (CONTAINS) -Map 33 <- Map 34 (BROADCAST_EDGE) +Map 19 <- Union 13 (CONTAINS) +Map 20 <- Union 15 (CONTAINS) +Map 23 <- Map 24 (BROADCAST_EDGE) +Map 25 <- Union 26 (CONTAINS) +Map 32 <- Union 26 (CONTAINS) +Map 33 <- Union 28 (CONTAINS) +Map 34 <- Union 30 (CONTAINS) Map 9 <- Union 2 (CONTAINS) Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS) Reducer 16 <- Union 15 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) -Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) +Reducer 17 <- Map 21 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 29 <- Map 33 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 3 <- Union 2 (SIMPLE_EDGE) +Reducer 29 <- Union 28 (SIMPLE_EDGE), Union 30 (CONTAINS) +Reducer 3 <- Map 10 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE) +Reducer 31 <- Map 23 (BROADCAST_EDGE), Union 30 (SIMPLE_EDGE), Union 7 (CONTAINS) Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) Reducer 8 <- Union 7 (SIMPLE_EDGE) @@ -3638,247 +3639,265 @@ Stage-5 Dependency Collection{} Stage-3 Reducer 8 llap - File Output Operator [FS_114] + File Output Operator [FS_123] table:{"name:":"default.a"} - Group By Operator [GBY_111] (rows=544 width=10) + Group By Operator [GBY_120] (rows=530 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 7 [SIMPLE_EDGE] - <-Reducer 29 [CONTAINS] llap - Reduce Output Operator [RS_110] 
+ <-Reducer 31 [CONTAINS] llap + Reduce Output Operator [RS_119] PartitionCols:_col0, _col1 - Group By Operator [GBY_109] (rows=1089 width=10) + Group By Operator [GBY_118] (rows=1061 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_105] (rows=484 width=10) + Select Operator [SEL_114] (rows=484 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_160] (rows=484 width=10) - Conds:RS_101._col1=SEL_92._col1(Inner),Output:["_col0","_col6"] - <-Map 33 [BROADCAST_EDGE] llap - BROADCAST [RS_101] + Map Join Operator [MAPJOIN_172] (rows=484 width=10) + Conds:RS_111._col1=SEL_107._col1(Inner),Output:["_col0","_col3"] + <-Map 23 [BROADCAST_EDGE] llap + BROADCAST [RS_111] PartitionCols:_col1 - Map Join Operator [MAPJOIN_157] (rows=27 width=7) - Conds:FIL_153.key=RS_98.key(Inner),Output:["_col0","_col1","_col6"] - <-Map 34 [BROADCAST_EDGE] llap - BROADCAST [RS_98] - PartitionCols:key - Filter Operator [FIL_154] (rows=25 width=7) - predicate:key is not null - TableScan [TS_94] (rows=25 width=7) - default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Filter Operator [FIL_153] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_93] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_92] (rows=440 width=10) + Map Join Operator [MAPJOIN_171] (rows=27 width=7) + Conds:SEL_71._col0=RS_109._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 24 [BROADCAST_EDGE] llap + BROADCAST [RS_109] + PartitionCols:_col0 + Select Operator [SEL_74] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_162] (rows=25 width=7) + predicate:key is not null + TableScan [TS_72] (rows=25 width=7) + default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_71] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_161] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_69] (rows=25 
width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_107] (rows=440 width=10) Output:["_col1"] - Group By Operator [GBY_91] (rows=440 width=10) + Group By Operator [GBY_106] (rows=440 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 28 [SIMPLE_EDGE] - <-Map 32 [CONTAINS] llap - Reduce Output Operator [RS_90] + <-Union 30 [SIMPLE_EDGE] + <-Map 34 [CONTAINS] llap + Reduce Output Operator [RS_105] PartitionCols:_col0, _col1 - Group By Operator [GBY_89] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_85] (rows=500 width=10) + Group By Operator [GBY_104] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_100] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_152] (rows=500 width=10) + Filter Operator [FIL_166] (rows=500 width=10) predicate:value is not null - TableScan [TS_84] (rows=500 width=10) + TableScan [TS_98] (rows=500 width=10) Output:["key","value"] - <-Reducer 27 [CONTAINS] llap - Reduce Output Operator [RS_90] + <-Reducer 29 [CONTAINS] llap + Reduce Output Operator [RS_105] PartitionCols:_col0, _col1 - Group By Operator [GBY_89] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_82] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 26 [SIMPLE_EDGE] - <-Map 31 [CONTAINS] llap - Reduce Output Operator [RS_81] - PartitionCols:_col0, _col1 - Group By Operator [GBY_80] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_76] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_151] (rows=500 width=10) - predicate:value is not null - TableScan [TS_75] (rows=500 width=10) - Output:["key","value"] - <-Reducer 25 [CONTAINS] llap - Reduce Output Operator [RS_81] - PartitionCols:_col0, _col1 - Group By Operator [GBY_80] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group 
By Operator [GBY_73] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 24 [SIMPLE_EDGE] - <-Map 23 [CONTAINS] llap - Reduce Output Operator [RS_72] - PartitionCols:_col0, _col1 - Group By Operator [GBY_71] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_65] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_149] (rows=25 width=7) - predicate:value is not null - TableScan [TS_64] (rows=25 width=7) - Output:["key","value"] - <-Map 30 [CONTAINS] llap - Reduce Output Operator [RS_72] - PartitionCols:_col0, _col1 - Group By Operator [GBY_71] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_67] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_150] (rows=500 width=10) - predicate:value is not null - TableScan [TS_66] (rows=500 width=10) - Output:["key","value"] + Group By Operator [GBY_104] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_97] (rows=381 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_96] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 28 [SIMPLE_EDGE] + <-Map 33 [CONTAINS] llap + Reduce Output Operator [RS_95] + PartitionCols:_col0, _col1 + Group By Operator [GBY_94] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_90] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_165] (rows=500 width=10) + predicate:value is not null + TableScan [TS_88] (rows=500 width=10) + Output:["key","value"] + <-Reducer 27 [CONTAINS] llap + Reduce Output Operator [RS_95] + PartitionCols:_col0, _col1 + Group By Operator [GBY_94] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_87] (rows=262 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_86] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 26 
[SIMPLE_EDGE] + <-Map 25 [CONTAINS] llap + Reduce Output Operator [RS_85] + PartitionCols:_col0, _col1 + Group By Operator [GBY_84] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_77] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_163] (rows=25 width=7) + predicate:value is not null + TableScan [TS_75] (rows=25 width=7) + Output:["key","value"] + <-Map 32 [CONTAINS] llap + Reduce Output Operator [RS_85] + PartitionCols:_col0, _col1 + Group By Operator [GBY_84] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_80] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_164] (rows=500 width=10) + predicate:value is not null + TableScan [TS_78] (rows=500 width=10) + Output:["key","value"] <-Reducer 6 [CONTAINS] llap - Reduce Output Operator [RS_110] + Reduce Output Operator [RS_119] PartitionCols:_col0, _col1 - Group By Operator [GBY_109] (rows=1089 width=10) + Group By Operator [GBY_118] (rows=1061 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_62] (rows=605 width=10) + Group By Operator [GBY_67] (rows=577 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 5 [SIMPLE_EDGE] - <-Reducer 17 [CONTAINS] llap - Reduce Output Operator [RS_61] + <-Reducer 18 [CONTAINS] llap + Reduce Output Operator [RS_66] PartitionCols:_col0, _col1 - Group By Operator [GBY_60] (rows=1210 width=10) + Group By Operator [GBY_65] (rows=1155 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_56] (rows=605 width=10) + Select Operator [SEL_61] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_159] (rows=605 width=10) - Conds:RS_52._col1=RS_54._col1(Inner),Output:["_col0","_col6"] - <-Reducer 16 [SIMPLE_EDGE] llap - SHUFFLE [RS_54] - PartitionCols:_col1 - Select Operator [SEL_43] (rows=381 width=10) - Output:["_col1"] - Group By Operator [GBY_42] (rows=381 width=10) - 
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 15 [SIMPLE_EDGE] - <-Map 19 [CONTAINS] llap - Reduce Output Operator [RS_41] - PartitionCols:_col0, _col1 - Group By Operator [GBY_40] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_36] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_146] (rows=500 width=10) - predicate:value is not null - TableScan [TS_35] (rows=500 width=10) - Output:["key","value"] - <-Reducer 14 [CONTAINS] llap - Reduce Output Operator [RS_41] - PartitionCols:_col0, _col1 - Group By Operator [GBY_40] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_33] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 13 [SIMPLE_EDGE] - <-Map 12 [CONTAINS] llap - Reduce Output Operator [RS_32] - PartitionCols:_col0, _col1 - Group By Operator [GBY_31] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_25] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_144] (rows=25 width=7) - predicate:value is not null - TableScan [TS_24] (rows=25 width=7) - Output:["key","value"] - <-Map 18 [CONTAINS] llap - Reduce Output Operator [RS_32] - PartitionCols:_col0, _col1 - Group By Operator [GBY_31] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_27] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_145] (rows=500 width=10) - predicate:value is not null - TableScan [TS_26] (rows=500 width=10) - Output:["key","value"] - <-Reducer 21 [SIMPLE_EDGE] llap - SHUFFLE [RS_52] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_156] (rows=550 width=10) - Conds:RS_47.key=RS_49.key(Inner),Output:["_col0","_col1","_col6"] - <-Map 20 [SIMPLE_EDGE] llap - SHUFFLE [RS_47] - PartitionCols:key - Filter Operator [FIL_147] (rows=500 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_44] (rows=500 width=10) - 
default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 22 [SIMPLE_EDGE] llap - SHUFFLE [RS_49] - PartitionCols:key - Filter Operator [FIL_148] (rows=500 width=10) - predicate:key is not null - TableScan [TS_45] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + Merge Join Operator [MERGEJOIN_170] (rows=605 width=10) + Conds:RS_58._col2=RS_59._col0(Inner),Output:["_col2","_col5"] + <-Map 22 [SIMPLE_EDGE] llap + SHUFFLE [RS_59] + PartitionCols:_col0 + Select Operator [SEL_54] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_160] (rows=500 width=10) + predicate:key is not null + TableScan [TS_52] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 17 [SIMPLE_EDGE] llap + SHUFFLE [RS_58] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_169] (rows=550 width=10) + Conds:RS_55._col1=RS_56._col1(Inner),Output:["_col2"] + <-Map 21 [SIMPLE_EDGE] llap + SHUFFLE [RS_56] + PartitionCols:_col1 + Select Operator [SEL_51] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_159] (rows=500 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_49] (rows=500 width=10) + default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 16 [SIMPLE_EDGE] llap + SHUFFLE [RS_55] + PartitionCols:_col1 + Select Operator [SEL_48] (rows=381 width=10) + Output:["_col1"] + Group By Operator [GBY_47] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 15 [SIMPLE_EDGE] + <-Map 20 [CONTAINS] llap + Reduce Output Operator [RS_46] + PartitionCols:_col0, _col1 + Group By Operator [GBY_45] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_41] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_158] (rows=500 width=10) + predicate:value is not null + TableScan [TS_39] (rows=500 width=10) + Output:["key","value"] + <-Reducer 14 [CONTAINS] llap + Reduce Output 
Operator [RS_46] + PartitionCols:_col0, _col1 + Group By Operator [GBY_45] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_38] (rows=262 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_37] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 13 [SIMPLE_EDGE] + <-Map 12 [CONTAINS] llap + Reduce Output Operator [RS_36] + PartitionCols:_col0, _col1 + Group By Operator [GBY_35] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_28] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_156] (rows=25 width=7) + predicate:value is not null + TableScan [TS_26] (rows=25 width=7) + Output:["key","value"] + <-Map 19 [CONTAINS] llap + Reduce Output Operator [RS_36] + PartitionCols:_col0, _col1 + Group By Operator [GBY_35] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_31] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_157] (rows=500 width=10) + predicate:value is not null + TableScan [TS_29] (rows=500 width=10) + Output:["key","value"] <-Reducer 4 [CONTAINS] llap - Reduce Output Operator [RS_61] + Reduce Output Operator [RS_66] PartitionCols:_col0, _col1 - Group By Operator [GBY_60] (rows=1210 width=10) + Group By Operator [GBY_65] (rows=1155 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_23] (rows=605 width=10) + Select Operator [SEL_25] (rows=550 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_158] (rows=605 width=10) - Conds:RS_19._col1=RS_21._col1(Inner),Output:["_col0","_col6"] + Merge Join Operator [MERGEJOIN_168] (rows=550 width=10) + Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col2","_col5"] <-Map 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_155] (rows=550 width=10) - Conds:RS_14.key=FIL_143.key(Inner),Output:["_col0","_col1","_col6"] - <-Map 10 [BROADCAST_EDGE] llap - 
BROADCAST [RS_14] - PartitionCols:key - Filter Operator [FIL_142] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_11] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Filter Operator [FIL_143] (rows=500 width=10) + SHUFFLE [RS_23] + PartitionCols:_col0 + Select Operator [SEL_18] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=500 width=10) predicate:key is not null - TableScan [TS_12] (rows=500 width=10) + TableScan [TS_16] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] - PartitionCols:_col1 - Select Operator [SEL_10] (rows=262 width=10) - Output:["_col1"] - Group By Operator [GBY_9] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] llap - Reduce Output Operator [RS_8] - PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_1] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_140] (rows=25 width=7) - predicate:value is not null - TableScan [TS_0] (rows=25 width=7) - Output:["key","value"] - <-Map 9 [CONTAINS] llap - Reduce Output Operator [RS_8] - PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_3] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_141] (rows=500 width=10) - predicate:value is not null - TableScan [TS_2] (rows=500 width=10) - Output:["key","value"] - File Output Operator [FS_116] + SHUFFLE [RS_22] + PartitionCols:_col2 + Map Join Operator [MAPJOIN_167] (rows=288 width=10) + Conds:SEL_12._col1=RS_20._col1(Inner),Output:["_col2"] + <-Map 10 [BROADCAST_EDGE] llap + BROADCAST [RS_20] + PartitionCols:_col1 + Select Operator [SEL_15] (rows=25 width=7) + Output:["_col0","_col1"] + Filter 
Operator [FIL_154] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_13] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_12] (rows=262 width=10) + Output:["_col1"] + Group By Operator [GBY_11] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] llap + Reduce Output Operator [RS_10] + PartitionCols:_col0, _col1 + Group By Operator [GBY_9] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_2] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_152] (rows=25 width=7) + predicate:value is not null + TableScan [TS_0] (rows=25 width=7) + Output:["key","value"] + <-Map 9 [CONTAINS] llap + Reduce Output Operator [RS_10] + PartitionCols:_col0, _col1 + Group By Operator [GBY_9] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_5] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_153] (rows=500 width=10) + predicate:value is not null + TableScan [TS_3] (rows=500 width=10) + Output:["key","value"] + File Output Operator [FS_125] table:{"name:":"default.b"} - Please refer to the previous Group By Operator [GBY_111] - File Output Operator [FS_118] + Please refer to the previous Group By Operator [GBY_120] + File Output Operator [FS_127] table:{"name:":"default.c"} - Please refer to the previous Group By Operator [GBY_111] + Please refer to the previous Group By Operator [GBY_120] Stage-6 Stats-Aggr Operator Stage-1 @@ -3922,7 +3941,7 @@ FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value POSTHOOK: type: QUERY -Plan not optimized by CBO. 
+Plan optimized by CBO. Vertex dependency in root stage Map 6 <- Union 3 (CONTAINS) @@ -3939,50 +3958,50 @@ Stage-4 Dependency Collection{} Stage-2 Reducer 5 llap - File Output Operator [FS_20] + File Output Operator [FS_21] table:{"name:":"default.dest1"} - Group By Operator [GBY_18] (rows=1 width=96) + Group By Operator [GBY_19] (rows=1 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] + SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=1 width=280) + Group By Operator [GBY_17] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Group By Operator [GBY_13] (rows=1 width=272) + Group By Operator [GBY_14] (rows=1 width=272) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 3 [SIMPLE_EDGE] <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_12] + Reduce Output Operator [RS_13] PartitionCols:_col0, _col1 - Group By Operator [GBY_11] (rows=1 width=272) + Group By Operator [GBY_12] (rows=1 width=272) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_7] (rows=500 width=10) + Select Operator [SEL_8] (rows=500 width=10) Output:["_col0","_col1"] - TableScan [TS_6] (rows=500 width=10) + TableScan [TS_7] (rows=500 width=10) Output:["key","value"] <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_12] + Reduce Output Operator [RS_13] PartitionCols:_col0, _col1 - Group By Operator [GBY_11] (rows=1 width=272) + Group By Operator [GBY_12] (rows=1 width=272) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=1 width=272) + Select Operator [SEL_6] (rows=1 width=272) Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1 width=8) + Group By Operator [GBY_5] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=8) + SHUFFLE 
[RS_4] + Group By Operator [GBY_3] (rows=1 width=8) Output:["_col0"],aggregations:["count(1)"] Select Operator [SEL_1] (rows=500 width=10) TableScan [TS_0] (rows=500 width=10) default@src,s1,Tbl:COMPLETE,Col:COMPLETE - File Output Operator [FS_26] + File Output Operator [FS_27] table:{"name:":"default.dest2"} - Select Operator [SEL_25] (rows=1 width=456) + Select Operator [SEL_26] (rows=1 width=456) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_24] (rows=1 width=464) + Group By Operator [GBY_25] (rows=1 width=464) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 - Please refer to the previous Group By Operator [GBY_13] + Please refer to the previous Group By Operator [GBY_14] Stage-5 Stats-Aggr Operator Stage-1 @@ -4093,7 +4112,7 @@ INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value POSTHOOK: type: QUERY -Plan not optimized by CBO. +Plan optimized by CBO. 
Vertex dependency in root stage Map 6 <- Union 3 (CONTAINS) @@ -4111,68 +4130,64 @@ Stage-4 Dependency Collection{} Stage-2 Reducer 4 llap - File Output Operator [FS_20] + File Output Operator [FS_19] table:{"name:":"default.dest1"} - Group By Operator [GBY_18] (rows=1 width=96) + Group By Operator [GBY_17] (rows=1 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Union 3 [SIMPLE_EDGE] <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_17] + Reduce Output Operator [RS_16] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=1 width=280) + Group By Operator [GBY_15] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_9] (rows=501 width=272) + Select Operator [SEL_8] (rows=500 width=10) Output:["_col0","_col1"] - Select Operator [SEL_7] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_23] + TableScan [TS_7] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_22] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=464) + Group By Operator [GBY_21] (rows=1 width=464) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_9] + Please refer to the previous Select Operator [SEL_8] <-Map 7 [CONTAINS] llap - Reduce Output Operator [RS_17] + Reduce Output Operator [RS_16] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=1 width=280) + Group By Operator [GBY_15] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) Select Operator [SEL_12] (rows=500 width=10) Output:["_col0","_col1"] TableScan [TS_11] (rows=500 width=10) Output:["key","value"] - Reduce Output Operator [RS_23] + Reduce 
Output Operator [RS_22] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=464) + Group By Operator [GBY_21] (rows=1 width=464) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) Please refer to the previous Select Operator [SEL_12] <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_17] + Reduce Output Operator [RS_16] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=1 width=280) + Group By Operator [GBY_15] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_9] (rows=501 width=272) + Select Operator [SEL_6] (rows=1 width=272) Output:["_col0","_col1"] - Select Operator [SEL_5] (rows=1 width=360) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=8) - Output:["_col0"],aggregations:["count(1)"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_23] + Group By Operator [GBY_5] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=8) + Output:["_col0"],aggregations:["count(1)"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_22] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=464) + Group By Operator [GBY_21] (rows=1 width=464) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_9] + Please refer to the previous Select Operator [SEL_6] 
Reducer 5 llap - File Output Operator [FS_26] + File Output Operator [FS_25] table:{"name:":"default.dest2"} - Group By Operator [GBY_24] (rows=1 width=280) + Group By Operator [GBY_23] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 <- Please refer to the previous Union 3 [SIMPLE_EDGE] Stage-5 @@ -4202,7 +4217,7 @@ INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value POSTHOOK: type: QUERY -Plan not optimized by CBO. +Plan optimized by CBO. Vertex dependency in root stage Map 6 <- Union 3 (CONTAINS) @@ -4219,54 +4234,50 @@ Stage-4 Dependency Collection{} Stage-2 Reducer 4 llap - File Output Operator [FS_15] + File Output Operator [FS_16] table:{"name:":"default.dest1"} - Group By Operator [GBY_13] (rows=1 width=96) + Group By Operator [GBY_14] (rows=1 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Union 3 [SIMPLE_EDGE] <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_12] + Reduce Output Operator [RS_13] PartitionCols:_col0 - Group By Operator [GBY_11] (rows=1 width=280) + Group By Operator [GBY_12] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_9] (rows=501 width=11) + Select Operator [SEL_8] (rows=500 width=10) Output:["_col0","_col1"] - Select Operator [SEL_7] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_18] + TableScan [TS_7] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_19] PartitionCols:_col0, _col1 - Group By Operator [GBY_17] (rows=1 width=464) + Group By Operator [GBY_18] (rows=1 width=464) 
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_9] + Please refer to the previous Select Operator [SEL_8] <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_12] + Reduce Output Operator [RS_13] PartitionCols:_col0 - Group By Operator [GBY_11] (rows=1 width=280) + Group By Operator [GBY_12] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_9] (rows=501 width=11) + Select Operator [SEL_6] (rows=1 width=272) Output:["_col0","_col1"] - Select Operator [SEL_5] (rows=1 width=360) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=8) - Output:["_col0"],aggregations:["count(1)"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_18] + Group By Operator [GBY_5] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=8) + Output:["_col0"],aggregations:["count(1)"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_19] PartitionCols:_col0, _col1 - Group By Operator [GBY_17] (rows=1 width=464) + Group By Operator [GBY_18] (rows=1 width=464) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_9] + Please refer to the previous Select Operator [SEL_6] Reducer 5 llap - File Output Operator [FS_21] + File Output Operator [FS_22] 
table:{"name:":"default.dest2"} - Group By Operator [GBY_19] (rows=1 width=280) + Group By Operator [GBY_20] (rows=1 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 <- Please refer to the previous Union 3 [SIMPLE_EDGE] Stage-5 diff --git ql/src/test/results/clientpositive/llap/lineage3.q.out ql/src/test/results/clientpositive/llap/lineage3.q.out index 72a9344..bb23c41 100644 --- ql/src/test/results/clientpositive/llap/lineage3.q.out +++ ql/src/test/results/clientpositive/llap/lineage3.q.out @@ -25,7 +25,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@d1 PREHOOK: Output: default@d2 -{"version":"1.0","engine":"tez","database":"default","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t.x > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select 
a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t.x > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: drop table if exists t PREHOOK: type: DROPTABLE PREHOOK: query: create table t as @@ -370,7 +370,7 @@ PREHOOK: query: create table src_dp1 (f string, w string, m int) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@src_dp1 -Warning: Shuffle Join MERGEJOIN[17][tables = [src_dp, src_dp1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: from src_dp, src_dp1 insert into dest_dp1 partition (year) select first, word, year insert into dest_dp2 partition (y, m) select first, word, year, month @@ -385,4 +385,4 @@ PREHOOK: Output: default@dest_dp1@year=0 PREHOOK: Output: default@dest_dp2 PREHOOK: Output: 
default@dest_dp2@y=1 PREHOOK: Output: default@dest_dp3@y=2 -{"version":"1.0","engine":"tez","database":"default","hash":"44f16edbf35cfeaf3d4f7b0113a69b74","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w, m\ninsert into dest_dp1 partition (year=0) select f, w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(src_dp.year = 
2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"44f16edbf35cfeaf3d4f7b0113a69b74","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w, m\ninsert into dest_dp1 partition (year=0) select f, 
w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(subq.col7 = 2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]} diff --git ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out index 9b7612d..113a925 100644 --- 
ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out +++ ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out @@ -90,33 +90,29 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 133000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) + mode: hash + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -163,33 +159,29 @@ STAGE PLANS: Select Operator expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: 
COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -287,10 +279,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), (src)s0.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), (src)s0.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), 
(src)s0.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), (src)s0.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), (src)s0.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), (src)s0.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), (src)s0.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), (src)s0.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from DEST1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -1030,33 +1022,29 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 133000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column 
stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 3 @@ -1104,33 +1092,29 @@ STAGE PLANS: Select Operator expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output 
Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 @@ -1191,11 +1175,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 -POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s0.FieldSchema(name:key, type:string, comment:default), (src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s0.FieldSchema(name:value, type:string, comment:default), (src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s0.FieldSchema(name:key, type:string, comment:default), (src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s0.FieldSchema(name:value, type:string, comment:default), (src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s0.FieldSchema(name:value, type:string, comment:default), (src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), (src)s0.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), (src)s0.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), (src)s0.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), (src)s0.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), (src)s0.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from DEST1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -2085,10 +2069,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s0.FieldSchema(name:key, type:string, comment:default), (src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s0.FieldSchema(name:value, type:string, comment:default), (src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s0.FieldSchema(name:value, type:string, comment:default), (src)s2.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s0.FieldSchema(name:key, type:string, comment:default), (src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s0.FieldSchema(name:value, type:string, comment:default), (src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s0.FieldSchema(name:value, type:string, comment:default), (src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s0.FieldSchema(name:value, type:string, comment:default), (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s0.FieldSchema(name:value, type:string, comment:default), (src)s2.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from DEST1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -2932,10 +2916,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 
POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from DEST1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -3775,10 +3759,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, 
comment:default), ] PREHOOK: query: select * from DEST1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index 624d886..a63a68b 100644 --- ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -3012,10 +3012,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT DEST1.* FROM DEST1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -3848,10 +3848,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest118 POSTHOOK: Output: default@dest218 POSTHOOK: Lineage: dest118.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest118.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest118.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: 
dest218.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest218.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest218.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest218.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest218.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT DEST118.* FROM DEST118 SORT BY DEST118.key, DEST118.value PREHOOK: type: QUERY PREHOOK: Input: default@dest118 @@ -4696,10 +4696,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest119 POSTHOOK: Output: default@dest219 POSTHOOK: Lineage: dest119.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest119.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest119.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest219.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest219.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest219.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest219.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest219.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT DEST119.* FROM DEST119 SORT BY DEST119.key, DEST119.value PREHOOK: type: QUERY PREHOOK: Input: default@dest119 @@ -12515,11 +12515,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -12538,11 +12538,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -12847,11 +12847,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index 735e4f4..00b1aa7 100644 --- ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -1032,7 +1032,7 @@ from ( insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, val1, val2 POSTHOOK: type: QUERY -Plan not optimized by CBO. +Plan optimized by CBO. 
Stage-4 Stats-Aggr Operator @@ -1043,25 +1043,29 @@ Stage-4 Dependency Collection{} Stage-2 Map 1 llap - File Output Operator [FS_9] + File Output Operator [FS_11] table:{"name:":"default.dest1"} - Select Operator [SEL_8] (rows=11 width=93) + Select Operator [SEL_10] (rows=11 width=93) Output:["_col0","_col1"] - Select Operator [SEL_7] (rows=11 width=93) + Select Operator [SEL_9] (rows=11 width=93) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_16] (rows=11 width=93) - Conds:FIL_14.key=FIL_15.key(Inner),Output:["_col0","_col1","_col6"] - <-Filter Operator [FIL_15] (rows=10 width=93) - predicate:key is not null - TableScan [TS_1] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Filter Operator [FIL_14] (rows=10 width=93) - predicate:key is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - File Output Operator [FS_11] + Merge Join Operator [MERGEJOIN_18] (rows=11 width=93) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1","_col3"] + <-Select Operator [SEL_5] (rows=10 width=93) + Output:["_col0","_col1"] + Filter Operator [FIL_17] (rows=10 width=93) + predicate:key is not null + TableScan [TS_3] (rows=10 width=93) + default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_2] (rows=10 width=93) + Output:["_col0","_col1"] + Filter Operator [FIL_16] (rows=10 width=93) + predicate:key is not null + TableScan [TS_0] (rows=10 width=93) + default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + File Output Operator [FS_13] table:{"name:":"default.dest2"} - Please refer to the previous Select Operator [SEL_7] + Please refer to the previous Select Operator [SEL_9] Stage-5 Stats-Aggr Operator Stage-1 @@ -1188,7 +1192,7 @@ from ( insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, count(*) group by key POSTHOOK: type: QUERY -Plan not optimized by CBO. +Plan optimized by CBO. 
Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) @@ -1202,32 +1206,36 @@ Stage-4 Dependency Collection{} Stage-2 Reducer 2 vectorized, llap - File Output Operator [FS_25] + File Output Operator [FS_27] table:{"name:":"default.dest2"} - Select Operator [SEL_24] (rows=5 width=93) + Select Operator [SEL_26] (rows=5 width=93) Output:["_col0","_col1"] - Group By Operator [GBY_23] (rows=5 width=93) + Group By Operator [GBY_25] (rows=5 width=93) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] llap - File Output Operator [FS_9] + File Output Operator [FS_11] table:{"name:":"default.dest1"} - Merge Join Operator [MERGEJOIN_21] (rows=11 width=93) - Conds:FIL_19.key=FIL_20.key(Inner),Output:["_col0","_col1"] - <-Filter Operator [FIL_20] (rows=10 width=93) - predicate:key is not null - TableScan [TS_1] (rows=10 width=93) - default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Filter Operator [FIL_19] (rows=10 width=93) - predicate:key is not null - TableScan [TS_0] (rows=10 width=93) - default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - SHUFFLE [RS_12] + Merge Join Operator [MERGEJOIN_23] (rows=11 width=93) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=10 width=93) + Output:["_col0"] + Filter Operator [FIL_22] (rows=10 width=93) + predicate:key is not null + TableScan [TS_3] (rows=10 width=93) + default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_2] (rows=10 width=93) + Output:["_col0","_col1"] + Filter Operator [FIL_21] (rows=10 width=93) + predicate:key is not null + TableScan [TS_0] (rows=10 width=93) + default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + SHUFFLE [RS_14] PartitionCols:_col0 - Group By Operator [GBY_11] (rows=11 width=93) + Group By Operator [GBY_13] (rows=11 width=93) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_10] (rows=11 width=93) + Select Operator 
[SEL_12] (rows=11 width=93) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_21] + Please refer to the previous Merge Join Operator [MERGEJOIN_23] Stage-5 Stats-Aggr Operator Stage-1 diff --git ql/src/test/results/clientpositive/multi_insert_gby4.q.out ql/src/test/results/clientpositive/multi_insert_gby4.q.out new file mode 100644 index 0000000..1536d4a --- /dev/null +++ ql/src/test/results/clientpositive/multi_insert_gby4.q.out @@ -0,0 +1,279 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table e1 (key string, count int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table e1 (key string, count int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e1 +PREHOOK: query: create table e2 (key string, count int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e2 +POSTHOOK: query: create table e2 (key string, count int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e2 +PREHOOK: query: create table e3 (key string, count int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e3 +POSTHOOK: query: create table e3 (key string, count int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e3 +PREHOOK: query: explain +FROM (SELECT key, value FROM src) a +INSERT OVERWRITE TABLE e1 + SELECT key, COUNT(*) WHERE key>450 GROUP BY key +INSERT OVERWRITE TABLE e2 + SELECT key, COUNT(*) WHERE key>500 GROUP BY key +INSERT OVERWRITE TABLE e3 + SELECT key, COUNT(*) WHERE key>490 GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM (SELECT key, value FROM src) a +INSERT OVERWRITE TABLE e1 + SELECT key, COUNT(*) WHERE key>450 GROUP BY key +INSERT OVERWRITE TABLE e2 + SELECT key, COUNT(*) WHERE key>500 GROUP BY key +INSERT OVERWRITE TABLE e3 + 
SELECT key, COUNT(*) WHERE key>490 GROUP BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > 490) or ((_col0 > 500) or (_col0 > 450))) (type: boolean) + Statistics: Num rows: 498 Data size: 5290 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 498 Data size: 5290 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 498 Data size: 5290 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 > 450) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + Filter Operator + predicate: (KEY._col0 > 500) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 + Filter Operator + predicate: (KEY._col0 > 490) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e3 + + Stage: Stage-0 + Move Operator + 
tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e3 + + Stage: Stage-6 + Stats-Aggr Operator + +PREHOOK: query: FROM (SELECT key, value FROM src) a +INSERT OVERWRITE TABLE e1 + SELECT key, COUNT(*) WHERE key>450 GROUP BY key +INSERT OVERWRITE TABLE e2 + SELECT key, COUNT(*) WHERE key>500 GROUP BY key +INSERT OVERWRITE TABLE e3 + SELECT key, COUNT(*) WHERE key>490 GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@e1 +PREHOOK: Output: default@e2 +PREHOOK: Output: default@e3 +POSTHOOK: query: FROM (SELECT key, value FROM src) a +INSERT OVERWRITE TABLE e1 + SELECT key, COUNT(*) WHERE key>450 GROUP BY key +INSERT OVERWRITE TABLE e2 + SELECT key, COUNT(*) WHERE key>500 GROUP BY key +INSERT OVERWRITE TABLE e3 + SELECT key, COUNT(*) WHERE key>490 GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@e1 +POSTHOOK: Output: default@e2 +POSTHOOK: Output: default@e3 +POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: e2.count EXPRESSION [(src)src.null, ] 
+POSTHOOK: Lineage: e2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: e3.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from e1 +PREHOOK: type: QUERY +PREHOOK: Input: default@e1 +#### A masked pattern was here #### +POSTHOOK: query: select * from e1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e1 +#### A masked pattern was here #### +452 1 +453 1 +454 3 +455 1 +457 1 +458 2 +459 2 +460 1 +462 2 +463 2 +466 3 +467 1 +468 4 +469 5 +470 1 +472 1 +475 1 +477 1 +478 2 +479 1 +480 3 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 4 +490 1 +491 1 +492 2 +493 1 +494 1 +495 1 +496 1 +497 1 +498 3 +PREHOOK: query: select * from e2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e2 +#### A masked pattern was here #### +POSTHOOK: query: select * from e2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e2 +#### A masked pattern was here #### +PREHOOK: query: select * from e3 +PREHOOK: type: QUERY +PREHOOK: Input: default@e3 +#### A masked pattern was here #### +POSTHOOK: query: select * from e3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e3 +#### A masked pattern was here #### +491 1 +492 2 +493 1 +494 1 +495 1 +496 1 +497 1 +498 3 diff --git ql/src/test/results/clientpositive/multi_insert_union_src.q.out ql/src/test/results/clientpositive/multi_insert_union_src.q.out index 2036e63..1ff1db5 100644 --- ql/src/test/results/clientpositive/multi_insert_union_src.q.out +++ ql/src/test/results/clientpositive/multi_insert_union_src.q.out @@ -64,7 +64,7 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 10) (type: boolean) + predicate: (UDFToDouble(key) < 10.0) (type: boolean) Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ 
-93,7 +93,7 @@ STAGE PLANS: alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 100) (type: boolean) + predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/multi_insert_with_join2.q.out ql/src/test/results/clientpositive/multi_insert_with_join2.q.out index 70a044d..5f69cc5 100644 --- ql/src/test/results/clientpositive/multi_insert_with_join2.q.out +++ ql/src/test/results/clientpositive/multi_insert_with_join2.q.out @@ -50,6 +50,7 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@t_b POSTHOOK: Lineage: t_b.id SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: t_b.val SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain FROM T_A a LEFT JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -74,45 +75,49 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: string) - sort order: + - Map-reduce partition columns: id (type: string) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Filter Operator + predicate: (id = 'Id_1') (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: val (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 
1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: b Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: string) - sort order: + - Map-reduce partition columns: id (type: string) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Filter Operator + predicate: ((id = 'Id_1') and (val = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: - 0 id (type: string) - 1 id (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.join_result_1 + 0 + 1 + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + expressions: 'Id_1' (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 Stage: Stage-0 Move Operator @@ -127,6 +132,7 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain FROM T_A a LEFT JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_3 @@ -151,45 +157,49 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: string) - sort order: + - Map-reduce partition columns: id (type: string) - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Filter Operator + predicate: ((id = 'Id_2') and (val <> 'val_104')) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: val (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: b Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: string) - sort order: + - Map-reduce partition columns: 
id (type: string) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Filter Operator + predicate: ((val = 'val_104') and (id = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 keys: - 0 id (type: string) - 1 id (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 <> 'val_104') and (_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.join_result_3 + 0 + 1 + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'Id_2' (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 Stage: Stage-0 Move Operator @@ -549,3 +559,384 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator +PREHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT * +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT * +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT * +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT * +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + 
key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.id, a.val, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, a.val, b.id, b.val +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.id, a.val, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, a.val, b.id, b.val +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), val (type: string) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col3 = 'Id_1') and (_col2 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.val, a.id, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, 
b.val, b.id, a.val +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.val, a.id, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, b.val, b.id, a.val +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE 
+ value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col3 = 'Id_1') and (_col2 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator + diff --git ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out index 0e99972..318fc34 100644 --- ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out @@ -1393,22 +1393,34 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 
(type: int) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 File Output Operator compressed: false Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE @@ -1416,15 +1428,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - File Output Operator - compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest2 Stage: Stage-0 Move Operator @@ -1593,38 +1597,42 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: 
- 0 key (type: int) - 1 key (type: int) + Select Operator + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key 
expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out index fb07771..00705e7 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out @@ -92,16 +92,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -279,16 +279,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 11 Data size: 77 Basic stats: 
COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -467,10 +467,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -486,19 +490,19 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 
(type: int) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int) diff --git ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out index 92ca67b..9daaad9 100644 --- ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out +++ ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out @@ -4032,7 +4032,7 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 8) (type: boolean) + predicate: (UDFToDouble(key) = 8.0) (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) diff --git ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out index f7f4dbb..f345e7e 100644 --- ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out +++ ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out @@ -4181,7 +4181,7 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 8) (type: boolean) + predicate: (UDFToDouble(key) = 8.0) (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) diff --git ql/src/test/results/clientpositive/spark/multi_insert_with_join2.q.out ql/src/test/results/clientpositive/spark/multi_insert_with_join2.q.out new file mode 100644 index 0000000..b79d86c --- /dev/null +++ ql/src/test/results/clientpositive/spark/multi_insert_with_join2.q.out @@ -0,0 +1,1006 @@ +PREHOOK: query: CREATE TABLE T_A ( id STRING, val STRING ) 
+PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T_A +POSTHOOK: query: CREATE TABLE T_A ( id STRING, val STRING ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T_A +PREHOOK: query: CREATE TABLE T_B ( id STRING, val STRING ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T_B +POSTHOOK: query: CREATE TABLE T_B ( id STRING, val STRING ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T_B +PREHOOK: query: CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@join_result_1 +POSTHOOK: query: CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@join_result_1 +PREHOOK: query: CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@join_result_3 +POSTHOOK: query: CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@join_result_3 +PREHOOK: query: INSERT INTO TABLE T_A +VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103') +PREHOOK: type: QUERY +PREHOOK: Output: default@t_a +POSTHOOK: query: INSERT INTO TABLE T_A +VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t_a +POSTHOOK: Lineage: t_a.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t_a.val SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: INSERT 
INTO TABLE T_B +VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104') +PREHOOK: type: QUERY +PREHOOK: Output: default@t_b +POSTHOOK: query: INSERT INTO TABLE T_B +VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t_b +POSTHOOK: Lineage: t_b.id SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t_b.val SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id = 'Id_1') (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: val (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data 
size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((id = 'Id_1') and (val = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'Id_1' (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-2 + Stats-Aggr Operator + +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE 
join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((id = 'Id_2') and (val <> 'val_104')) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: val (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((val = 'val_104') and (id = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'Id_2' (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 25 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data 
size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2') and (_col1 <> _col6)) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a LEFT JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: 
a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), 
_col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.*, b.* +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.*, b.* +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: 
+ Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT * +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT * +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: 
query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT * +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT * +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string) + sort order: + + Map-reduce partition columns: id (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 id (type: string) + 1 id (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 = 
'Id_1') and (_col6 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.id, a.val, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, a.val, b.id, b.val +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.id, a.val, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, a.val, b.id, b.val +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data 
size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col3 = 'Id_1') and (_col2 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col2 = 'val_104') and (_col3 
= 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.val, a.id, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, b.val, b.id, a.val +WHERE b.val = 'val_104' AND b.id = 'Id_2' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM T_A a JOIN T_B b ON a.id = b.id +INSERT OVERWRITE TABLE join_result_1 +SELECT a.val, a.id, b.id, b.val +WHERE b.id = 'Id_1' AND b.val = 'val_103' +INSERT OVERWRITE TABLE join_result_3 +SELECT a.id, b.val, b.id, a.val +WHERE b.val = 'val_104' AND b.id = 'Id_2' +POSTHOOK: 
type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, 
_col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col3 = 'Id_1') and (_col2 = 'val_103')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + Filter Operator + predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.join_result_3 + + Stage: Stage-4 + Stats-Aggr Operator + diff --git ql/src/test/results/clientpositive/spark/union17.q.out ql/src/test/results/clientpositive/spark/union17.q.out index ce23773..6ef83be 100644 --- ql/src/test/results/clientpositive/spark/union17.q.out +++ ql/src/test/results/clientpositive/spark/union17.q.out @@ -229,10 +229,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT DEST1.* FROM DEST1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git 
ql/src/test/results/clientpositive/spark/union18.q.out ql/src/test/results/clientpositive/spark/union18.q.out index f9a28bb..aeaac97 100644 --- ql/src/test/results/clientpositive/spark/union18.q.out +++ ql/src/test/results/clientpositive/spark/union18.q.out @@ -172,10 +172,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT DEST1.* FROM DEST1 SORT BY DEST1.key, DEST1.value PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git ql/src/test/results/clientpositive/spark/union19.q.out ql/src/test/results/clientpositive/spark/union19.q.out index d81c19b..6f225a7 100644 --- ql/src/test/results/clientpositive/spark/union19.q.out +++ ql/src/test/results/clientpositive/spark/union19.q.out @@ -197,10 +197,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT DEST1.* FROM DEST1 SORT BY DEST1.key, DEST1.value PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git ql/src/test/results/clientpositive/spark/union31.q.out ql/src/test/results/clientpositive/spark/union31.q.out index a1f29eb..1f31dee 100644 --- ql/src/test/results/clientpositive/spark/union31.q.out +++ ql/src/test/results/clientpositive/spark/union31.q.out @@ -377,11 +377,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -398,11 +398,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: 
hash outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -671,11 +671,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/spark/union_remove_6.q.out ql/src/test/results/clientpositive/spark/union_remove_6.q.out index 1bdeb09..36145e9 100644 --- ql/src/test/results/clientpositive/spark/union_remove_6.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_6.q.out @@ -84,11 +84,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out index cebea03..aafc1bd 100644 --- ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out @@ -90,11 +90,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data 
size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out index c99acc9..641daf5 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out @@ -1123,24 +1123,24 @@ INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value POSTHOOK: type: QUERY -Plan not optimized by CBO. +Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) -Map 12 <- Union 10 (CONTAINS) Map 13 <- Union 10 (CONTAINS) -Map 17 <- Union 18 (CONTAINS) -Map 20 <- Union 18 (CONTAINS) -Map 21 <- Union 18 (CONTAINS) -Map 22 <- Union 18 (CONTAINS) -Map 5 <- Union 2 (CONTAINS) +Map 14 <- Union 10 (CONTAINS) +Map 21 <- Union 22 (CONTAINS) +Map 23 <- Union 22 (CONTAINS) +Map 24 <- Union 22 (CONTAINS) +Map 25 <- Union 22 (CONTAINS) +Map 6 <- Union 2 (CONTAINS) Map 9 <- Union 10 (CONTAINS) -Reducer 11 <- Reducer 15 (SIMPLE_EDGE), Union 10 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 19 <- Reducer 24 (SIMPLE_EDGE), Union 18 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) -Reducer 3 <- Reducer 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Map 15 (SIMPLE_EDGE), Union 10 (SIMPLE_EDGE) +Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Union 22 (SIMPLE_EDGE), Union 5 
(CONTAINS) +Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) +Reducer 4 <- Map 8 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Stage-5 Stats-Aggr Operator @@ -1150,205 +1150,199 @@ Stage-5 Stage-4 Dependency Collection{} Stage-3 - Union 4 - <-Reducer 11 [CONTAINS] - File Output Operator [FS_66] + Union 5 + <-Reducer 12 [CONTAINS] + File Output Operator [FS_79] table:{"name:":"default.a"} - Select Operator [SEL_39] (rows=5838/5421 width=178) + Select Operator [SEL_45] (rows=5839/5421 width=178) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_111] (rows=5838/5421 width=178) - Conds:RS_35._col1=Union 10._col1(Inner),Output:["_col0","_col6"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_35] + Merge Join Operator [MERGEJOIN_126] (rows=5839/5421 width=178) + Conds:RS_42._col1=RS_43._col0(Inner),Output:["_col1","_col4"] + <-Map 16 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0 + Select Operator [SEL_38] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_116] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_36] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_42] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_108] (rows=1219/1028 width=269) - Conds:RS_30.key=RS_32.key(Inner),Output:["_col0","_col1","_col6"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:key - Filter Operator [FIL_99] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - TableScan [TS_27] (rows=500/500 width=178) - default@src,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 16 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:key - Filter Operator [FIL_100] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_28] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 10 [SIMPLE_EDGE] - <-Map 12 [CONTAINS] - Reduce Output Operator [RS_37] 
- PartitionCols:_col1 - Select Operator [SEL_26] (rows=1025/1025 width=90) - Output:["_col1"] - Select Operator [SEL_22] (rows=500/500 width=91) - Output:["_col1"] - Filter Operator [FIL_97] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_21] (rows=500/500 width=91) - Output:["value"] - <-Map 13 [CONTAINS] - Reduce Output Operator [RS_37] - PartitionCols:_col1 - Select Operator [SEL_26] (rows=1025/1025 width=90) - Output:["_col1"] - Select Operator [SEL_25] (rows=500/500 width=91) - Output:["_col1"] - Filter Operator [FIL_98] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_24] (rows=500/500 width=91) - Output:["value"] - <-Map 9 [CONTAINS] - Reduce Output Operator [RS_37] - PartitionCols:_col1 - Select Operator [SEL_26] (rows=1025/1025 width=90) - Output:["_col1"] - Select Operator [SEL_20] (rows=25/25 width=89) - Output:["_col1"] - Filter Operator [FIL_96] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_19] (rows=25/25 width=89) - Output:["value"] - File Output Operator [FS_68] + Merge Join Operator [MERGEJOIN_125] (rows=2394/2097 width=87) + Conds:Union 10._col0=RS_40._col1(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col1 + Select Operator [SEL_35] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_115] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + TableScan [TS_33] (rows=500/500 width=178) + default@src,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 10 [SIMPLE_EDGE] + <-Map 13 [CONTAINS] + Reduce Output Operator [RS_39] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_113] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_24] (rows=500/500 width=91) + Output:["value"] + <-Map 14 [CONTAINS] + Reduce Output Operator [RS_39] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=500/500 width=91) + 
Output:["_col0"] + Filter Operator [FIL_114] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_29] (rows=500/500 width=91) + Output:["value"] + <-Map 9 [CONTAINS] + Reduce Output Operator [RS_39] + PartitionCols:_col0 + Select Operator [SEL_23] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_112] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_21] (rows=25/25 width=89) + Output:["value"] + File Output Operator [FS_81] table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_39] - File Output Operator [FS_70] + Please refer to the previous Select Operator [SEL_45] + File Output Operator [FS_83] table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_39] + Please refer to the previous Select Operator [SEL_45] <-Reducer 19 [CONTAINS] - File Output Operator [FS_66] + File Output Operator [FS_79] table:{"name:":"default.a"} - Select Operator [SEL_63] (rows=313/820 width=175) + Select Operator [SEL_76] (rows=313/820 width=175) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_112] (rows=313/820 width=175) - Conds:RS_59._col1=Union 18._col1(Inner),Output:["_col0","_col6"] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_59] + Merge Join Operator [MERGEJOIN_128] (rows=313/820 width=175) + Conds:RS_73._col1=Union 22._col0(Inner),Output:["_col0","_col3"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_73] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_109] (rows=44/115 width=264) - Conds:RS_54.key=RS_56.key(Inner),Output:["_col0","_col1","_col6"] - <-Map 23 [SIMPLE_EDGE] - SHUFFLE [RS_54] - PartitionCols:key - Filter Operator [FIL_105] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_51] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 25 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:key - Filter Operator [FIL_106] (rows=25/25 width=175) - predicate:key is not null - TableScan 
[TS_52] (rows=25/25 width=175) - default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 18 [SIMPLE_EDGE] - <-Map 17 [CONTAINS] - Reduce Output Operator [RS_61] - PartitionCols:_col1 - Select Operator [SEL_50] (rows=1525/1525 width=90) - Output:["_col1"] - Select Operator [SEL_42] (rows=25/25 width=89) - Output:["_col1"] - Filter Operator [FIL_101] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_41] (rows=25/25 width=89) - Output:["value"] - <-Map 20 [CONTAINS] - Reduce Output Operator [RS_61] - PartitionCols:_col1 - Select Operator [SEL_50] (rows=1525/1525 width=90) - Output:["_col1"] - Select Operator [SEL_44] (rows=500/500 width=91) - Output:["_col1"] - Filter Operator [FIL_102] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_43] (rows=500/500 width=91) - Output:["value"] + Merge Join Operator [MERGEJOIN_127] (rows=44/115 width=264) + Conds:RS_70._col0=RS_71._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col0 + Select Operator [SEL_50] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_117] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_48] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col0 + Select Operator [SEL_53] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_118] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_51] (rows=25/25 width=175) + default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 22 [SIMPLE_EDGE] <-Map 21 [CONTAINS] - Reduce Output Operator [RS_61] - PartitionCols:_col1 - Select Operator [SEL_50] (rows=1525/1525 width=90) - Output:["_col1"] - Select Operator [SEL_47] (rows=500/500 width=91) - Output:["_col1"] - Filter Operator [FIL_103] (rows=500/500 width=91) - predicate:value is not null - 
TableScan [TS_46] (rows=500/500 width=91) - Output:["value"] - <-Map 22 [CONTAINS] - Reduce Output Operator [RS_61] - PartitionCols:_col1 - Select Operator [SEL_50] (rows=1525/1525 width=90) - Output:["_col1"] - Select Operator [SEL_49] (rows=500/500 width=91) - Output:["_col1"] - Filter Operator [FIL_104] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_48] (rows=500/500 width=91) - Output:["value"] - File Output Operator [FS_68] + Reduce Output Operator [RS_74] + PartitionCols:_col0 + Select Operator [SEL_56] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_119] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_54] (rows=25/25 width=89) + Output:["value"] + <-Map 23 [CONTAINS] + Reduce Output Operator [RS_74] + PartitionCols:_col0 + Select Operator [SEL_59] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_120] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_57] (rows=500/500 width=91) + Output:["value"] + <-Map 24 [CONTAINS] + Reduce Output Operator [RS_74] + PartitionCols:_col0 + Select Operator [SEL_64] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_121] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_62] (rows=500/500 width=91) + Output:["value"] + <-Map 25 [CONTAINS] + Reduce Output Operator [RS_74] + PartitionCols:_col0 + Select Operator [SEL_68] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_122] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_66] (rows=500/500 width=91) + Output:["value"] + File Output Operator [FS_81] table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_63] - File Output Operator [FS_70] + Please refer to the previous Select Operator [SEL_76] + File Output Operator [FS_83] table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_63] - <-Reducer 3 [CONTAINS] - File Output Operator [FS_66] + Please refer to the previous 
Select Operator [SEL_76] + <-Reducer 4 [CONTAINS] + File Output Operator [FS_79] table:{"name:":"default.a"} - Select Operator [SEL_18] (rows=147/170 width=177) + Select Operator [SEL_20] (rows=148/170 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_110] (rows=147/170 width=177) - Conds:RS_14._col1=Union 2._col1(Inner),Output:["_col0","_col6"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_14] + Merge Join Operator [MERGEJOIN_124] (rows=148/170 width=177) + Conds:RS_17._col1=RS_18._col0(Inner),Output:["_col1","_col4"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_13] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_111] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_11] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_107] (rows=60/37 width=266) - Conds:RS_9.key=RS_11.key(Inner),Output:["_col0","_col1","_col6"] - <-Map 6 [SIMPLE_EDGE] - SHUFFLE [RS_9] - PartitionCols:key - Filter Operator [FIL_94] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_6] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_11] - PartitionCols:key - Filter Operator [FIL_95] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_7] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_16] - PartitionCols:_col1 - Select Operator [SEL_5] (rows=525/525 width=90) - Output:["_col1"] - Select Operator [SEL_1] (rows=25/25 width=89) - Output:["_col1"] - Filter Operator [FIL_92] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_0] (rows=25/25 width=89) - Output:["value"] - <-Map 5 [CONTAINS] - Reduce 
Output Operator [RS_16] - PartitionCols:_col1 - Select Operator [SEL_5] (rows=525/525 width=90) - Output:["_col1"] - Select Operator [SEL_3] (rows=500/500 width=91) - Output:["_col1"] - Filter Operator [FIL_93] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_2] (rows=500/500 width=91) - Output:["value"] - File Output Operator [FS_68] + Merge Join Operator [MERGEJOIN_123] (rows=61/108 width=86) + Conds:Union 2._col0=RS_15._col1(Inner),Output:["_col1"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Select Operator [SEL_10] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_110] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_8] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_108] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_0] (rows=25/25 width=89) + Output:["value"] + <-Map 6 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_109] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_3] (rows=500/500 width=91) + Output:["value"] + File Output Operator [FS_81] table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_18] - File Output Operator [FS_70] + Please refer to the previous Select Operator [SEL_20] + File Output Operator [FS_83] table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_18] + Please refer to the previous Select Operator [SEL_20] Stage-6 Stats-Aggr Operator Stage-1 @@ -1434,32 +1428,32 @@ INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value INSERT OVERWRITE TABLE c SELECT 
tmp.key, tmp.value POSTHOOK: type: QUERY -Plan not optimized by CBO. +Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) +Map 10 <- Union 2 (CONTAINS) Map 13 <- Union 14 (CONTAINS) -Map 19 <- Union 14 (CONTAINS) -Map 20 <- Union 16 (CONTAINS) -Map 24 <- Union 25 (CONTAINS) -Map 32 <- Union 25 (CONTAINS) -Map 33 <- Union 27 (CONTAINS) -Map 34 <- Union 29 (CONTAINS) -Map 9 <- Union 2 (CONTAINS) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Map 20 <- Union 14 (CONTAINS) +Map 21 <- Union 16 (CONTAINS) +Map 28 <- Union 29 (CONTAINS) +Map 35 <- Union 29 (CONTAINS) +Map 36 <- Union 31 (CONTAINS) +Map 37 <- Union 33 (CONTAINS) Reducer 15 <- Union 14 (SIMPLE_EDGE), Union 16 (CONTAINS) Reducer 17 <- Union 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Reducer 26 <- Union 25 (SIMPLE_EDGE), Union 27 (CONTAINS) -Reducer 28 <- Union 27 (SIMPLE_EDGE), Union 29 (CONTAINS) +Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Map 23 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE), Union 8 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) -Reducer 30 <- Union 29 (SIMPLE_EDGE) -Reducer 31 <- Reducer 30 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) -Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 32 <- Union 31 (SIMPLE_EDGE), Union 33 (CONTAINS) +Reducer 34 <- Union 33 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Union 6 
(CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 9 <- Union 8 (SIMPLE_EDGE) Stage-5 Stats-Aggr Operator @@ -1469,225 +1463,243 @@ Stage-5 Stage-4 Dependency Collection{} Stage-3 - Reducer 8 - File Output Operator [FS_106] + Reducer 9 + File Output Operator [FS_115] table:{"name:":"default.a"} - Group By Operator [GBY_103] (rows=6298/319 width=178) + Group By Operator [GBY_112] (rows=6300/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 31 [CONTAINS] - Reduce Output Operator [RS_102] + <-Union 8 [SIMPLE_EDGE] + <-Reducer 26 [CONTAINS] + Reduce Output Operator [RS_111] PartitionCols:_col0, _col1 - Select Operator [SEL_98] (rows=313/304 width=175) + Select Operator [SEL_107] (rows=313/304 width=175) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_152] (rows=313/304 width=175) - Conds:RS_94._col1=RS_96._col1(Inner),Output:["_col0","_col6"] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_96] + Merge Join Operator [MERGEJOIN_164] (rows=313/304 width=175) + Conds:RS_104._col1=RS_105._col1(Inner),Output:["_col0","_col3"] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_104] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_163] (rows=44/115 width=264) + Conds:RS_101._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 24 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col0 + Select Operator [SEL_67] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_153] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_65] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 27 [SIMPLE_EDGE] + SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_70] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_154] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_68] (rows=25/25 width=175) + 
default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_105] PartitionCols:_col1 - Select Operator [SEL_85] (rows=1525/319 width=178) + Select Operator [SEL_100] (rows=1525/319 width=178) Output:["_col1"] - Group By Operator [GBY_84] (rows=1525/319 width=178) + Group By Operator [GBY_99] (rows=1525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 29 [SIMPLE_EDGE] - <-Map 34 [CONTAINS] - Reduce Output Operator [RS_83] - PartitionCols:_col0, _col1 - Select Operator [SEL_79] (rows=500/500 width=178) + <-Union 33 [SIMPLE_EDGE] + <-Map 37 [CONTAINS] + Reduce Output Operator [RS_98] + PartitionCols:_col1, _col0 + Select Operator [SEL_94] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_144] (rows=500/500 width=178) + Filter Operator [FIL_158] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_78] (rows=500/500 width=178) + TableScan [TS_92] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 28 [CONTAINS] - Reduce Output Operator [RS_83] - PartitionCols:_col0, _col1 - Group By Operator [GBY_76] (rows=1025/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 27 [SIMPLE_EDGE] - <-Map 33 [CONTAINS] - Reduce Output Operator [RS_75] - PartitionCols:_col0, _col1 - Select Operator [SEL_71] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_143] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_70] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 26 [CONTAINS] - Reduce Output Operator [RS_75] - PartitionCols:_col0, _col1 - Group By Operator [GBY_68] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 25 [SIMPLE_EDGE] - <-Map 24 [CONTAINS] - Reduce Output Operator [RS_67] - PartitionCols:_col0, _col1 - Select Operator [SEL_61] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_141] (rows=25/25 width=175) - 
predicate:value is not null - TableScan [TS_60] (rows=25/25 width=175) - Output:["key","value"] - <-Map 32 [CONTAINS] - Reduce Output Operator [RS_67] - PartitionCols:_col0, _col1 - Select Operator [SEL_63] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_142] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_62] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 36 [SIMPLE_EDGE] - SHUFFLE [RS_94] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_149] (rows=44/115 width=264) - Conds:RS_89.key=RS_91.key(Inner),Output:["_col0","_col1","_col6"] - <-Map 35 [SIMPLE_EDGE] - SHUFFLE [RS_89] - PartitionCols:key - Filter Operator [FIL_145] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_86] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 37 [SIMPLE_EDGE] - SHUFFLE [RS_91] - PartitionCols:key - Filter Operator [FIL_146] (rows=25/25 width=175) - predicate:key is not null - TableScan [TS_87] (rows=25/25 width=175) - default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 6 [CONTAINS] - Reduce Output Operator [RS_102] + <-Reducer 32 [CONTAINS] + Reduce Output Operator [RS_98] + PartitionCols:_col1, _col0 + Select Operator [SEL_91] (rows=1025/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_90] (rows=1025/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 31 [SIMPLE_EDGE] + <-Map 36 [CONTAINS] + Reduce Output Operator [RS_89] + PartitionCols:_col1, _col0 + Select Operator [SEL_85] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_157] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_83] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 30 [CONTAINS] + Reduce Output Operator [RS_89] + PartitionCols:_col1, _col0 + Select Operator [SEL_82] (rows=525/319 width=178) + Output:["_col0","_col1"] + Group By 
Operator [GBY_81] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 29 [SIMPLE_EDGE] + <-Map 28 [CONTAINS] + Reduce Output Operator [RS_80] + PartitionCols:_col1, _col0 + Select Operator [SEL_73] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_71] (rows=25/25 width=175) + Output:["key","value"] + <-Map 35 [CONTAINS] + Reduce Output Operator [RS_80] + PartitionCols:_col1, _col0 + Select Operator [SEL_76] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_156] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_74] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 7 [CONTAINS] + Reduce Output Operator [RS_111] PartitionCols:_col0, _col1 - Group By Operator [GBY_58] (rows=5985/309 width=178) + Group By Operator [GBY_63] (rows=5987/309 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 18 [CONTAINS] - Reduce Output Operator [RS_57] + <-Union 6 [SIMPLE_EDGE] + <-Reducer 19 [CONTAINS] + Reduce Output Operator [RS_62] PartitionCols:_col0, _col1 - Select Operator [SEL_53] (rows=5838/1056 width=178) + Select Operator [SEL_58] (rows=5839/1056 width=178) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_151] (rows=5838/1056 width=178) - Conds:RS_49._col1=RS_51._col1(Inner),Output:["_col0","_col6"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Select Operator [SEL_40] (rows=1025/319 width=178) - Output:["_col1"] - Group By Operator [GBY_39] (rows=1025/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 16 [SIMPLE_EDGE] - <-Map 20 [CONTAINS] - Reduce Output Operator [RS_38] - PartitionCols:_col0, _col1 - Select Operator [SEL_34] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_138] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_33] 
(rows=500/500 width=178) - Output:["key","value"] - <-Reducer 15 [CONTAINS] - Reduce Output Operator [RS_38] - PartitionCols:_col0, _col1 - Group By Operator [GBY_31] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] - Reduce Output Operator [RS_30] - PartitionCols:_col0, _col1 - Select Operator [SEL_24] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_136] (rows=25/25 width=175) - predicate:value is not null - TableScan [TS_23] (rows=25/25 width=175) - Output:["key","value"] - <-Map 19 [CONTAINS] - Reduce Output Operator [RS_30] - PartitionCols:_col0, _col1 - Select Operator [SEL_26] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_137] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_25] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_148] (rows=1219/1028 width=269) - Conds:RS_44.key=RS_46.key(Inner),Output:["_col0","_col1","_col6"] - <-Map 21 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:key - Filter Operator [FIL_139] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - TableScan [TS_41] (rows=500/500 width=178) - default@src,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 23 [SIMPLE_EDGE] - SHUFFLE [RS_46] - PartitionCols:key - Filter Operator [FIL_140] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_42] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_57] + Merge Join Operator [MERGEJOIN_162] (rows=5839/1056 width=178) + Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col2","_col5"] + <-Map 23 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_152] 
(rows=500/500 width=178) + predicate:key is not null + TableScan [TS_49] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_161] (rows=2394/512 width=87) + Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2"] + <-Map 22 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col1 + Select Operator [SEL_48] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_151] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + TableScan [TS_46] (rows=500/500 width=178) + default@src,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col1 + Select Operator [SEL_45] (rows=1025/319 width=178) + Output:["_col1"] + Group By Operator [GBY_44] (rows=1025/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 16 [SIMPLE_EDGE] + <-Map 21 [CONTAINS] + Reduce Output Operator [RS_43] + PartitionCols:_col1, _col0 + Select Operator [SEL_39] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_150] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_37] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 15 [CONTAINS] + Reduce Output Operator [RS_43] + PartitionCols:_col1, _col0 + Select Operator [SEL_36] (rows=525/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_35] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 14 [SIMPLE_EDGE] + <-Map 13 [CONTAINS] + Reduce Output Operator [RS_34] + PartitionCols:_col1, _col0 + Select Operator [SEL_27] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_148] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_25] (rows=25/25 width=175) + Output:["key","value"] + <-Map 20 [CONTAINS] + Reduce Output Operator [RS_34] + PartitionCols:_col1, _col0 + 
Select Operator [SEL_30] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_149] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_28] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 5 [CONTAINS] + Reduce Output Operator [RS_62] PartitionCols:_col0, _col1 - Select Operator [SEL_22] (rows=147/61 width=177) + Select Operator [SEL_24] (rows=148/61 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_150] (rows=147/61 width=177) - Conds:RS_18._col1=RS_20._col1(Inner),Output:["_col0","_col6"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_147] (rows=60/37 width=266) - Conds:RS_13.key=RS_15.key(Inner),Output:["_col0","_col1","_col6"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:key - Filter Operator [FIL_134] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_10] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:key - Filter Operator [FIL_135] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_11] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_20] - PartitionCols:_col1 - Select Operator [SEL_9] (rows=525/319 width=178) - Output:["_col1"] - Group By Operator [GBY_8] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_7] - PartitionCols:_col0, _col1 - Select Operator [SEL_1] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_132] (rows=25/25 width=175) - predicate:value is not null - TableScan [TS_0] (rows=25/25 width=175) - Output:["key","value"] - <-Map 9 [CONTAINS] - Reduce Output Operator [RS_7] - PartitionCols:_col0, _col1 - Select Operator [SEL_3] (rows=500/500 
width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_133] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_2] (rows=500/500 width=178) - Output:["key","value"] - File Output Operator [FS_108] + Merge Join Operator [MERGEJOIN_160] (rows=148/61 width=177) + Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col2","_col5"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_147] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_15] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_159] (rows=61/52 width=86) + Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col2"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col1 + Select Operator [SEL_14] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_146] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_12] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Select Operator [SEL_11] (rows=525/319 width=178) + Output:["_col1"] + Group By Operator [GBY_10] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] + Reduce Output Operator [RS_9] + PartitionCols:_col1, _col0 + Select Operator [SEL_2] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_144] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_0] (rows=25/25 width=175) + Output:["key","value"] + <-Map 10 [CONTAINS] + Reduce Output Operator [RS_9] + PartitionCols:_col1, _col0 + Select Operator [SEL_5] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_145] 
(rows=500/500 width=178) + predicate:value is not null + TableScan [TS_3] (rows=500/500 width=178) + Output:["key","value"] + File Output Operator [FS_117] table:{"name:":"default.b"} - Please refer to the previous Group By Operator [GBY_103] - File Output Operator [FS_110] + Please refer to the previous Group By Operator [GBY_112] + File Output Operator [FS_119] table:{"name:":"default.c"} - Please refer to the previous Group By Operator [GBY_103] + Please refer to the previous Group By Operator [GBY_112] Stage-6 Stats-Aggr Operator Stage-1 @@ -1749,7 +1761,7 @@ FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value POSTHOOK: type: QUERY -Plan not optimized by CBO. +Plan optimized by CBO. Vertex dependency in root stage Map 6 <- Union 3 (CONTAINS) @@ -1766,40 +1778,40 @@ Stage-4 Dependency Collection{} Stage-2 Reducer 5 - File Output Operator [FS_17] + File Output Operator [FS_18] table:{"name:":"default.dest1"} - Group By Operator [GBY_15] (rows=205/310 width=96) + Group By Operator [GBY_16] (rows=205/310 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_14] + SHUFFLE [RS_15] PartitionCols:_col0 - Group By Operator [GBY_11] (rows=501/310 width=272) + Group By Operator [GBY_12] (rows=501/310 width=272) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 3 [SIMPLE_EDGE] <-Map 6 [CONTAINS] - Reduce Output Operator [RS_10] + Reduce Output Operator [RS_11] PartitionCols:_col0, _col1 - Select Operator [SEL_6] (rows=500/500 width=178) + Select Operator [SEL_7] (rows=500/500 width=178) Output:["_col0","_col1"] - TableScan [TS_5] (rows=500/500 width=178) + TableScan [TS_6] (rows=500/500 width=178) 
Output:["key","value"] <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_10] + Reduce Output Operator [RS_11] PartitionCols:_col0, _col1 - Select Operator [SEL_4] (rows=1/1 width=272) + Select Operator [SEL_5] (rows=1/1 width=272) Output:["_col0","_col1"] - Group By Operator [GBY_3] (rows=1/1 width=8) + Group By Operator [GBY_4] (rows=1/1 width=8) Output:["_col0"],aggregations:["count(1)"] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_2] + SHUFFLE [RS_3] Select Operator [SEL_1] (rows=500/500 width=10) TableScan [TS_0] (rows=500/500 width=10) default@src,s1,Tbl:COMPLETE,Col:COMPLETE - File Output Operator [FS_22] + File Output Operator [FS_23] table:{"name:":"default.dest2"} - Group By Operator [GBY_20] (rows=501/310 width=280) + Group By Operator [GBY_21] (rows=501/310 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 - Please refer to the previous Group By Operator [GBY_11] + Please refer to the previous Group By Operator [GBY_12] Stage-5 Stats-Aggr Operator Stage-1 @@ -1976,7 +1988,7 @@ INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value POSTHOOK: type: QUERY -Plan not optimized by CBO. +Plan optimized by CBO. 
Vertex dependency in root stage Map 6 <- Union 3 (CONTAINS) @@ -1994,54 +2006,50 @@ Stage-4 Dependency Collection{} Stage-2 Reducer 4 - File Output Operator [FS_18] + File Output Operator [FS_17] table:{"name:":"default.dest1"} - Group By Operator [GBY_16] (rows=205/310 width=96) + Group By Operator [GBY_15] (rows=205/310 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Union 3 [SIMPLE_EDGE] <-Map 6 [CONTAINS] - Reduce Output Operator [RS_15] + Reduce Output Operator [RS_14] PartitionCols:_col0 - Select Operator [SEL_8] (rows=501/501 width=272) + Select Operator [SEL_7] (rows=500/500 width=178) Output:["_col0","_col1"] - Select Operator [SEL_6] (rows=500/500 width=266) - Output:["_col0","_col1"] - TableScan [TS_5] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_20] + TableScan [TS_6] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_19] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_8] + Please refer to the previous Select Operator [SEL_7] <-Map 7 [CONTAINS] - Reduce Output Operator [RS_15] + Reduce Output Operator [RS_14] PartitionCols:_col0 Select Operator [SEL_11] (rows=500/500 width=178) Output:["_col0","_col1"] TableScan [TS_10] (rows=500/500 width=178) Output:["key","value"] - Reduce Output Operator [RS_20] + Reduce Output Operator [RS_19] PartitionCols:_col0, _col1 Please refer to the previous Select Operator [SEL_11] <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_15] + Reduce Output Operator [RS_14] PartitionCols:_col0 - Select Operator [SEL_8] (rows=501/501 width=272) + Select Operator [SEL_5] (rows=1/1 width=272) Output:["_col0","_col1"] - Select Operator [SEL_4] (rows=1/1 width=360) - Output:["_col0","_col1"] - Group By Operator [GBY_3] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count(1)"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan 
[TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_20] + Group By Operator [GBY_4] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count(1)"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_3] + Select Operator [SEL_1] (rows=500/500 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_19] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_8] + Please refer to the previous Select Operator [SEL_5] Reducer 5 - File Output Operator [FS_23] + File Output Operator [FS_22] table:{"name:":"default.dest2"} - Group By Operator [GBY_21] (rows=1001/310 width=280) + Group By Operator [GBY_20] (rows=1001/310 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 <- Please refer to the previous Union 3 [SIMPLE_EDGE] Stage-5 @@ -2095,7 +2103,7 @@ INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value POSTHOOK: type: QUERY -Plan not optimized by CBO. +Plan optimized by CBO. 
Vertex dependency in root stage Map 6 <- Union 3 (CONTAINS) @@ -2112,44 +2120,40 @@ Stage-4 Dependency Collection{} Stage-2 Reducer 4 - File Output Operator [FS_13] + File Output Operator [FS_14] table:{"name:":"default.dest1"} - Group By Operator [GBY_11] (rows=205/310 width=96) + Group By Operator [GBY_12] (rows=205/310 width=96) Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 <-Union 3 [SIMPLE_EDGE] <-Map 6 [CONTAINS] - Reduce Output Operator [RS_10] + Reduce Output Operator [RS_11] PartitionCols:_col0 - Select Operator [SEL_8] (rows=501/501 width=266) + Select Operator [SEL_7] (rows=500/500 width=178) Output:["_col0","_col1"] - Select Operator [SEL_6] (rows=500/500 width=266) - Output:["_col0","_col1"] - TableScan [TS_5] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_15] + TableScan [TS_6] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_16] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_8] + Please refer to the previous Select Operator [SEL_7] <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_10] + Reduce Output Operator [RS_11] PartitionCols:_col0 - Select Operator [SEL_8] (rows=501/501 width=266) + Select Operator [SEL_5] (rows=1/1 width=272) Output:["_col0","_col1"] - Select Operator [SEL_4] (rows=1/1 width=360) - Output:["_col0","_col1"] - Group By Operator [GBY_3] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count(1)"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_15] + Group By Operator [GBY_4] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count(1)"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_3] + Select Operator [SEL_1] (rows=500/500 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output 
Operator [RS_16] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_8] + Please refer to the previous Select Operator [SEL_5] Reducer 5 - File Output Operator [FS_18] + File Output Operator [FS_19] table:{"name:":"default.dest2"} - Group By Operator [GBY_16] (rows=501/310 width=280) + Group By Operator [GBY_17] (rows=501/310 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 <- Please refer to the previous Union 3 [SIMPLE_EDGE] Stage-5 diff --git ql/src/test/results/clientpositive/union17.q.out ql/src/test/results/clientpositive/union17.q.out index 650aef4..b4a6034 100644 --- ql/src/test/results/clientpositive/union17.q.out +++ ql/src/test/results/clientpositive/union17.q.out @@ -225,10 +225,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT DEST1.* FROM DEST1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git ql/src/test/results/clientpositive/union18.q.out ql/src/test/results/clientpositive/union18.q.out 
index 5993280..d1fad69 100644 --- ql/src/test/results/clientpositive/union18.q.out +++ ql/src/test/results/clientpositive/union18.q.out @@ -268,10 +268,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT DEST1.* FROM DEST1 SORT BY DEST1.key, DEST1.value PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git ql/src/test/results/clientpositive/union19.q.out ql/src/test/results/clientpositive/union19.q.out index 89d9c6d..c208bf2 100644 --- ql/src/test/results/clientpositive/union19.q.out +++ ql/src/test/results/clientpositive/union19.q.out @@ -203,10 +203,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key 
EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: SELECT DEST1.* FROM DEST1 SORT BY DEST1.key, DEST1.value PREHOOK: type: QUERY PREHOOK: Input: default@dest1 diff --git ql/src/test/results/clientpositive/union31.q.out ql/src/test/results/clientpositive/union31.q.out index bb35d5c..3b7461a 100644 --- ql/src/test/results/clientpositive/union31.q.out +++ ql/src/test/results/clientpositive/union31.q.out @@ -360,11 +360,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -483,11 +483,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -670,11 +670,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data 
size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/union_remove_6.q.out ql/src/test/results/clientpositive/union_remove_6.q.out index 0a80e63..2c30948 100644 --- ql/src/test/results/clientpositive/union_remove_6.q.out +++ ql/src/test/results/clientpositive/union_remove_6.q.out @@ -80,11 +80,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -178,11 +178,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/union_remove_6_subq.q.out ql/src/test/results/clientpositive/union_remove_6_subq.q.out index 7b306ed..c66690b 100644 --- ql/src/test/results/clientpositive/union_remove_6_subq.q.out +++ 
ql/src/test/results/clientpositive/union_remove_6_subq.q.out @@ -86,11 +86,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE @@ -184,11 +184,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE