diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 5305537..ff45230 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1815,6 +1815,79 @@ private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond,
     }
   }
 
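+  /*
+   * Extract join conditions from the WHERE clause of a query with a single
+   * destination: walk the predicate and push equality conjuncts whose two
+   * sides resolve to different join inputs into the QBJoinTree as join
+   * conditions. For example (see join_cond_pushdown_3.q below),
+   *   select * from part p1 join part p2 join part p3
+   *   where p1.p_name = p2.p_name and p2.p_name = p3.p_name
+   * is planned as if both equalities appeared in ON clauses; predicates
+   * that do not qualify are left to predicate pushdown.
+   */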
+  private void extractJoinCondsFromWhereClause(QBJoinTree joinTree, QB qb, String dest, ASTNode predicate) throws SemanticException {
+
+    switch (predicate.getType()) {
+    case HiveParser.KW_AND:
+      extractJoinCondsFromWhereClause(joinTree, qb, dest, (ASTNode) predicate.getChild(0));
+      extractJoinCondsFromWhereClause(joinTree, qb, dest, (ASTNode) predicate.getChild(1));
+      break;
+    case HiveParser.EQUAL_NS:
+    case HiveParser.EQUAL:
+
+      ASTNode leftCondn = (ASTNode) predicate.getChild(0);
+      ArrayList<String> leftCondAl1 = new ArrayList<String>();
+      ArrayList<String> leftCondAl2 = new ArrayList<String>();
+      try {
+        parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2,
+            null);
+      } catch (SemanticException se) {
+        // Suppress here; if it is a real issue, it will be caught when the WHERE clause is processed.
+        return;
+      }
+
+      ASTNode rightCondn = (ASTNode) predicate.getChild(1);
+      ArrayList<String> rightCondAl1 = new ArrayList<String>();
+      ArrayList<String> rightCondAl2 = new ArrayList<String>();
+      try {
+        parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1,
+            rightCondAl2, null);
+      } catch (SemanticException se) {
+        // Suppress here; if it is a real issue, it will be caught when the WHERE clause is processed.
+        return;
+      }
+
+      if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
+          || ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
+        // Not a join condition: one side of the equality references both join inputs.
+        return;
+      }
+
+      if (((leftCondAl1.size() == 0) && (leftCondAl2.size() == 0))
+          || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
+        // Not a join condition: one side references neither join input. It will be handled by predicate pushdown.
+        return;
+      }
+
+      List<String> leftSrc = new ArrayList<String>();
+      JoinCond cond = joinTree.getJoinCond()[0];
+      JoinType type = cond.getJoinType();
+      applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc,
+          predicate, leftCondn, rightCondn,
+          leftCondAl1, leftCondAl2,
+          rightCondAl1, rightCondAl2);
+      if (leftSrc.size() == 1) {
+        joinTree.setLeftAlias(leftSrc.get(0));
+      }
+
+      // TODO: hold onto this predicate, so that we don't add it to the Filter Operator as well.
+
+      break;
+    default:
+      return;
+    }
+  }
+
   @SuppressWarnings("nls")
   public Operator putOpInsertMap(Operator op, RowResolver rr) {
@@ -8492,6 +8565,18 @@ public Operator genPlan(QB qb) throws SemanticException {
     } else {
       QBJoinTree joinTree = genJoinTree(qb, joinExpr, aliasToOpInfo);
       qb.setQbJoinTree(joinTree);
+      /*
+       * If there is only one destination in the query, try to push WHERE
+       * predicates down as join conditions.
+       */
+      Set<String> dests = qb.getParseInfo().getClauseNames();
+      if (dests.size() == 1) {
+        String dest = dests.iterator().next();
+        ASTNode whereClause = qb.getParseInfo().getWhrForClause(dest);
+        if (whereClause != null) {
+          extractJoinCondsFromWhereClause(joinTree, qb, dest, (ASTNode) whereClause.getChild(0));
+        }
+      }
       mergeJoinTree(qb);
     }
diff --git ql/src/test/queries/clientpositive/join_cond_pushdown_3.q ql/src/test/queries/clientpositive/join_cond_pushdown_3.q
new file mode 100644
index 0000000..b308838
--- /dev/null
+++ ql/src/test/queries/clientpositive/join_cond_pushdown_3.q
@@ -0,0 +1,34 @@
+DROP TABLE part;
+
+-- data setup
+CREATE TABLE part(
+    p_partkey INT,
+    p_name STRING,
+    p_mfgr STRING,
+    p_brand STRING,
+    p_type STRING,
+    p_size INT,
+    p_container STRING,
+    p_retailprice DOUBLE,
+    p_comment STRING
+);
+
+LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part;
+
+
+
+explain select *
+from part p1 join part p2 join part p3
+where p1.p_name = p2.p_name and p2.p_name = p3.p_name;
+
+explain select *
+from part p1 join part p2 join part p3
+where p2.p_name = p1.p_name and p3.p_name = p2.p_name;
+
+explain select *
+from part p1 join part p2 join part p3
+where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name;
+
+explain select *
+from part p1 join part p2 join part p3
+where p2.p_partkey = 1 and p3.p_name = p2.p_name;
diff --git ql/src/test/queries/clientpositive/join_cond_pushdown_4.q ql/src/test/queries/clientpositive/join_cond_pushdown_4.q
new file mode 100644
index 0000000..477682e
--- /dev/null
+++ ql/src/test/queries/clientpositive/join_cond_pushdown_4.q
@@ -0,0 +1,26 @@
+DROP TABLE part;
+
+-- data setup
+CREATE TABLE part(
+    p_partkey INT,
+    p_name STRING,
+    p_mfgr STRING,
+    p_brand STRING,
+    p_type STRING,
+    p_size INT,
+    p_container STRING,
+    p_retailprice DOUBLE,
+    p_comment STRING
+);
+
+LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part;
+
+
+explain select *
+from part p1 join part p2 join part p3 on p1.p_name = p2.p_name join part p4
+where p2.p_name = p3.p_name and p1.p_name = p4.p_name;
+
+explain select *
+from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4
+where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey
+ and p1.p_partkey = p2.p_partkey;
diff --git ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out index 4aa9fa7..bfd8770 100644 --- ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out +++ ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out @@ -34,9 +34,8 @@ ABSTRACT SYNTAX TREE: STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -53,13 +52,17 @@ STAGE PLANS: type: string outputColumnNames: _col0, _col1 Reduce Output Operator - sort order: + key expressions: + expr: _col1 + type: string +
sort order: + + Map-reduce partition columns: + expr: _col1 + type: string tag: 0 value expressions: expr: _col0 type: string - expr: _col1 - type: string ttt:tmp2:t1 TableScan alias: t1 @@ -69,49 +72,14 @@ STAGE PLANS: type: string outputColumnNames: _col0 Reduce Output Operator - sort order: - tag: 1 - value expressions: + key expressions: expr: _col0 type: string - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} - handleSkewJoin: false - outputColumnNames: _col0, _col1, _col2 - Filter Operator - predicate: - expr: (_col1 = _col2) - type: boolean - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: - $INTNAME - TableScan - Reduce Output Operator - key expressions: - expr: _col1 - type: string - sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col0 - type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 ttt:tmp3:t1 TableScan alias: t1 @@ -128,19 +96,21 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string - tag: 1 + tag: 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 condition expressions: - 0 {VALUE._col1} + 0 {VALUE._col0} 1 + 2 handleSkewJoin: false - outputColumnNames: _col1 + outputColumnNames: _col0 Select Operator expressions: - expr: _col1 + expr: _col0 type: string outputColumnNames: _col0 Limit @@ -152,7 +122,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -185,7 +155,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 - Stage: Stage-4 + Stage: Stage-3 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out new file mode 100644 index 0000000..9f6f588 --- /dev/null +++ ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out @@ -0,0 +1,943 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +POSTHOOK: Output: default@part +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p1.p_name = 
p2.p_name and p2.p_name = p3.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p1.p_name = p2.p_name and p2.p_name = p3.p_name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME part) p1) (TOK_TABREF (TOK_TABNAME part) p2)) (TOK_TABREF (TOK_TABNAME part) p3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL p1) p_name) (. (TOK_TABLE_OR_COL p2) p_name)) (= (. (TOK_TABLE_OR_COL p2) p_name) (. (TOK_TABLE_OR_COL p3) p_name)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + p1 + TableScan + alias: p1 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 0 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + p2 + TableScan + alias: p2 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 1 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + p3 + TableScan + alias: p3 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 2 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 + Filter Operator + predicate: + expr: ((_col1 = _col12) and (_col12 = _col23)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: int + expr: _col6 + type: string + expr: _col7 + type: double + expr: _col8 + type: string + expr: _col11 + type: int + expr: _col12 + type: string + expr: _col13 + type: string + expr: _col14 + type: string + expr: _col15 + 
type: string + expr: _col16 + type: int + expr: _col17 + type: string + expr: _col18 + type: double + expr: _col19 + type: string + expr: _col22 + type: int + expr: _col23 + type: string + expr: _col24 + type: string + expr: _col25 + type: string + expr: _col26 + type: string + expr: _col27 + type: int + expr: _col28 + type: string + expr: _col29 + type: double + expr: _col30 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p2.p_name = p1.p_name and p3.p_name = p2.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p2.p_name = p1.p_name and p3.p_name = p2.p_name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME part) p1) (TOK_TABREF (TOK_TABNAME part) p2)) (TOK_TABREF (TOK_TABNAME part) p3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL p2) p_name) (. (TOK_TABLE_OR_COL p1) p_name)) (= (. (TOK_TABLE_OR_COL p3) p_name) (. (TOK_TABLE_OR_COL p2) p_name)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + p1 + TableScan + alias: p1 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 0 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + p2 + TableScan + alias: p2 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 1 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + p3 + TableScan + alias: p3 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 2 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 
1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 + Filter Operator + predicate: + expr: ((_col12 = _col1) and (_col23 = _col12)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: int + expr: _col6 + type: string + expr: _col7 + type: double + expr: _col8 + type: string + expr: _col11 + type: int + expr: _col12 + type: string + expr: _col13 + type: string + expr: _col14 + type: string + expr: _col15 + type: string + expr: _col16 + type: int + expr: _col17 + type: string + expr: _col18 + type: double + expr: _col19 + type: string + expr: _col22 + type: int + expr: _col23 + type: string + expr: _col24 + type: string + expr: _col25 + type: string + expr: _col26 + type: string + expr: _col27 + type: int + expr: _col28 + type: string + expr: _col29 + type: double + expr: _col30 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME part) p1) (TOK_TABREF (TOK_TABNAME part) p2)) (TOK_TABREF (TOK_TABNAME part) p3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (+ (. (TOK_TABLE_OR_COL p2) p_partkey) (. (TOK_TABLE_OR_COL p1) p_partkey)) (. (TOK_TABLE_OR_COL p1) p_partkey)) (= (. (TOK_TABLE_OR_COL p3) p_name) (. 
(TOK_TABLE_OR_COL p2) p_name)))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + p1 + TableScan + alias: p1 + Reduce Output Operator + sort order: + tag: 0 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + p2 + TableScan + alias: p2 + Reduce Output Operator + sort order: + tag: 1 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Filter Operator + predicate: + expr: ((_col11 + _col0) = _col0) + type: boolean + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col12 + type: string + sort order: + + Map-reduce partition columns: + expr: _col12 + type: string + tag: 0 + value expressions: + expr: _col11 + type: int + expr: _col12 + type: string + expr: _col13 + type: string + expr: _col14 + type: string + expr: _col15 + type: string + expr: _col16 + type: int + expr: _col17 + type: string + expr: _col18 + type: double + expr: _col19 + type: string + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: int + expr: _col6 + type: string + expr: _col7 + type: double + expr: _col8 + type: string + p3 + TableScan + alias: p3 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 1 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} 
{VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 + Filter Operator + predicate: + expr: (((_col0 + _col11) = _col11) and (_col23 = _col1)) + type: boolean + Select Operator + expressions: + expr: _col11 + type: int + expr: _col12 + type: string + expr: _col13 + type: string + expr: _col14 + type: string + expr: _col15 + type: string + expr: _col16 + type: int + expr: _col17 + type: string + expr: _col18 + type: double + expr: _col19 + type: string + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: int + expr: _col6 + type: string + expr: _col7 + type: double + expr: _col8 + type: string + expr: _col22 + type: int + expr: _col23 + type: string + expr: _col24 + type: string + expr: _col25 + type: string + expr: _col26 + type: string + expr: _col27 + type: int + expr: _col28 + type: string + expr: _col29 + type: double + expr: _col30 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p2.p_partkey = 1 and p3.p_name = p2.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p2.p_partkey = 1 and p3.p_name = p2.p_name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME part) p1) (TOK_TABREF (TOK_TABNAME part) p2)) (TOK_TABREF (TOK_TABNAME part) p3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL p2) p_partkey) 1) (= (. (TOK_TABLE_OR_COL p3) p_name) (. 
(TOK_TABLE_OR_COL p2) p_name)))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + p1 + TableScan + alias: p1 + Reduce Output Operator + sort order: + tag: 0 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + p2 + TableScan + alias: p2 + Filter Operator + predicate: + expr: (p_partkey = 1) + type: boolean + Reduce Output Operator + sort order: + tag: 1 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col12 + type: string + sort order: + + Map-reduce partition columns: + expr: _col12 + type: string + tag: 0 + value expressions: + expr: _col11 + type: int + expr: _col12 + type: string + expr: _col13 + type: string + expr: _col14 + type: string + expr: _col15 + type: string + expr: _col16 + type: int + expr: _col17 + type: string + expr: _col18 + type: double + expr: _col19 + type: string + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: int + expr: _col6 + type: string + expr: _col7 + type: double + expr: _col8 + type: string + p3 + TableScan + alias: p3 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 1 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {VALUE._col1} 
{VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 + Filter Operator + predicate: + expr: ((_col0 = 1) and (_col23 = _col1)) + type: boolean + Select Operator + expressions: + expr: _col11 + type: int + expr: _col12 + type: string + expr: _col13 + type: string + expr: _col14 + type: string + expr: _col15 + type: string + expr: _col16 + type: int + expr: _col17 + type: string + expr: _col18 + type: double + expr: _col19 + type: string + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: int + expr: _col6 + type: string + expr: _col7 + type: double + expr: _col8 + type: string + expr: _col22 + type: int + expr: _col23 + type: string + expr: _col24 + type: string + expr: _col25 + type: string + expr: _col26 + type: string + expr: _col27 + type: int + expr: _col28 + type: string + expr: _col29 + type: double + expr: _col30 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out new file mode 100644 index 0000000..28ad731 --- /dev/null +++ ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out @@ -0,0 +1,695 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +POSTHOOK: Output: default@part +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 on p1.p_name = p2.p_name join part p4 +where p2.p_name = p3.p_name and p1.p_name = p4.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 on p1.p_name = p2.p_name join part p4 +where p2.p_name = p3.p_name and p1.p_name = p4.p_name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME part) p1) (TOK_TABREF (TOK_TABNAME part) p2)) 
(TOK_TABREF (TOK_TABNAME part) p3) (= (. (TOK_TABLE_OR_COL p1) p_name) (. (TOK_TABLE_OR_COL p2) p_name))) (TOK_TABREF (TOK_TABNAME part) p4))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL p2) p_name) (. (TOK_TABLE_OR_COL p3) p_name)) (= (. (TOK_TABLE_OR_COL p1) p_name) (. (TOK_TABLE_OR_COL p4) p_name)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + p1 + TableScan + alias: p1 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 0 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + p2 + TableScan + alias: p2 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 1 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + p3 + TableScan + alias: p3 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 2 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + p4 + TableScan + alias: p4 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 3 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 3 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41 + Filter Operator + predicate: + expr: 
((_col12 = _col23) and (_col1 = _col34)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: int + expr: _col6 + type: string + expr: _col7 + type: double + expr: _col8 + type: string + expr: _col11 + type: int + expr: _col12 + type: string + expr: _col13 + type: string + expr: _col14 + type: string + expr: _col15 + type: string + expr: _col16 + type: int + expr: _col17 + type: string + expr: _col18 + type: double + expr: _col19 + type: string + expr: _col22 + type: int + expr: _col23 + type: string + expr: _col24 + type: string + expr: _col25 + type: string + expr: _col26 + type: string + expr: _col27 + type: int + expr: _col28 + type: string + expr: _col29 + type: double + expr: _col30 + type: string + expr: _col33 + type: int + expr: _col34 + type: string + expr: _col35 + type: string + expr: _col36 + type: string + expr: _col37 + type: string + expr: _col38 + type: int + expr: _col39 + type: string + expr: _col40 + type: double + expr: _col41 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 +where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 +where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2.p_partkey +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME part) p1) (TOK_TABREF (TOK_TABNAME part) p2)) (TOK_TABREF (TOK_TABNAME part) p3) (= (. (TOK_TABLE_OR_COL p2) p_name) (. (TOK_TABLE_OR_COL p1) p_name))) (TOK_TABREF (TOK_TABNAME part) p4))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL p2) p_name) (. (TOK_TABLE_OR_COL p3) p_name)) (= (. (TOK_TABLE_OR_COL p1) p_partkey) (. (TOK_TABLE_OR_COL p4) p_partkey))) (= (. (TOK_TABLE_OR_COL p1) p_partkey) (. 
(TOK_TABLE_OR_COL p2) p_partkey)))))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + p1 + TableScan + alias: p1 + Reduce Output Operator + key expressions: + expr: p_name + type: string + expr: p_partkey + type: int + sort order: ++ + Map-reduce partition columns: + expr: p_name + type: string + expr: p_partkey + type: int + tag: 0 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + p2 + TableScan + alias: p2 + Reduce Output Operator + key expressions: + expr: p_name + type: string + expr: p_partkey + type: int + sort order: ++ + Map-reduce partition columns: + expr: p_name + type: string + expr: p_partkey + type: int + tag: 1 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col12 + type: string + sort order: + + Map-reduce partition columns: + expr: _col12 + type: string + tag: 0 + value expressions: + expr: _col11 + type: int + expr: _col12 + type: string + expr: _col13 + type: string + expr: _col14 + type: string + expr: _col15 + type: string + expr: _col16 + type: int + expr: _col17 + type: string + expr: _col18 + type: double + expr: _col19 + type: string + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: int + expr: _col6 + type: string + expr: _col7 + type: double + expr: _col8 + type: string + p3 + TableScan + alias: p3 + Reduce Output Operator + key expressions: + expr: p_name + type: string + sort order: + + Map-reduce partition columns: + expr: p_name + type: string + tag: 1 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + 
condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col11 + type: int + sort order: + + Map-reduce partition columns: + expr: _col11 + type: int + tag: 0 + value expressions: + expr: _col22 + type: int + expr: _col23 + type: string + expr: _col24 + type: string + expr: _col25 + type: string + expr: _col26 + type: string + expr: _col27 + type: int + expr: _col28 + type: string + expr: _col29 + type: double + expr: _col30 + type: string + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: int + expr: _col6 + type: string + expr: _col7 + type: double + expr: _col8 + type: string + expr: _col11 + type: int + expr: _col12 + type: string + expr: _col13 + type: string + expr: _col14 + type: string + expr: _col15 + type: string + expr: _col16 + type: int + expr: _col17 + type: string + expr: _col18 + type: double + expr: _col19 + type: string + p4 + TableScan + alias: p4 + Reduce Output Operator + key expressions: + expr: p_partkey + type: int + sort order: + + Map-reduce partition columns: + expr: p_partkey + type: int + tag: 1 + value expressions: + expr: p_partkey + type: int + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_brand + type: string + expr: p_type + type: string + expr: p_size + type: int + expr: p_container + type: string + expr: p_retailprice + type: double + expr: p_comment + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col22} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41 + Filter Operator + predicate: + expr: (((_col12 = _col1) and (_col22 = _col33)) and (_col22 = _col11)) + type: 
boolean + Select Operator + expressions: + expr: _col22 + type: int + expr: _col23 + type: string + expr: _col24 + type: string + expr: _col25 + type: string + expr: _col26 + type: string + expr: _col27 + type: int + expr: _col28 + type: string + expr: _col29 + type: double + expr: _col30 + type: string + expr: _col11 + type: int + expr: _col12 + type: string + expr: _col13 + type: string + expr: _col14 + type: string + expr: _col15 + type: string + expr: _col16 + type: int + expr: _col17 + type: string + expr: _col18 + type: double + expr: _col19 + type: string + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: int + expr: _col6 + type: string + expr: _col7 + type: double + expr: _col8 + type: string + expr: _col33 + type: int + expr: _col34 + type: string + expr: _col35 + type: string + expr: _col36 + type: string + expr: _col37 + type: string + expr: _col38 + type: int + expr: _col39 + type: string + expr: _col40 + type: double + expr: _col41 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/sample8.q.out ql/src/test/results/clientpositive/sample8.q.out index 60e0831..1e8bc50 100644 --- ql/src/test/results/clientpositive/sample8.q.out +++ ql/src/test/results/clientpositive/sample8.q.out @@ -35,10 +35,20 @@ STAGE PLANS: Filter Operator isSamplingPred: true predicate: - expr: (((hash(key) & 2147483647) % 1) = 0) + expr: ((((hash(key) & 2147483647) % 10) = 0) and (((hash(key) & 2147483647) % 1) = 0)) type: boolean Reduce Output Operator - sort order: + key expressions: + expr: key + type: string + expr: value + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + expr: value + type: string tag: 0 value expressions: expr: key @@ -56,10 +66,20 @@ STAGE PLANS: Filter Operator isSamplingPred: true predicate: - expr: (((hash(key) & 2147483647) % 10) = 0) + expr: ((((hash(key) & 2147483647) % 1) = 0) and (((hash(key) & 2147483647) % 10) = 0)) type: boolean Reduce Output Operator - sort order: + key expressions: + expr: key + type: string + expr: value + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + expr: value + type: string tag: 1 value expressions: expr: key