diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java index 7ce924aef7..07ce76287a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java @@ -36,7 +36,6 @@ import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.calcite.util.ImmutableBitSet; -import org.apache.calcite.util.ImmutableIntList; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -72,6 +71,26 @@ private HiveSemiJoinRule(RelOptRuleOperand operand, RelBuilderFactory relBuilder super(operand, relBuilder, null); } + private RelNode buildProject(final Aggregate aggregate, RexBuilder rexBuilder, RelBuilder relBuilder) { + assert(!aggregate.indicator && aggregate.getAggCallList().isEmpty()); + RelNode input = aggregate.getInput(); + List groupingKeys = aggregate.getGroupSet().asList(); + List projects = new ArrayList<>(); + for(Integer keys:groupingKeys) { + projects.add(rexBuilder.makeInputRef(input, keys.intValue())); + } + return relBuilder.push(aggregate.getInput()).project(projects).build(); + } + + private boolean needProject(final RelNode input, final RelNode aggregate) { + if((input instanceof HepRelVertex + && ((HepRelVertex)input).getCurrentRel() instanceof Join) + || input.getRowType().getFieldCount() != aggregate.getRowType().getFieldCount()) { + return true; + } + return false; + } + protected void perform(RelOptRuleCall call, ImmutableBitSet topRefs, RelNode topOperator, Join join, RelNode left, Aggregate aggregate) { LOG.debug("Matched HiveSemiJoinRule"); @@ -107,29 +126,14 @@ protected void perform(RelOptRuleCall call, ImmutableBitSet topRefs, for (int key : joinInfo.rightKeys) { newRightKeyBuilder.add(aggregateKeys.get(key)); } - final ImmutableIntList newRightKeys = - ImmutableIntList.copyOf(newRightKeyBuilder); - final RelNode newRight = aggregate.getInput(); + RelNode input = aggregate.getInput(); + final RelNode newRight = needProject(input, aggregate) ? + buildProject(aggregate, rexBuilder, call.builder()) : input; final RexNode newCondition = RelOptUtil.createEquiJoinCondition(left, joinInfo.leftKeys, newRight, - newRightKeys, rexBuilder); - - RelNode semi = null; - //HIVE-15458: we need to add a Project on top of Join since SemiJoin with Join as it's right input - // is not expected further down the pipeline. see jira for more details - if(aggregate.getInput() instanceof HepRelVertex - && ((HepRelVertex)aggregate.getInput()).getCurrentRel() instanceof Join) { - Join rightJoin = (Join)(((HepRelVertex)aggregate.getInput()).getCurrentRel()); - List projects = new ArrayList<>(); - for(int i=0; i 1) +select count(1) from part pp where pp.p_partkey IN (select p_partkey from ss); + +with ss as +(select count(1), p_partkey, p_name from + part group by p_partkey ,p_name + having count(1) > 1) +select count(1) from part pp where pp.p_partkey IN (select p_partkey from ss); diff --git a/ql/src/test/results/clientpositive/llap/semijoin.q.out b/ql/src/test/results/clientpositive/llap/semijoin.q.out index a1e4788233..22b867aa1a 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin.q.out @@ -3260,11 +3260,11 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### CBO PLAN: HiveProject(p_partkey=[$0]) - HiveSemiJoin(condition=[=($1, $3)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) HiveProject(p_partkey=[$0], p_name=[$1]) HiveFilter(condition=[IS NOT NULL($1)]) HiveTableScan(table=[[default, part]], table:alias=[pp]) - HiveProject(p_partkey=[$0], p_name=[$1], p_mfgr=[$2], p_brand=[$3], p_type=[$4], p_size=[$5], p_container=[$6], p_retailprice=[$7], p_comment=[$8], BLOCK__OFFSET__INSIDE__FILE=[$9], INPUT__FILE__NAME=[$10], ROW__ID=[$11]) + HiveProject(p_name=[$1]) HiveFilter(condition=[IS NOT NULL($1)]) HiveTableScan(table=[[default, part]], table:alias=[part]) @@ -3302,3 +3302,50 @@ POSTHOOK: Input: default@part 85768 86428 90681 +PREHOOK: query: explain cbo +with ss as +(select count(1), p_partkey, p_name from + part group by p_partkey ,p_name + having count(1) > 1) +select count(1) from part pp where pp.p_partkey IN (select p_partkey from ss) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +with ss as +(select count(1), p_partkey, p_name from + part group by p_partkey ,p_name + having count(1) > 1) +select count(1) from part pp where pp.p_partkey IN (select p_partkey from ss) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveSemiJoin(condition=[=($0, $1)], joinType=[inner]) + HiveProject(p_partkey=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, part]], table:alias=[pp]) + HiveProject(p_partkey=[$0]) + HiveFilter(condition=[>($2, 1)]) + HiveAggregate(group=[{0, 1}], agg#0=[count()]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +PREHOOK: query: with ss as +(select count(1), p_partkey, p_name from + part group by p_partkey ,p_name + having count(1) > 1) +select count(1) from part pp where pp.p_partkey IN (select p_partkey from ss) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: with ss as +(select count(1), p_partkey, p_name from + part group by p_partkey ,p_name + having count(1) > 1) +select count(1) from part pp where pp.p_partkey IN (select p_partkey from ss) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +2 diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out index c09da86ce1..c85f42c4fd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out @@ -242,7 +242,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), =($8, 11), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f0=[$4], $f1=[$5], $f2=[$6]) + HiveProject(i_item_sk=[$0]) HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($0))]) @@ -336,7 +336,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), =($8, 11), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f0=[$4], $f1=[$5], $f2=[$6]) + HiveProject(i_item_sk=[$0]) HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($0))]) @@ -430,7 +430,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), =($8, 11), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f0=[$4], $f1=[$5], $f2=[$6]) + HiveProject(i_item_sk=[$0]) HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($0))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out index 0e61e45ad5..8211db1c95 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out @@ -164,11 +164,11 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f1=[$1], CAST=[CAST($1):DOUBLE]) @@ -188,11 +188,11 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f1=[$1], CAST=[CAST($1):DOUBLE]) @@ -212,11 +212,11 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out index e5b0d19715..7e4644ac69 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out @@ -125,7 +125,7 @@ HiveSortLimit(fetch=[100]) HiveProject($f0=[$0]) HiveUnion(all=[true]) HiveProject($f0=[*(CAST($4):DECIMAL(10, 0), $5)]) - HiveSemiJoin(condition=[=($3, $8)], joinType=[inner]) + HiveSemiJoin(condition=[=($3, $7)], joinType=[inner]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0]) HiveJoin(condition=[>($1, *(0.95, $3))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -169,7 +169,7 @@ HiveSortLimit(fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(substr=[$0], i_item_sk=[$1], d_date=[$2], $f3=[$3]) + HiveProject(i_item_sk=[$1]) HiveFilter(condition=[>($3, 4)]) HiveProject(substr=[$2], i_item_sk=[$1], d_date=[$0], $f3=[$3]) HiveAggregate(group=[{3, 4, 5}], agg#0=[count()]) @@ -184,7 +184,7 @@ HiveSortLimit(fetch=[100]) HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject($f0=[*(CAST($4):DECIMAL(10, 0), $5)]) - HiveSemiJoin(condition=[=($2, $8)], joinType=[inner]) + HiveSemiJoin(condition=[=($2, $7)], joinType=[inner]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0]) HiveJoin(condition=[>($1, *(0.95, $3))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -228,7 +228,7 @@ HiveSortLimit(fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(substr=[$0], i_item_sk=[$1], d_date=[$2], $f3=[$3]) + HiveProject(i_item_sk=[$1]) HiveFilter(condition=[>($3, 4)]) HiveProject(substr=[$2], i_item_sk=[$1], d_date=[$0], $f3=[$3]) HiveAggregate(group=[{3, 4, 5}], agg#0=[count()]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out index ef313ad874..58ed6dabe7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out @@ -160,11 +160,11 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) @@ -182,11 +182,11 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) @@ -204,11 +204,11 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) diff --git a/ql/src/test/results/clientpositive/spark/semijoin.q.out b/ql/src/test/results/clientpositive/spark/semijoin.q.out index 38db76dc11..8a2b12feb3 100644 --- a/ql/src/test/results/clientpositive/spark/semijoin.q.out +++ b/ql/src/test/results/clientpositive/spark/semijoin.q.out @@ -2931,11 +2931,11 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### CBO PLAN: HiveProject(p_partkey=[$0]) - HiveSemiJoin(condition=[=($1, $3)], joinType=[inner]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[inner]) HiveProject(p_partkey=[$0], p_name=[$1]) HiveFilter(condition=[IS NOT NULL($1)]) HiveTableScan(table=[[default, part]], table:alias=[pp]) - HiveProject(p_partkey=[$0], p_name=[$1], p_mfgr=[$2], p_brand=[$3], p_type=[$4], p_size=[$5], p_container=[$6], p_retailprice=[$7], p_comment=[$8], BLOCK__OFFSET__INSIDE__FILE=[$9], INPUT__FILE__NAME=[$10], ROW__ID=[$11]) + HiveProject(p_name=[$1]) HiveFilter(condition=[IS NOT NULL($1)]) HiveTableScan(table=[[default, part]], table:alias=[part]) @@ -2973,3 +2973,50 @@ POSTHOOK: Input: default@part 85768 86428 90681 +PREHOOK: query: explain cbo +with ss as +(select count(1), p_partkey, p_name from + part group by p_partkey ,p_name + having count(1) > 1) +select count(1) from part pp where pp.p_partkey IN (select p_partkey from ss) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +with ss as +(select count(1), p_partkey, p_name from + part group by p_partkey ,p_name + having count(1) > 1) +select count(1) from part pp where pp.p_partkey IN (select p_partkey from ss) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveSemiJoin(condition=[=($0, $1)], joinType=[inner]) + HiveProject(p_partkey=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, part]], table:alias=[pp]) + HiveProject(p_partkey=[$0]) + HiveFilter(condition=[>($2, 1)]) + HiveAggregate(group=[{0, 1}], agg#0=[count()]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +PREHOOK: query: with ss as +(select count(1), p_partkey, p_name from + part group by p_partkey ,p_name + having count(1) > 1) +select count(1) from part pp where pp.p_partkey IN (select p_partkey from ss) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: with ss as +(select count(1), p_partkey, p_name from + part group by p_partkey ,p_name + having count(1) > 1) +select count(1) from part pp where pp.p_partkey IN (select p_partkey from ss) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +2