diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 9d27b8d5c0..f71ed3d240 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -792,6 +792,7 @@ minillaplocal.query.files=\ semijoin_reddedup,\ semijoin_reddedup.q,\ sharedworkext.q,\ + sharedworkresidual.q,\ smb_cache.q,\ special_character_in_tabnames_1.q,\ sqlmerge.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java index 2c93c2a760..523fe54325 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java @@ -749,6 +749,7 @@ public boolean isSame(OperatorDesc other) { Objects.equals(getFiltersStringMap(), otherDesc.getFiltersStringMap()) && Objects.equals(getOutputColumnNames(), otherDesc.getOutputColumnNames()) && Objects.equals(getCondsList(), otherDesc.getCondsList()) && + Objects.equals(getResidualFilterExprsString(), otherDesc.getResidualFilterExprsString()) && getHandleSkewJoin() == otherDesc.getHandleSkewJoin() && Objects.equals(getNullSafeString(), otherDesc.getNullSafeString()); } diff --git a/ql/src/test/queries/clientpositive/sharedworkresidual.q b/ql/src/test/queries/clientpositive/sharedworkresidual.q new file mode 100644 index 0000000000..e8a4b37b01 --- /dev/null +++ b/ql/src/test/queries/clientpositive/sharedworkresidual.q @@ -0,0 +1,46 @@ +CREATE TABLE dimension_date ( + id int, + dateid int, + completedate string, + daynumberinweek tinyint, + dayfullname string, + daynumberinmonth tinyint, + daynumberinyear int, + weeknumberinyear tinyint, + monthnumberinyear tinyint, + monthfullname string, + quarternumber tinyint, + quartername string, + yearnumber int, + weekstartdate string, + weekstartdateid int, + monthstartdate string, + monthstartdateid int); + + +explain + with daily as ( + select * + from dimension_date + where dateid = 20200228 + ), + weekly as ( + select dly.dateid,count(1) as mb_wk + from dimension_date dly + left join dimension_date wk + ON datediff(dly.completedate, wk.completedate) >= 0 + AND datediff(dly.completedate, wk.completedate) < 6 + GROUP BY dly.dateid + ), + monthly as ( + select dly.dateid,count(1) as nb_monthly + from dimension_date dly + left join dimension_date wk + ON datediff(dly.completedate, wk.completedate) >= 0 + AND datediff(dly.completedate, wk.completedate) < 28 + GROUP BY dly.dateid + ) + select daily.dateid,mb_wk,nb_monthly + from daily + left join weekly on daily.dateid = weekly.dateid + left join monthly on daily.dateid = monthly.dateid; diff --git a/ql/src/test/results/clientpositive/llap/sharedworkresidual.q.out b/ql/src/test/results/clientpositive/llap/sharedworkresidual.q.out new file mode 100644 index 0000000000..20eb24524c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/sharedworkresidual.q.out @@ -0,0 +1,321 @@ +PREHOOK: query: CREATE TABLE dimension_date ( + id int, + dateid int, + completedate string, + daynumberinweek tinyint, + dayfullname string, + daynumberinmonth tinyint, + daynumberinyear int, + weeknumberinyear tinyint, + monthnumberinyear tinyint, + monthfullname string, + quarternumber tinyint, + quartername string, + yearnumber int, + weekstartdate string, + weekstartdateid int, + monthstartdate string, + monthstartdateid int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dimension_date +POSTHOOK: query: 
CREATE TABLE dimension_date ( + id int, + dateid int, + completedate string, + daynumberinweek tinyint, + dayfullname string, + daynumberinmonth tinyint, + daynumberinyear int, + weeknumberinyear tinyint, + monthnumberinyear tinyint, + monthfullname string, + quarternumber tinyint, + quartername string, + yearnumber int, + weekstartdate string, + weekstartdateid int, + monthstartdate string, + monthstartdateid int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dimension_date +Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[45][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: explain + with daily as ( + select * + from dimension_date + where dateid = 20200228 + ), + weekly as ( + select dly.dateid,count(1) as mb_wk + from dimension_date dly + left join dimension_date wk + ON datediff(dly.completedate, wk.completedate) >= 0 + AND datediff(dly.completedate, wk.completedate) < 6 + GROUP BY dly.dateid + ), + monthly as ( + select dly.dateid,count(1) as nb_monthly + from dimension_date dly + left join dimension_date wk + ON datediff(dly.completedate, wk.completedate) >= 0 + AND datediff(dly.completedate, wk.completedate) < 28 + GROUP BY dly.dateid + ) + select daily.dateid,mb_wk,nb_monthly + from daily + left join weekly on daily.dateid = weekly.dateid + left join monthly on daily.dateid = monthly.dateid +PREHOOK: type: QUERY +PREHOOK: Input: default@dimension_date +#### A masked pattern was here #### +POSTHOOK: query: explain + with daily as ( + select * + from dimension_date + where dateid = 20200228 + ), + weekly as ( + select dly.dateid,count(1) as mb_wk + from dimension_date dly + left join dimension_date wk + ON datediff(dly.completedate, wk.completedate) >= 0 + AND datediff(dly.completedate, wk.completedate) < 6 + GROUP BY dly.dateid + ), + monthly as ( + select dly.dateid,count(1) as nb_monthly + from dimension_date dly + left join dimension_date wk + ON datediff(dly.completedate, wk.completedate) >= 0 + AND datediff(dly.completedate, wk.completedate) < 28 + GROUP BY dly.dateid + ) + select daily.dateid,mb_wk,nb_monthly + from daily + left join weekly on daily.dateid = weekly.dateid + left join monthly on daily.dateid = monthly.dateid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dimension_date +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Map 9 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Map 4 (CUSTOM_SIMPLE_EDGE), Map 9 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dimension_date + filterExpr: (dateid = 20200228) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (dateid = 20200228) (type: boolean) + Statistics: Num rows: 1 Data 
size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: dly + filterExpr: (dateid = 20200228) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (dateid = 20200228) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: completedate (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 9 + Map Operator Tree: + TableScan + alias: wk + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: completedate (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 378 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 378 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 20200228 (type: int), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + residual filter predicates: {(datediff(_col0, _col1) >= 0)} {(datediff(_col0, _col1) < 6)} + Statistics: Num rows: 1 Data size: 373 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: true (type: boolean) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: boolean) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + residual filter predicates: {(datediff(_col0, _col1) >= 0)} {(datediff(_col0, _col1) < 28)} + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: true (type: boolean) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: boolean) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 8 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +
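
Note (illustration, not part of the patch): the one-line addition to JoinDesc.isSame() means two join descriptors only compare as identical when their residual (non-equi) filter expressions also match, so the weekly join (residual predicate datediff(...) < 6) and the monthly join (residual predicate datediff(...) < 28) in the plan above stay as separate Merge Join Operators instead of being merged by shared-work optimization. A minimal standalone sketch of that comparison, using a hypothetical class rather than Hive's actual JoinDesc API:

    import java.util.List;
    import java.util.Objects;

    // Hypothetical stand-in for a join descriptor; only models the part
    // this patch touches: the list of residual filter expression strings.
    public class JoinDescSketch {
        private final List<String> residualFilterExprs;

        JoinDescSketch(List<String> residualFilterExprs) {
            this.residualFilterExprs = residualFilterExprs;
        }

        // Mirrors the new clause added to JoinDesc.isSame(): without this
        // check, two joins differing only in residual filters would compare
        // as equal and could be collapsed into a single shared operator.
        boolean isSame(JoinDescSketch other) {
            return Objects.equals(residualFilterExprs, other.residualFilterExprs);
        }

        public static void main(String[] args) {
            JoinDescSketch weekly = new JoinDescSketch(
                List.of("datediff(_col0, _col1) >= 0", "datediff(_col0, _col1) < 6"));
            JoinDescSketch monthly = new JoinDescSketch(
                List.of("datediff(_col0, _col1) >= 0", "datediff(_col0, _col1) < 28"));
            // Prints "false": the two joins are not interchangeable,
            // matching the separate Reducer 5 / Reducer 7 vertices above.
            System.out.println(weekly.isSame(monthly));
        }
    }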