diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index f1441b2..4b0d8f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -12734,23 +12734,53 @@ private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver
       aggParameters.add(paraExprNode);
     }
 
-    // 2 Determine type of UDAF
-    // This is the GenericUDAF name
-    String aggName = unescapeIdentifier(aggAst.getChild(0).getText());
+    // 2. Is this a distinct UDAF?
     boolean isDistinct = aggAst.getType() == HiveParser.TOK_FUNCTIONDI;
-    boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR;
 
-    // 3 Get UDAF Evaluator
-    Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct);
-    GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters,
-        aggAst, isDistinct, isAllColumns);
-    assert (genericUDAFEvaluator != null);
+    // 3. Determine the return type of the UDAF
+    TypeInfo udafRetType = null;
+
+    // 3.1 Obtain UDAF name
+    String aggName = unescapeIdentifier(aggAst.getChild(0).getText());
+
+    // 3.2 Ranking functions return 'int' ('double' for percent_rank)
+    if (FunctionRegistry.isRankingFunction(aggName)) {
+      if (aggName.equalsIgnoreCase("percent_rank"))
+        udafRetType = TypeInfoFactory.doubleTypeInfo;
+      else
+        udafRetType = TypeInfoFactory.intTypeInfo;
+    } else {
+      // 3.3 Try obtaining UDAF evaluators to determine the return type
+      try {
+        boolean isAllColumns = aggAst.getType() == HiveParser.TOK_FUNCTIONSTAR;
 
-    // 4. Get UDAF Info using UDAF Evaluator
-    GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
+        // 3.3.1 Get UDAF Evaluator
+        Mode amode = groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct);
+        GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName,
+            aggParameters, aggAst, isDistinct, isAllColumns);
+        assert (genericUDAFEvaluator != null);
+
+        // 3.3.2 Get UDAF Info using UDAF Evaluator
+        GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
+        udafRetType = udaf.returnType;
+      } catch (Exception e) {
+        LOG.debug("CBO: Couldn't obtain UDAF evaluators for " + aggName
+            + ", trying to translate to GenericUDF");
+      }
+
+      // 3.4 Try GenericUDF translation
+      if (udafRetType == null) {
+        TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
+        // We allow stateful functions in the SELECT list (but nowhere else)
+        tcCtx.setAllowStatefulFunctions(true);
+        tcCtx.setAllowDistinctFunctions(false);
+        ExprNodeDesc exp = genExprNodeDesc((ASTNode) aggAst.getChild(0), inputRR, tcCtx);
+        udafRetType = exp.getTypeInfo();
+      }
+    }
 
-    // 5. Construct AggInfo
-    aInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct);
+    // 4. Construct AggInfo
+    aInfo = new AggInfo(aggParameters, udafRetType, aggName, isDistinct);
 
     return aInfo;
   }
diff --git a/ql/src/test/queries/clientpositive/cbo_correctness.q b/ql/src/test/queries/clientpositive/cbo_correctness.q
index 559e4bf..f80cbfd 100644
--- a/ql/src/test/queries/clientpositive/cbo_correctness.q
+++ b/ql/src/test/queries/clientpositive/cbo_correctness.q
@@ -1,4 +1,5 @@
 set hive.cbo.enable=true;
+set hive.exec.check.crossproducts=false;
 
 drop table if exists t1;
 drop table if exists t2;
@@ -187,9 +188,11 @@ select * from (select key as a, c_int+1 as b, sum(c_int) as c from t1 where (t1.
 select * from (select key as a, c_int+1 as b, sum(c_int) as c from t1 where (t1.c_int + 1 >= 0) and (t1.c_int > 0 or t1.c_float >= 0) group by c_float, t1.c_int, key having t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc limit 5) t1 left outer join (select key as p, c_int+1 as q, sum(c_int) as r from t2 where (t2.c_int + 1 >= 0) and (t2.c_int > 0 or t2.c_float >= 0) group by c_float, t2.c_int, key having t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 limit 5) t2 on t1.a=p left outer join t3 on t1.a=key where (b + t2.q >= 0) and (b > 0 or c_int >= 0) group by t3.c_int, c having t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by t3.c_int % c asc, t3.c_int desc limit 5;
 
--- 8. Test UDAF
+-- 8. Test UDF/UDAF
 select count(*), count(c_int), sum(c_int), avg(c_int), max(c_int), min(c_int) from t1;
+select count(*), count(c_int), sum(c_int), avg(c_int), max(c_int), min(c_int), case c_int when 0 then 1 when 1 then 2 else 3 end, sum(case c_int when 0 then 1 when 1 then 2 else 3 end) from t1 group by c_int;
 select * from (select count(*) as a, count(distinct c_int) as b, sum(c_int) as c, avg(c_int) as d, max(c_int) as e, min(c_int) as f from t1) t1;
+select * from (select count(*) as a, count(distinct c_int) as b, sum(c_int) as c, avg(c_int) as d, max(c_int) as e, min(c_int) as f, case c_int when 0 then 1 when 1 then 2 else 3 end as g, sum(case c_int when 0 then 1 when 1 then 2 else 3 end) as h from t1 group by c_int) t1;
 select f,a,e,b from (select count(*) as a, count(c_int) as b, sum(c_int) as c, avg(c_int) as d, max(c_int) as e, min(c_int) as f from t1) t1;
 select f,a,e,b from (select count(*) as a, count(distinct c_int) as b, sum(distinct c_int) as c, avg(distinct c_int) as d, max(distinct c_int) as e, min(distinct c_int) as f from t1) t1;
 select count(c_int) as a, avg(c_float), key from t1 group by key;
 
@@ -199,12 +202,13 @@ select count(distinct c_int) as a, avg(c_float) from t1 group by c_float, c_int;
 -- 9.
Test Windowing Functions select count(c_int) over() from t1; -select * from (select count(c_int) over() from t1) t1; -select count(c_int) over(), sum(c_float) over() from t1; +select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from t1; +select * from (select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from t1) t1; select x from (select count(c_int) over() as x, sum(c_float) over() from t1) t1; select * from (select max(c_int) over (partition by key order by value Rows UNBOUNDED PRECEDING), min(c_int) over (partition by key order by value rows current row), count(c_int) over(partition by key order by value ROWS 1 PRECEDING), avg(value) over (partition by key order by value Rows between unbounded preceding and unbounded following), sum(value) over (partition by key order by value rows between unbounded preceding and current row), avg(c_float) over (partition by key order by value Rows between 1 preceding and unbounded following), sum(c_float) over (partition by key order by value rows between 1 preceding and current row), max(c_float) over (partition by key order by value rows between 1 preceding and unbounded following), min(c_float) over (partition by key order by value rows between 1 preceding and 1 following) from t1) t1; select i, a, h, b, c, d, e, f, g, a as x, a +1 as y from (select max(c_int) over (partition by key order by value range UNBOUNDED PRECEDING) a, min(c_int) over (partition by key order by value range current row) b, count(c_int) over(partition by key order by value range 1 PRECEDING) c, avg(value) over (partition by key order by value range between unbounded preceding and unbounded following) d, sum(value) over (partition by key order by value range between unbounded preceding and current row) e, avg(c_float) over (partition by key order by value range between 1 preceding and unbounded following) f, sum(c_float) over (partition by key order by value range between 1 preceding and current row) g, max(c_float) over (partition by key order by value range between 1 preceding and unbounded following) h, min(c_float) over (partition by key order by value range between 1 preceding and 1 following) i from t1) t1; +-- 10. Test views create view v1 as select c_int, value, c_boolean, dt from t1; create view v2 as select c_int, value from t2; @@ -218,7 +222,6 @@ select count(*) from v1 a join v1 b on a.value = b.value; create view v3 as select v1.value val from v1 join t1 on v1.c_boolean = t1.c_boolean; --- 10. 
view chaining select count(val) from v3 where val != '1'; with q1 as ( select key from t1 where key = '1') select count(*) from q1; @@ -296,7 +299,7 @@ part where part.p_size not in (select avg(p_size) from (select p_size from part) a where p_size < 10 - ) + ) order by p_name ; -- agg, corr @@ -305,7 +308,7 @@ from part b where b.p_size not in (select min(p_size) from (select p_mfgr, p_size from part) a where p_size < 10 and b.p_mfgr = a.p_mfgr - ) + ) order by p_name ; -- non agg, non corr, Group By in Parent Query @@ -374,7 +377,6 @@ where li.l_linenumber = 1 and -- Stage 2: group by Stage 1 o/p -- Stage 5: group by on sq2:src_cbo (subquery in having) -- Stage 6: Stage 2 o/p semijoin Stage 5 -explain select key, value, count(*) from src_cbo b where b.key in (select key from src_cbo where src_cbo.key > '8') @@ -383,7 +385,6 @@ having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by ; -- non agg, non corr, windowing -explain select p_mfgr, p_name, avg(p_size) from part group by p_mfgr, p_name diff --git a/ql/src/test/results/clientpositive/cbo_correctness.q.out b/ql/src/test/results/clientpositive/cbo_correctness.q.out index b430d00..161d3ff 100644 --- a/ql/src/test/results/clientpositive/cbo_correctness.q.out +++ b/ql/src/test/results/clientpositive/cbo_correctness.q.out @@ -899,7 +899,6 @@ NULL NULL NULL NULL NULL NULL NULL NULL -Warning: Shuffle Join JOIN[7][tables = [$hdt$_23, $hdt$_24]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select t1.key from t1 join t3 PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -16135,19 +16134,31 @@ POSTHOOK: Input: default@t3 #### A masked pattern was here #### 1 12 1 2 -PREHOOK: query: -- 8. Test UDAF +PREHOOK: query: -- 8. Test UDF/UDAF select count(*), count(c_int), sum(c_int), avg(c_int), max(c_int), min(c_int) from t1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t1@dt=2014 #### A masked pattern was here #### -POSTHOOK: query: -- 8. Test UDAF +POSTHOOK: query: -- 8. 
Test UDF/UDAF select count(*), count(c_int), sum(c_int), avg(c_int), max(c_int), min(c_int) from t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@dt=2014 #### A masked pattern was here #### 20 18 18 1.0 1 1 +PREHOOK: query: select count(*), count(c_int), sum(c_int), avg(c_int), max(c_int), min(c_int), case c_int when 0 then 1 when 1 then 2 else 3 end, sum(case c_int when 0 then 1 when 1 then 2 else 3 end) from t1 group by c_int +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: select count(*), count(c_int), sum(c_int), avg(c_int), max(c_int), min(c_int), case c_int when 0 then 1 when 1 then 2 else 3 end, sum(case c_int when 0 then 1 when 1 then 2 else 3 end) from t1 group by c_int +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@dt=2014 +#### A masked pattern was here #### +2 0 NULL NULL NULL NULL 3 6 +18 18 18 1.0 1 1 2 36 PREHOOK: query: select * from (select count(*) as a, count(distinct c_int) as b, sum(c_int) as c, avg(c_int) as d, max(c_int) as e, min(c_int) as f from t1) t1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -16159,6 +16170,18 @@ POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@dt=2014 #### A masked pattern was here #### 20 1 18 1.0 1 1 +PREHOOK: query: select * from (select count(*) as a, count(distinct c_int) as b, sum(c_int) as c, avg(c_int) as d, max(c_int) as e, min(c_int) as f, case c_int when 0 then 1 when 1 then 2 else 3 end as g, sum(case c_int when 0 then 1 when 1 then 2 else 3 end) as h from t1 group by c_int) t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select count(*) as a, count(distinct c_int) as b, sum(c_int) as c, avg(c_int) as d, max(c_int) as e, min(c_int) as f, case c_int when 0 then 1 when 1 then 2 else 3 end as g, sum(case c_int when 0 then 1 when 1 then 2 else 3 end) as h from t1 group by c_int) t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@dt=2014 +#### A masked pattern was here #### +2 0 NULL NULL NULL NULL 3 6 +18 1 18 1.0 1 1 2 36 PREHOOK: query: select f,a,e,b from (select count(*) as a, count(c_int) as b, sum(c_int) as c, avg(c_int) as d, max(c_int) as e, min(c_int) as f from t1) t1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -16264,66 +16287,66 @@ POSTHOOK: Input: default@t1@dt=2014 18 18 18 -PREHOOK: query: select * from (select count(c_int) over() from t1) t1 +PREHOOK: query: select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from t1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t1@dt=2014 #### A masked pattern was here #### -POSTHOOK: query: select * from (select count(c_int) over() from t1) t1 +POSTHOOK: query: select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@dt=2014 #### A masked pattern was here #### -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -PREHOOK: query: select count(c_int) over(), sum(c_float) over() from t1 +18 
18.0 1 1 1 1 1 0.0 1 NULL +18 18.0 1 1 2 1 1 0.0 1 NULL +18 18.0 1 1 3 1 1 0.0 1 NULL +18 18.0 1 1 4 1 1 0.0 1 NULL +18 18.0 1 1 5 1 1 0.0 1 1.0 +18 18.0 1 1 6 1 1 0.0 1 1.0 +18 18.0 1 1 7 1 1 0.0 1 1.0 +18 18.0 1 1 8 1 1 0.0 1 1.0 +18 18.0 1 1 9 1 1 0.0 1 1.0 +18 18.0 1 1 10 1 1 0.0 1 1.0 +18 18.0 1 1 11 1 1 0.0 1 1.0 +18 18.0 1 1 12 1 1 0.0 1 1.0 +18 18.0 1 1 13 1 1 0.0 1 1.0 +18 18.0 1 1 14 1 1 0.0 1 1.0 +18 18.0 1 1 15 1 1 0.0 1 1.0 +18 18.0 1 1 16 1 1 0.0 1 1.0 +18 18.0 1 1 17 1 1 0.0 1 1.0 +18 18.0 1 1 18 1 1 0.0 1 1.0 +18 18.0 1 1 19 1 1 0.0 1 1.0 +18 18.0 1 1 20 1 1 0.0 1 1.0 +PREHOOK: query: select * from (select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from t1) t1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t1@dt=2014 #### A masked pattern was here #### -POSTHOOK: query: select count(c_int) over(), sum(c_float) over() from t1 +POSTHOOK: query: select * from (select count(c_int) over(), sum(c_float) over(), max(c_int) over(), min(c_int) over(), row_number() over(), rank() over(), dense_rank() over(), percent_rank() over(), lead(c_int, 2, c_int) over(), lag(c_float, 2, c_float) over() from t1) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@dt=2014 #### A masked pattern was here #### -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 -18 18.0 +18 18.0 1 1 1 1 1 0.0 1 NULL +18 18.0 1 1 2 1 1 0.0 1 NULL +18 18.0 1 1 3 1 1 0.0 1 NULL +18 18.0 1 1 4 1 1 0.0 1 NULL +18 18.0 1 1 5 1 1 0.0 1 1.0 +18 18.0 1 1 6 1 1 0.0 1 1.0 +18 18.0 1 1 7 1 1 0.0 1 1.0 +18 18.0 1 1 8 1 1 0.0 1 1.0 +18 18.0 1 1 9 1 1 0.0 1 1.0 +18 18.0 1 1 10 1 1 0.0 1 1.0 +18 18.0 1 1 11 1 1 0.0 1 1.0 +18 18.0 1 1 12 1 1 0.0 1 1.0 +18 18.0 1 1 13 1 1 0.0 1 1.0 +18 18.0 1 1 14 1 1 0.0 1 1.0 +18 18.0 1 1 15 1 1 0.0 1 1.0 +18 18.0 1 1 16 1 1 0.0 1 1.0 +18 18.0 1 1 17 1 1 0.0 1 1.0 +18 18.0 1 1 18 1 1 0.0 1 1.0 +18 18.0 1 1 19 1 1 0.0 1 1.0 +18 18.0 1 1 20 1 1 0.0 1 1.0 PREHOOK: query: select x from (select count(c_int) over() as x, sum(c_float) over() from t1) t1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -16414,10 +16437,12 @@ POSTHOOK: Input: default@t1@dt=2014 1.0 1 1.0 1 2 1.0 2.0 1.0 2.0 1 2 NULL NULL NULL NULL 0 NULL 0.0 NULL NULL NULL NULL NULL NULL NULL NULL 0 NULL 0.0 NULL NULL NULL NULL -PREHOOK: query: create view v1 as select c_int, value, c_boolean, dt from t1 +PREHOOK: query: -- 10. Test views +create view v1 as select c_int, value, c_boolean, dt from t1 PREHOOK: type: CREATEVIEW PREHOOK: Input: default@t1 -POSTHOOK: query: create view v1 as select c_int, value, c_boolean, dt from t1 +POSTHOOK: query: -- 10. Test views +create view v1 as select c_int, value, c_boolean, dt from t1 POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@t1 POSTHOOK: Output: default@v1 @@ -16515,16 +16540,14 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@t1 POSTHOOK: Input: default@v1 POSTHOOK: Output: default@v3 -PREHOOK: query: -- 10. view chaining -select count(val) from v3 where val != '1' +PREHOOK: query: select count(val) from v3 where val != '1' PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t1@dt=2014 PREHOOK: Input: default@v1 PREHOOK: Input: default@v3 #### A masked pattern was here #### -POSTHOOK: query: -- 10. 
view chaining -select count(val) from v3 where val != '1' +POSTHOOK: query: select count(val) from v3 where val != '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@dt=2014 @@ -18056,7 +18079,6 @@ POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t2@dt=2014 #### A masked pattern was here #### 400 -Warning: Shuffle Join JOIN[24][tables = [$hdt$_519, $hdt$_524]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- 16. SubQueries Not In -- non agg, non corr select * @@ -18198,7 +18220,6 @@ POSTHOOK: Input: default@src_cbo 199 val_199 199 val_199 2 val_2 -Warning: Shuffle Join JOIN[26][tables = [$hdt$_530, $hdt$_536]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: -- non agg, corr select p_mfgr, b.p_name, p_size from part b @@ -18239,7 +18260,6 @@ Manufacturer#2 almond aquamarine rose maroon antique 25 Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 Manufacturer#4 almond azure aquamarine papaya violet 12 Manufacturer#5 almond azure blanched chiffon midnight 23 -Warning: Shuffle Join JOIN[39][tables = [$hdt$_543, $hdt$_547, $hdt$_554]] in Stage 'Stage-3:MAPRED' is a cross product PREHOOK: query: -- agg, non corr select p_name, p_size from @@ -18247,7 +18267,7 @@ part where part.p_size not in (select avg(p_size) from (select p_size from part) a where p_size < 10 - ) + ) order by p_name PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### @@ -18258,44 +18278,43 @@ part where part.p_size not in (select avg(p_size) from (select p_size from part) a where p_size < 10 - ) + ) order by p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -almond aquamarine dodger light gainsboro 46 +almond antique blue firebrick mint 31 +almond antique burnished rose metallic 2 +almond antique burnished rose metallic 2 +almond antique chartreuse khaki white 17 +almond antique chartreuse lavender yellow 34 +almond antique forest lavender goldenrod 14 +almond antique gainsboro frosted violet 10 +almond antique medium spring khaki 6 +almond antique metallic orange dim 19 +almond antique misty red olive 1 almond antique olive coral navajo 45 -almond aquamarine pink moccasin thistle 42 -almond antique violet turquoise frosted 40 +almond antique salmon chartreuse burlywood 6 +almond antique sky peru orange 2 +almond antique violet chocolate turquoise 14 almond antique violet mint lemon 39 -almond antique chartreuse lavender yellow 34 -almond antique blue firebrick mint 31 +almond antique violet turquoise frosted 40 almond aquamarine burnished black steel 28 +almond aquamarine dodger light gainsboro 46 almond aquamarine floral ivory bisque 27 +almond aquamarine midnight light salmon 2 +almond aquamarine pink moccasin thistle 42 almond aquamarine rose maroon antique 25 -almond azure blanched chiffon midnight 23 -almond antique metallic orange dim 19 almond aquamarine sandy cyan gainsboro 18 -almond antique chartreuse khaki white 17 -almond antique violet chocolate turquoise 14 -almond antique forest lavender goldenrod 14 -almond azure aquamarine papaya violet 12 -almond antique gainsboro frosted violet 10 almond aquamarine yellow dodger mint 7 -almond antique salmon chartreuse burlywood 6 -almond antique medium spring khaki 6 -almond antique burnished rose metallic 2 -almond aquamarine midnight light salmon 2 -almond antique sky peru orange 2 -almond antique burnished rose metallic 2 -almond antique misty red olive 1 -Warning: Shuffle Join JOIN[36][tables = [$hdt$_558, $hdt$_565]] in 
Stage 'Stage-3:MAPRED' is a cross product +almond azure aquamarine papaya violet 12 +almond azure blanched chiffon midnight 23 PREHOOK: query: -- agg, corr select p_mfgr, p_name, p_size from part b where b.p_size not in (select min(p_size) from (select p_mfgr, p_size from part) a where p_size < 10 and b.p_mfgr = a.p_mfgr - ) + ) order by p_name PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### @@ -18305,31 +18324,30 @@ from part b where b.p_size not in (select min(p_size) from (select p_mfgr, p_size from part) a where p_size < 10 and b.p_mfgr = a.p_mfgr - ) + ) order by p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -Manufacturer#1 almond antique salmon chartreuse burlywood 6 -Manufacturer#5 almond antique medium spring khaki 6 -Manufacturer#4 almond antique gainsboro frosted violet 10 -Manufacturer#4 almond azure aquamarine papaya violet 12 -Manufacturer#2 almond antique violet chocolate turquoise 14 -Manufacturer#3 almond antique forest lavender goldenrod 14 -Manufacturer#3 almond antique chartreuse khaki white 17 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 -Manufacturer#3 almond antique metallic orange dim 19 -Manufacturer#5 almond azure blanched chiffon midnight 23 -Manufacturer#2 almond aquamarine rose maroon antique 25 -Manufacturer#4 almond aquamarine floral ivory bisque 27 -Manufacturer#1 almond aquamarine burnished black steel 28 Manufacturer#5 almond antique blue firebrick mint 31 +Manufacturer#3 almond antique chartreuse khaki white 17 Manufacturer#1 almond antique chartreuse lavender yellow 34 +Manufacturer#3 almond antique forest lavender goldenrod 14 +Manufacturer#4 almond antique gainsboro frosted violet 10 +Manufacturer#5 almond antique medium spring khaki 6 +Manufacturer#3 almond antique metallic orange dim 19 +Manufacturer#3 almond antique olive coral navajo 45 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 Manufacturer#4 almond antique violet mint lemon 39 Manufacturer#2 almond antique violet turquoise frosted 40 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 -Manufacturer#3 almond antique olive coral navajo 45 +Manufacturer#1 almond aquamarine burnished black steel 28 Manufacturer#5 almond aquamarine dodger light gainsboro 46 -Warning: Shuffle Join JOIN[24][tables = [$hdt$_573, $hdt$_578]] in Stage 'Stage-3:MAPRED' is a cross product +Manufacturer#4 almond aquamarine floral ivory bisque 27 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 +Manufacturer#2 almond aquamarine rose maroon antique 25 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 +Manufacturer#4 almond azure aquamarine papaya violet 12 +Manufacturer#5 almond azure blanched chiffon midnight 23 PREHOOK: query: -- non agg, non corr, Group By in Parent Query select li.l_partkey, count(*) from lineitem li @@ -18364,7 +18382,6 @@ POSTHOOK: Input: default@lineitem 139636 1 175839 1 182052 1 -Warning: Shuffle Join JOIN[41][tables = [$hdt$_586, $hdt$_593]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: -- add null check test from sq_notin.q once HIVE-7721 resolved. 
-- non agg, corr, having @@ -18395,7 +18412,6 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### Manufacturer#1 1173.15 Manufacturer#2 1690.68 -Warning: Shuffle Join JOIN[39][tables = [$hdt$_602, $hdt$_608]] in Stage 'Stage-3:MAPRED' is a cross product PREHOOK: query: -- agg, non corr, having select b.p_mfgr, min(p_retailprice) from part b @@ -18514,373 +18530,65 @@ PREHOOK: query: -- where and having -- Stage 2: group by Stage 1 o/p -- Stage 5: group by on sq2:src_cbo (subquery in having) -- Stage 6: Stage 2 o/p semijoin Stage 5 -explain select key, value, count(*) from src_cbo b where b.key in (select key from src_cbo where src_cbo.key > '8') group by key, value having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key ) PREHOOK: type: QUERY +PREHOOK: Input: default@src_cbo +#### A masked pattern was here #### POSTHOOK: query: -- where and having -- Plan is: -- Stage 1: b semijoin sq1:src_cbo (subquery in where) -- Stage 2: group by Stage 1 o/p -- Stage 5: group by on sq2:src_cbo (subquery in having) -- Stage 6: Stage 2 o/p semijoin Stage 5 -explain select key, value, count(*) from src_cbo b where b.key in (select key from src_cbo where src_cbo.key > '8') group by key, value having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key ) POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - TableScan - alias: src_cbo - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((key > '8') and key is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1202 Data size: 213956 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) 
- outputColumnNames: _col0, _col1 - Statistics: Num rows: 1202 Data size: 213956 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1202 Data size: 223572 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1202 Data size: 223572 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: bigint) - sort order: + - Map-reduce partition columns: _col2 (type: bigint) - Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - 
Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +POSTHOOK: Input: default@src_cbo +#### A masked pattern was here #### +80 val_80 1 +96 val_96 1 +92 val_92 1 +9 val_9 1 +87 val_87 1 +86 val_86 1 +85 val_85 1 +82 val_82 1 +84 val_84 2 +95 val_95 2 +83 val_83 2 +98 val_98 2 +97 val_97 2 +90 val_90 3 PREHOOK: query: -- non agg, non corr, windowing -explain select p_mfgr, p_name, avg(p_size) from part group by p_mfgr, p_name having p_name in (select first_value(p_name) over(partition by p_mfgr order by p_size) from part) PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### POSTHOOK: query: -- non agg, non corr, windowing -explain select p_mfgr, p_name, avg(p_size) from part group by p_mfgr, p_name having p_name in (select first_value(p_name) over(partition by p_mfgr order by p_size) from part) POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: int) - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - PTF Operator - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _wcol0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: _wcol0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col2 (type: double) - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: avg(_col2) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - value expressions: _col2 (type: struct) - Reduce Operator Tree: - Group By Operator - 
aggregations: avg(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2.0 +Manufacturer#3 almond antique misty red olive 1.0 +Manufacturer#5 almond antique sky peru orange 2.0 +Manufacturer#2 almond aquamarine midnight light salmon 2.0 +Manufacturer#4 almond aquamarine yellow dodger mint 7.0 PREHOOK: query: -- 18. SubQueries Not Exists -- distinct, corr select *
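
The SemanticAnalyzer hunk at the top of this patch resolves an aggregate call's return type in three steps: ranking functions get a hard-wired type ('int', or 'double' for percent_rank), other aggregates are typed through their GenericUDAF evaluator, and calls for which no evaluator can be obtained (such as the CASE expressions exercised by the new "-- 8." tests) fall back to GenericUDF translation. Below is a minimal, self-contained sketch of that fallback chain, assuming simplified stand-ins: the TypeInfo enum, the RANKING set, the UDAF_TYPES map, and resolveReturnType are invented for illustration and are not Hive's FunctionRegistry or TypeInfoFactory API.

import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;

// Sketch of the three-step return-type resolution added to getHiveAggInfo():
// hard-wired ranking types, then UDAF evaluator lookup, then GenericUDF fallback.
public final class UdafReturnTypeSketch {

  // Hypothetical stand-in for Hive's TypeInfo objects.
  enum TypeInfo { INT, DOUBLE, BIGINT, UNKNOWN }

  // Stand-in for FunctionRegistry.isRankingFunction(aggName).
  private static final Set<String> RANKING =
      Set.of("rank", "dense_rank", "percent_rank", "cume_dist");

  // Stand-in for resolving a GenericUDAFEvaluator; a miss here models the
  // exception the patch catches around getGenericUDAFEvaluator().
  private static final Map<String, TypeInfo> UDAF_TYPES =
      Map.of("count", TypeInfo.BIGINT, "sum", TypeInfo.BIGINT);

  static TypeInfo resolveReturnType(String aggName,
      Function<String, TypeInfo> genericUdfTranslation) {
    String name = aggName.toLowerCase(Locale.ROOT);

    // 3.2 Ranking functions: 'int', except percent_rank which is 'double'.
    if (RANKING.contains(name)) {
      return name.equals("percent_rank") ? TypeInfo.DOUBLE : TypeInfo.INT;
    }

    // 3.3 Try the UDAF evaluator first.
    TypeInfo udafRetType = UDAF_TYPES.get(name);

    // 3.4 If no evaluator gave a type, fall back to typing the expression
    // as an ordinary function call (the GenericUDF translation path).
    return udafRetType != null ? udafRetType : genericUdfTranslation.apply(name);
  }

  public static void main(String[] args) {
    System.out.println(resolveReturnType("percent_rank", n -> TypeInfo.UNKNOWN)); // DOUBLE
    System.out.println(resolveReturnType("count", n -> TypeInfo.UNKNOWN));        // BIGINT
    // A CASE expression has no UDAF evaluator, so it takes the fallback path.
    System.out.println(resolveReturnType("case", n -> TypeInfo.INT));             // INT
  }
}

In the patch itself, the fallback only runs when the evaluator lookup throws and leaves udafRetType null, which is exactly the null-check-then-translate shape sketched above.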