diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d39b8bd..654f3b1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -553,7 +553,7 @@ public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias,
           function.getType() == HiveParser.TOK_SUBQUERY_EXPR) {
         function = (ASTNode)function.getChild(0);
       }
-      doPhase1GetAllAggregations(function, aggregationTrees, wdwFns);
+      doPhase1GetAllAggregations(function, aggregationTrees, wdwFns, null);
     }
 
     // window based aggregations are handled differently
@@ -604,7 +604,8 @@ private void doPhase1GetColumnAliasesFromSelect(
    * @throws SemanticException
    */
   private void doPhase1GetAllAggregations(ASTNode expressionTree,
-      HashMap<String, ASTNode> aggregations, List<ASTNode> wdwFns) throws SemanticException {
+      HashMap<String, ASTNode> aggregations, List<ASTNode> wdwFns,
+      ASTNode wndParent) throws SemanticException {
     int exprTokenType = expressionTree.getToken().getType();
     if(exprTokenType == HiveParser.TOK_SUBQUERY_EXPR) {
       //since now we have scalar subqueries we can get subquery expression in having
@@ -612,6 +613,8 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree,
       return;
     }
 
+    boolean parentIsWindowSpec = wndParent != null;
+
     if (exprTokenType == HiveParser.TOK_FUNCTION
         || exprTokenType == HiveParser.TOK_FUNCTIONDI
         || exprTokenType == HiveParser.TOK_FUNCTIONSTAR) {
@@ -622,8 +625,9 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree,
       // Further, we will examine its children AST nodes to check whether
       // there are aggregation functions within
       wdwFns.add(expressionTree);
-      doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(expressionTree.getChildCount()-1),
-          aggregations, wdwFns);
+      for(Node child : expressionTree.getChildren()) {
+        doPhase1GetAllAggregations((ASTNode) child, aggregations, wdwFns, expressionTree);
+      }
       return;
     }
     if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
@@ -633,11 +637,11 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree,
       if (FunctionRegistry.getFunctionInfo(functionName) == null) {
         throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(functionName));
       }
-      if(FunctionRegistry.impliesOrder(functionName)) {
+      if(FunctionRegistry.impliesOrder(functionName) && !parentIsWindowSpec) {
         throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
       }
       if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) {
-        if(containsLeadLagUDF(expressionTree)) {
+        if(containsLeadLagUDF(expressionTree) && !parentIsWindowSpec) {
           throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
         }
         aggregations.put(expressionTree.toStringTree(), expressionTree);
@@ -652,7 +656,7 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree,
     }
 
     for (int i = 0; i < expressionTree.getChildCount(); i++) {
       doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i),
-          aggregations, wdwFns);
+          aggregations, wdwFns, wndParent);
     }
   }
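Note on the change above: doPhase1GetAllAggregations now threads the enclosing window specification down the recursion, so aggregates and order-implying functions found anywhere under an OVER clause (in the function arguments as well as the partition/order expressions) are collected instead of raising MISSING_OVER_CLAUSE. Below is a minimal, self-contained sketch of that traversal pattern; the Ast class and token names are hypothetical stand-ins, not Hive's actual parser classes.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

final class AggregationScanSketch {
  // Hypothetical stand-in for Hive's ASTNode: a token plus children.
  static final class Ast {
    final String token;
    final List<Ast> children;
    Ast(String token, Ast... kids) {
      this.token = token;
      this.children = List.of(kids);
    }
  }

  // Mirrors the patched recursion: wndParent is non-null exactly when the
  // current subtree hangs off a window specification.
  static void collect(Ast node, Map<String, Ast> aggregations,
      List<Ast> windowFns, Ast wndParent) {
    boolean parentIsWindowSpec = wndParent != null;
    if (node.token.equals("TOK_WINDOWSPEC")) {
      windowFns.add(node);
      // Visit every child, function arguments and partition/order
      // expressions alike, with the window node as the parent context.
      for (Ast child : node.children) {
        collect(child, aggregations, windowFns, node);
      }
      return;
    }
    if (node.token.equals("rank") && !parentIsWindowSpec) {
      // Outside a window spec, an order-implying function is an error.
      throw new IllegalStateException("MISSING_OVER_CLAUSE: rank");
    }
    if (node.token.startsWith("sum")) {
      aggregations.put(node.token, node);
      return;
    }
    for (Ast child : node.children) {
      collect(child, aggregations, windowFns, wndParent);
    }
  }

  public static void main(String[] args) {
    // sum(sum(c1)) over(partition by c2): the inner sum(c1) sits under the
    // window spec, so it is recorded as an ordinary group-by aggregate.
    Ast window = new Ast("TOK_WINDOWSPEC", new Ast("sum(c1)"), new Ast("c2"));
    Map<String, Ast> aggs = new LinkedHashMap<>();
    collect(window, aggs, new ArrayList<>(), null);
    System.out.println(aggs.keySet()); // prints [sum(c1)]
  }
}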
diff --git a/ql/src/test/queries/clientpositive/outer_reference_windowed.q b/ql/src/test/queries/clientpositive/outer_reference_windowed.q
new file mode 100644
index 0000000..8ff9934
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/outer_reference_windowed.q
@@ -0,0 +1,46 @@
+DROP TABLE IF EXISTS e011_01;
+DROP TABLE IF EXISTS e011_02;
+DROP TABLE IF EXISTS e011_03;
+
+CREATE TABLE e011_01 (
+  c1 decimal(15,2),
+  c2 decimal(15,2));
+
+CREATE TABLE e011_02 (
+  c1 decimal(15,2),
+  c2 decimal(15,2));
+
+CREATE TABLE e011_03 (
+  c1 decimal(15,2),
+  c2 decimal(15,2));
+
+ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS;
+ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS;
+ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS;
+
+set hive.explain.user=false;
+
+explain select sum(sum(c1)) over() from e011_01;
+
+explain select sum(sum(c1)) over(
+  partition by c2 order by c1)
+  from e011_01
+  group by e011_01.c1, e011_01.c2;
+
+explain select sum(sum(e011_01.c1)) over(
+  partition by e011_01.c2 order by e011_01.c1)
+  from e011_01
+  join e011_03 on e011_01.c1 = e011_03.c1
+  group by e011_01.c1, e011_01.c2;
+
+explain select sum(sum(e011_01.c1)) over(
+  partition by e011_03.c2 order by e011_03.c1)
+  from e011_01
+  join e011_03 on e011_01.c1 = e011_03.c1
+  group by e011_03.c1, e011_03.c2;
+
+explain select sum(corr(e011_01.c1, e011_03.c1))
+  over(partition by e011_01.c2 order by e011_03.c2)
+  from e011_01
+  join e011_03 on e011_01.c1 = e011_03.c1
+  group by e011_03.c2, e011_01.c2;
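The golden output that follows shows each of these queries planned in two steps: a Group By stage computes the inner aggregate, and a downstream PTF stage applies the window function over the grouped rows. A small plain-Java sketch of that evaluation order, using made-up sample data (the tables in the test itself are empty, so the plans below carry no real statistics):

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

final class WindowedAggDemo {
  record Row(BigDecimal c1, BigDecimal c2) {}
  record Grouped(BigDecimal c1, BigDecimal c2, BigDecimal sumC1) {}

  public static void main(String[] args) {
    // Hypothetical contents for e011_01.
    List<Row> rows = List.of(
        new Row(new BigDecimal("1.00"), new BigDecimal("10.00")),
        new Row(new BigDecimal("1.00"), new BigDecimal("10.00")),
        new Row(new BigDecimal("2.00"), new BigDecimal("10.00")));

    // Step 1 (the Group By stage): one output row per (c1, c2) group,
    // carrying sum(c1) for that group.
    Map<String, Grouped> groups = new LinkedHashMap<>();
    for (Row r : rows) {
      groups.merge(r.c1() + "|" + r.c2(),
          new Grouped(r.c1(), r.c2(), r.c1()),
          (a, b) -> new Grouped(a.c1(), a.c2(), a.sumC1().add(b.sumC1())));
    }

    // Step 2 (the PTF stage): running sum over the grouped rows,
    // partitioned by c2, ordered by c1, with the default frame
    // (unbounded preceding to current row).
    List<Grouped> ordered = new ArrayList<>(groups.values());
    ordered.sort(Comparator.comparing(Grouped::c2).thenComparing(Grouped::c1));
    BigDecimal running = BigDecimal.ZERO;
    BigDecimal currentPartition = null;
    for (Grouped g : ordered) {
      if (!g.c2().equals(currentPartition)) {
        running = BigDecimal.ZERO;
        currentPartition = g.c2();
      }
      running = running.add(g.sumC1());
      // Prints 2.00 then 4.00: sum(c1) is 2.00 for each group, and the
      // window accumulates those group sums within the c2 partition.
      System.out.println("c1=" + g.c1() + " c2=" + g.c2()
          + " sum(sum(c1)) over(...) = " + running);
    }
  }
}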
diff --git a/ql/src/test/results/clientpositive/outer_reference_windowed.q.out b/ql/src/test/results/clientpositive/outer_reference_windowed.q.out
new file mode 100644
index 0000000..941382b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/outer_reference_windowed.q.out
@@ -0,0 +1,710 @@
+PREHOOK: query: DROP TABLE IF EXISTS e011_01
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS e011_01
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS e011_02
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS e011_02
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS e011_03
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS e011_03
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE e011_01 (
+  c1 decimal(15,2),
+  c2 decimal(15,2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@e011_01
+POSTHOOK: query: CREATE TABLE e011_01 (
+  c1 decimal(15,2),
+  c2 decimal(15,2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@e011_01
+PREHOOK: query: CREATE TABLE e011_02 (
+  c1 decimal(15,2),
+  c2 decimal(15,2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@e011_02
+POSTHOOK: query: CREATE TABLE e011_02 (
+  c1 decimal(15,2),
+  c2 decimal(15,2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@e011_02
+PREHOOK: query: CREATE TABLE e011_03 (
+  c1 decimal(15,2),
+  c2 decimal(15,2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@e011_03
+POSTHOOK: query: CREATE TABLE e011_03 (
+  c1 decimal(15,2),
+  c2 decimal(15,2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@e011_03
+PREHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_01
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_01
+#### A masked pattern was here ####
+PREHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_02
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_02
+#### A masked pattern was here ####
+PREHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e011_03
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e011_03
+#### A masked pattern was here ####
+PREHOOK: query: explain select sum(sum(c1)) over() from e011_01
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(sum(c1)) over() from e011_01
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: e011_01
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: c1 (type: decimal(15,2))
+              outputColumnNames: c1
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: sum(c1)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: decimal(25,2))
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: 0 (type: int)
+              sort order: +
+              Map-reduce partition columns: 0 (type: int)
+              Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: decimal(25,2))
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: decimal(25,2))
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: decimal(25,2)
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: 0 ASC NULLS FIRST
+                  partition by: 0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: sum_window_0
+                        arguments: _col0
+                        name: sum
+                        window function: GenericUDAFSumHiveDecimal
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+            Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: sum_window_0 (type: decimal(35,2))
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select sum(sum(c1)) over(
+  partition by c2 order by c1)
+  from e011_01
+  group by e011_01.c1, e011_01.c2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(sum(c1)) over(
+  partition by c2 order by c1)
+  from e011_01
+  group by e011_01.c1, e011_01.c2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: e011_01
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
+              outputColumnNames: c1, c2
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: sum(c1)
+                keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col2 (type: decimal(25,2))
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2))
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2))
+              sort order: ++
+              Map-reduce partition columns: _col1 (type: decimal(15,2))
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col2 (type: decimal(25,2))
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2))
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2)
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col0 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: sum_window_0
+                        arguments: _col2
+                        name: sum
+                        window function: GenericUDAFSumHiveDecimal
+                        window frame: PRECEDING(MAX)~CURRENT
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: sum_window_0 (type: decimal(35,2))
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select sum(sum(e011_01.c1)) over(
+  partition by e011_01.c2 order by e011_01.c1)
+  from e011_01
+  join e011_03 on e011_01.c1 = e011_03.c1
+  group by e011_01.c1, e011_01.c2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(sum(e011_01.c1)) over(
+  partition by e011_01.c2 order by e011_01.c1)
+  from e011_01
+  join e011_03 on e011_01.c1 = e011_03.c1
+  group by e011_01.c1, e011_01.c2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: e011_01
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: c1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: decimal(15,2))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: decimal(15,2))
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col1 (type: decimal(15,2))
+          TableScan
+            alias: e011_03
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: c1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: c1 (type: decimal(15,2))
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: decimal(15,2))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: decimal(15,2))
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: decimal(15,2))
+            1 _col0 (type: decimal(15,2))
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Group By Operator
+            aggregations: sum(_col0)
+            keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col2 (type: decimal(25,2))
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2))
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2))
+              sort order: ++
+              Map-reduce partition columns: _col1 (type: decimal(15,2))
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col2 (type: decimal(25,2))
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2))
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2)
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col0 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: sum_window_0
+                        arguments: _col2
+                        name: sum
+                        window function: GenericUDAFSumHiveDecimal
+                        window frame: PRECEDING(MAX)~CURRENT
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: sum_window_0 (type: decimal(35,2))
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select sum(sum(e011_01.c1)) over(
+  partition by e011_03.c2 order by e011_03.c1)
+  from e011_01
+  join e011_03 on e011_01.c1 = e011_03.c1
+  group by e011_03.c1, e011_03.c2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(sum(e011_01.c1)) over(
+  partition by e011_03.c2 order by e011_03.c1)
+  from e011_01
+  join e011_03 on e011_01.c1 = e011_03.c1
+  group by e011_03.c1, e011_03.c2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: e011_01
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: c1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: c1 (type: decimal(15,2))
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: decimal(15,2))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: decimal(15,2))
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          TableScan
+            alias: e011_03
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: c1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: decimal(15,2))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: decimal(15,2))
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col1 (type: decimal(15,2))
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: decimal(15,2))
+            1 _col0 (type: decimal(15,2))
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Group By Operator
+            aggregations: sum(_col0)
+            keys: _col1 (type: decimal(15,2)), _col2 (type: decimal(15,2))
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col2 (type: decimal(25,2))
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2))
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2))
+              sort order: ++
+              Map-reduce partition columns: _col1 (type: decimal(15,2))
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col2 (type: decimal(25,2))
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2))
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2)
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col0 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: sum_window_0
+                        arguments: _col2
+                        name: sum
+                        window function: GenericUDAFSumHiveDecimal
+                        window frame: PRECEDING(MAX)~CURRENT
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: sum_window_0 (type: decimal(35,2))
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select sum(corr(e011_01.c1, e011_03.c1))
+  over(partition by e011_01.c2 order by e011_03.c2)
+  from e011_01
+  join e011_03 on e011_01.c1 = e011_03.c1
+  group by e011_03.c2, e011_01.c2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(corr(e011_01.c1, e011_03.c1))
+  over(partition by e011_01.c2 order by e011_03.c2)
+  from e011_01
+  join e011_03 on e011_01.c1 = e011_03.c1
+  group by e011_03.c2, e011_01.c2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: e011_01
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: c1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: decimal(15,2))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: decimal(15,2))
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col1 (type: decimal(15,2))
+          TableScan
+            alias: e011_03
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: c1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2))
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: decimal(15,2))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: decimal(15,2))
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col1 (type: decimal(15,2))
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: decimal(15,2))
+            1 _col0 (type: decimal(15,2))
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Group By Operator
+            aggregations: corr(_col0, _col2)
+            keys: _col1 (type: decimal(15,2)), _col3 (type: decimal(15,2))
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2))
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: decimal(15,2))
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col2 (type: struct<count:bigint,xavg:double,yavg:double,xvar:double,yvar:double,covar:double>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: corr(VALUE._col0)
+          keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2))
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)), _col2 (type: double)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            PTF Operator
+              Function definitions:
+                  Input definition
+                    input alias: ptf_0
+                    output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: double
+                    type: WINDOWING
+                  Windowing table definition
+                    input alias: ptf_1
+                    name: windowingtablefunction
+                    order by: _col0 ASC NULLS FIRST
+                    partition by: _col1
+                    raw input shape:
+                    window functions:
+                        window function definition
+                          alias: sum_window_0
+                          arguments: _col2
+                          name: sum
+                          window function: GenericUDAFSumDouble
+                          window frame: PRECEDING(MAX)~CURRENT
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: sum_window_0 (type: double)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+