diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 987f25d..a2ee8f7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -35,8 +35,8 @@
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Queue;
 import java.util.Map.Entry;
+import java.util.Queue;
 import java.util.Set;
 import java.util.TreeSet;
 import java.util.UUID;
@@ -69,7 +69,6 @@
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
-import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.QueryProperties;
 import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
@@ -191,7 +190,6 @@
 import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
 import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
-import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
@@ -566,10 +564,11 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree,
     assert (expressionTree.getChildCount() != 0);
     if (expressionTree.getChild(expressionTree.getChildCount()-1).getType()
         == HiveParser.TOK_WINDOWSPEC) {
+      // If it is a windowing spec, we include it in the list
+      // Further, we will examine its children AST nodes to check whether
+      // there are aggregation functions within
       wdwFns.add(expressionTree);
-      return;
-    }
-    if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
+    } else if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
       String functionName = unescapeIdentifier(expressionTree.getChild(0)
           .getText());
       // Validate the function name
diff --git ql/src/test/queries/clientpositive/windowing_gby2.q ql/src/test/queries/clientpositive/windowing_gby2.q
new file mode 100644
index 0000000..920f723
--- /dev/null
+++ ql/src/test/queries/clientpositive/windowing_gby2.q
@@ -0,0 +1,41 @@
+set hive.mapred.mode=nonstrict;
+
+explain
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key;
+
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key;
+
+explain
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int);
+
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int);
+
+explain
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value;
+
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value;
+
+explain
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean;
+
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean;
diff --git ql/src/test/results/clientpositive/windowing_gby2.q.out ql/src/test/results/clientpositive/windowing_gby2.q.out
new file mode 100644
index 0000000..4bd6994
--- /dev/null
+++ ql/src/test/results/clientpositive/windowing_gby2.q.out
@@ -0,0 +1,652 @@
+PREHOOK: query: explain
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ws
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), c_int (type: int)
+              outputColumnNames: key, c_int
+              Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(c_int)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: bigint)
+            outputColumnNames: _col1
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: 0 (type: int), _col1 (type: bigint)
+              sort order: ++
+              Map-reduce partition columns: 0 (type: int)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: bigint)
+          outputColumnNames: _col1
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: bigint
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col1 ASC NULLS FIRST
+                  partition by: 0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: _col1
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: rank_window_0 (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+1
+2
+2
+2
+5
+5
+7
+PREHOOK: query: explain
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ws
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: UDFToInteger(key) (type: int), value (type: string), c_int (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: min(_col1), sum(_col2)
+                keys: _col0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: min(VALUE._col0), sum(VALUE._col1)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: string), _col2 (type: bigint)
+              sort order: ++
+              Map-reduce partition columns: _col1 (type: string)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col1: string, _col2: bigint
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: avg_window_0
+                        arguments: _col0
+                        name: avg
+                        window function: GenericUDAFAverageEvaluatorDouble
+                        window frame: PRECEDING(MAX)~
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: avg_window_0 (type: double)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+NULL
+1.0
+2.0
+3.0
+PREHOOK: query: explain
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-4
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t3
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string), (UDFToFloat(c_int) - c_float) (type: float), (UDFToDouble(c_float) / UDFToDouble(c_int)) (type: double), c_int (type: int), ((UDFToDouble(c_float) / UDFToDouble(c_int)) - UDFToDouble(c_int)) (type: double)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(_col2), sum(_col3), max(_col4), sum(_col5)
+                keys: _col0 (type: string), _col1 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), sum(VALUE._col3)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col2 (type: double)
+              sort order: +-
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: int), VALUE._col3 (type: double)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: string, _col1: string, _col2: double, _col3: double, _col4: int, _col5: double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 DESC NULLS LAST
+                  partition by: _col0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: _col2
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: double), rank_window_0 (type: int)
+              outputColumnNames: _col1, _col3, _col4, _col5, rank_window_0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: lower(_col1) (type: string), _col3 (type: double)
+              sort order: ++
+              Map-reduce partition columns: lower(_col1) (type: string)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              value expressions: rank_window_0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col2 (type: string), KEY.reducesinkkey1 (type: double), VALUE._col4 (type: int), VALUE._col5 (type: double)
+          outputColumnNames: _col0, _col2, _col4, _col5, _col6
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col2: string, _col4: double, _col5: int, _col6: double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col4 ASC NULLS FIRST
+                  partition by: lower(_col2)
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: dense_rank_window_1
+                        arguments: _col4
+                        name: dense_rank
+                        window function: GenericUDAFDenseRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col5 (type: int), _col6 (type: double), dense_rank_window_1 (type: int)
+              outputColumnNames: _col0, _col5, _col6, dense_rank_window_1
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col5 (type: int), _col6 (type: double)
+              sort order: ++
+              Map-reduce partition columns: _col5 (type: int)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              value expressions: dense_rank_window_1 (type: int), _col0 (type: int)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double)
+          outputColumnNames: _col0, _col1, _col6, _col7
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col1: int, _col6: int, _col7: double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col7 ASC NULLS FIRST
+                  partition by: _col6
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: percent_rank_window_2
+                        arguments: _col7
+                        name: percent_rank
+                        window function: GenericUDAFPercentRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: int), _col0 (type: int), percent_rank_window_2 (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+1	1	0.0
+1	1	0.0
+1	1	0.0
+1	1	0.0
+1	1	0.0
+1	1	0.0
+1	1	0.0
+PREHOOK: query: explain
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ws
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: value is not null (type: boolean)
+              Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: value (type: string), c_int (type: int), c_boolean (type: boolean)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int), _col2 (type: boolean)
+          TableScan
+            alias: wr
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: cstring1 is not null (type: boolean)
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int), cstring1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: string)
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col1 (type: string)
+          outputColumnNames: _col1, _col2, _col3
+          Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: boolean), _col3 (type: int), _col1 (type: int)
+            outputColumnNames: _col2, _col3, _col1
+            Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: sum(_col3), sum(_col1)
+              keys: _col2 (type: boolean)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: boolean)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: boolean)
+              Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), sum(VALUE._col1)
+          keys: KEY._col0 (type: boolean)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: bigint), _col2 (type: bigint)
+            outputColumnNames: _col1, _col2
+            Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: 0 (type: int), (UDFToDouble(_col1) / UDFToDouble(_col2)) (type: double)
+              sort order: ++
+              Map-reduce partition columns: 0 (type: int)
+              Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+          outputColumnNames: _col1, _col2
+          Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: bigint, _col2: bigint
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: (UDFToDouble(_col1) / UDFToDouble(_col2)) ASC NULLS FIRST
+                  partition by: 0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: (UDFToDouble(_col1) / UDFToDouble(_col2))
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: rank_window_0 (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
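
Note on the SemanticAnalyzer change above: before this patch, doPhase1GetAllAggregations returned as soon as it saw a TOK_WINDOWSPEC child, so an aggregate nested inside the OVER clause (e.g. sum(ws.c_int) in rank() over (order by sum(ws.c_int))) was never registered as a GROUP BY-level aggregation; removing the early return lets the walker keep descending into the windowing spec. The following is a minimal, self-contained Java sketch of that control-flow fix, not the actual Hive code: Node, collect, and the token strings are hypothetical stand-ins for ASTNode, doPhase1GetAllAggregations, and the HiveParser token types.

// Illustrative sketch only. On a toy AST, the walker now records the windowing
// function AND keeps descending, so an aggregate hidden inside the window spec
// is still collected for the GROUP BY.
import java.util.ArrayList;
import java.util.List;

public class AggregationWalkSketch {

  // Hypothetical stand-in for Hive's ASTNode.
  static class Node {
    final String token;                 // e.g. "TOK_FUNCTION", "TOK_WINDOWSPEC", "sum"
    final List<Node> children = new ArrayList<>();
    Node(String token, Node... kids) {
      this.token = token;
      for (Node k : kids) {
        children.add(k);
      }
    }
  }

  static void collect(Node n, List<Node> aggregations, List<Node> wdwFns) {
    boolean isWindowed = !n.children.isEmpty()
        && n.children.get(n.children.size() - 1).token.equals("TOK_WINDOWSPEC");
    if (isWindowed) {
      // Old behaviour: wdwFns.add(n); return;  -- children were never visited,
      // so aggregates inside the window spec were silently dropped.
      wdwFns.add(n);
    } else if (n.token.equals("sum") || n.token.equals("min") || n.token.equals("max")) {
      aggregations.add(n);              // a plain aggregate call at this level
    }
    // New behaviour: always recurse, including into windowing specs.
    for (Node child : n.children) {
      collect(child, aggregations, wdwFns);
    }
  }

  public static void main(String[] args) {
    // rank() over (order by sum(c_int)), roughly as an AST.
    Node query = new Node("TOK_FUNCTION",
        new Node("rank"),
        new Node("TOK_WINDOWSPEC",
            new Node("TOK_ORDERBY",
                new Node("sum", new Node("c_int")))));
    List<Node> aggs = new ArrayList<>();
    List<Node> wdw = new ArrayList<>();
    collect(query, aggs, wdw);
    // With the early return removed, both lists are populated:
    // "window fns: 1, aggregations: 1"
    System.out.println("window fns: " + wdw.size() + ", aggregations: " + aggs.size());
  }
}

This is also why the new plans above run the group-by aggregation in one MR stage and the PTF windowing in a later stage: the aggregate collected from inside the OVER clause is computed first, and its result column then feeds the window function's ORDER BY.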