diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
index e3d3ce6d153200beae7ba4d4d3a91ff3f901c747..135b90b3a4333d3e5dbe477764a82d091b4a8065 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
@@ -30,8 +30,10 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -111,6 +113,19 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         // For RS-SEL-RS case. reducer operator in reducer task cannot be null in task compiler
         return null;
       }
+      List<Operator<? extends OperatorDesc>> ancestorList = new ArrayList<Operator<? extends OperatorDesc>>();
+      ancestorList.addAll(sel.getParentOperators());
+      while (!ancestorList.isEmpty()) {
+        Operator<? extends OperatorDesc> curParent = ancestorList.remove(0);
+        // PTF needs a SelectOp.
+        if (curParent instanceof PTFOperator) {
+          return null;
+        }
+        if ((curParent instanceof FilterOperator) && curParent.getParentOperators() != null) {
+          ancestorList.addAll(curParent.getParentOperators());
+        }
+      }
+
       if(sel.isIdentitySelect()) {
         parent.removeChildAndAdoptItsChildren(sel);
         LOG.debug("Identity project remover optimization removed : " + sel);
diff --git a/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q b/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q
new file mode 100644
index 0000000000000000000000000000000000000000..61d034e37f35363163670982863a0c082b49dc43
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q
@@ -0,0 +1,61 @@
+create table tlb1 (id int, fkey int, val string);
+create table tlb2 (fid int, name string);
+insert into table tlb1 values(100,1,'abc');
+insert into table tlb1 values(200,1,'efg');
+insert into table tlb2 values(1, 'key1');
+
+explain
+select ddd.id, ddd.fkey, aaa.name
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+select ddd.id, ddd.fkey, aaa.name
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+explain
+select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+
+set hive.optimize.ppd=false;
+
+explain
+select ddd.id, ddd.fkey, aaa.name
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+select ddd.id, ddd.fkey, aaa.name
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey;
+
+
diff --git a/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out b/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..9368df9a9efd152635d45d52a9af486429409f40
--- /dev/null
+++ b/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
@@ -0,0 +1,519 @@
+PREHOOK: query: create table tlb1 (id int, fkey int, val string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tlb1
+POSTHOOK: query: create table tlb1 (id int, fkey int, val string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tlb1
+PREHOOK: query: create table tlb2 (fid int, name string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tlb2
+POSTHOOK: query: create table tlb2 (fid int, name string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tlb2
+PREHOOK: query: insert into table tlb1 values(100,1,'abc')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@tlb1
+POSTHOOK: query: insert into table tlb1 values(100,1,'abc')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@tlb1
+POSTHOOK: Lineage: tlb1.fkey EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: tlb1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tlb1.val SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into table tlb1 values(200,1,'efg')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@tlb1
+POSTHOOK: query: insert into table tlb1 values(200,1,'efg')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@tlb1
+POSTHOOK: Lineage: tlb1.fkey EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: tlb1.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tlb1.val SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into table tlb2 values(1, 'key1')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@tlb2
+POSTHOOK: query: insert into table tlb2 values(1, 'key1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@tlb2
+POSTHOOK: Lineage: tlb2.fid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tlb2.name SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tlb1
+            Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: id (type: int), fkey (type: int)
+              outputColumnNames: id, fkey
+              Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: id (type: int), fkey (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                  Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col1: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col0, _col1
+                  partition by: _col0, _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: row_number_window_0
+                        name: row_number
+                        window function: GenericUDAFRowNumberEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: _col1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col1 (type: int)
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int)
+          TableScan
+            alias: aaa
+            Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: fid is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: fid (type: int)
+                sort order: +
+                Map-reduce partition columns: fid (type: int)
+                Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                value expressions: name (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col1 (type: int)
+            1 fid (type: int)
+          outputColumnNames: _col0, _col1, _col4
+          Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col4 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ddd.id, ddd.fkey, aaa.name
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlb1
+PREHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+POSTHOOK: query: select ddd.id, ddd.fkey, aaa.name
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlb1
+POSTHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+200	1	key1
+100	1	key1
+PREHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tlb1
+            Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: id (type: int), fkey (type: int)
+              outputColumnNames: id, fkey
+              Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: id (type: int), fkey (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                  Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col1: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col0, _col1
+                  partition by: _col0, _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: row_number_window_0
+                        name: row_number
+                        window function: GenericUDAFRowNumberEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: _col1 is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: int), row_number_window_0 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col1 (type: int)
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int), _col2 (type: int)
+          TableScan
+            alias: aaa
+            Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: fid is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: fid (type: int)
+                sort order: +
+                Map-reduce partition columns: fid (type: int)
+                Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                value expressions: name (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col1 (type: int)
+            1 fid (type: int)
+          outputColumnNames: _col0, _col1, _col2, _col4
+          Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col4 (type: string), _col2 (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlb1
+PREHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+POSTHOOK: query: select ddd.id, ddd.fkey, aaa.name, ddd.rnum
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlb1
+POSTHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+200	1	key1	1
+100	1	key1	1
+PREHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ddd.id, ddd.fkey, aaa.name
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tlb1
+            Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: id (type: int), fkey (type: int)
+              outputColumnNames: id, fkey
+              Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: id (type: int), fkey (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                  Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col1: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col0, _col1
+                  partition by: _col0, _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: row_number_window_0
+                        name: row_number
+                        window function: GenericUDAFRowNumberEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              Filter Operator
+                predicate: _col1 is not null (type: boolean)
+                Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col1 (type: int)
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int)
+          TableScan
+            alias: aaa
+            Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: fid is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: fid (type: int)
+                sort order: +
+                Map-reduce partition columns: fid (type: int)
+                Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                value expressions: name (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col1 (type: int)
+            1 fid (type: int)
+          outputColumnNames: _col0, _col1, _col4
+          Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col4 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ddd.id, ddd.fkey, aaa.name
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlb1
+PREHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+POSTHOOK: query: select ddd.id, ddd.fkey, aaa.name
+from (
+  select id, fkey,
+  row_number() over (partition by id, fkey) as rnum
+  from tlb1 group by id, fkey
+  ) ddd
+inner join tlb2 aaa on aaa.fid = ddd.fkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlb1
+POSTHOOK: Input: default@tlb2
+#### A masked pattern was here ####
+200	1	key1
+100	1	key1
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index e8a978623d5333efff3834573a966bca47b11a98..2433655e48950f77ee38c38dd02e169789e7b6c3 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -6967,22 +6967,25 @@ Stage-0
                     Map-reduce partition columns:rand() (type: double)
                     sort order:+++
                     Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
-                    PTF Operator [PTF_3]
-                       Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}]
+                    Select Operator [SEL_4]
+                       outputColumnNames:["_col1","_col2","_col5"]
                        Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
-                       Select Operator [SEL_2]
-                       |  outputColumnNames:["_col1","_col2","_col5"]
-                       |  Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
-                       |<-Map 1 [SIMPLE_EDGE]
-                          Reduce Output Operator [RS_1]
-                             key expressions:p_mfgr (type: string), p_name (type: string)
-                             Map-reduce partition columns:p_mfgr (type: string)
-                             sort order:++
-                             Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
-                             value expressions:p_size (type: int)
-                             TableScan [TS_0]
-                                alias:part
+                       PTF Operator [PTF_3]
+                          Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}]
+                          Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                          Select Operator [SEL_2]
+                          |  outputColumnNames:["_col1","_col2","_col5"]
+                          |  Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                          |<-Map 1 [SIMPLE_EDGE]
+                             Reduce Output Operator [RS_1]
+                                key expressions:p_mfgr (type: string), p_name (type: string)
+                                Map-reduce partition columns:p_mfgr (type: string)
+                                sort order:++
                                 Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+                                value expressions:p_size (type: int)
+                                TableScan [TS_0]
+                                   alias:part
+                                   Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
 
 PREHOOK: query: explain select abc.*
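Reviewer note: below is a minimal, standalone sketch of the guard this patch adds to IdentityProjectRemover. Starting from a candidate identity SELECT, it walks up through any chain of FilterOperators and keeps the SELECT whenever a PTFOperator is found above it, because PTF output must be consumed through a SELECT; as the hive.optimize.ppd=false plan above shows, a Filter can sit between the two. The Op/SelectOp/FilterOp/PtfOp classes are hypothetical stand-ins for illustration only, not Hive's operator hierarchy.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;

// Stand-in for Hive's operator DAG: each node only knows its parents.
class Op {
  final String name;
  final List<Op> parents = new ArrayList<>();
  Op(String name) { this.name = name; }
}
class SelectOp extends Op { SelectOp() { super("SEL"); } }
class FilterOp extends Op { FilterOp() { super("FIL"); } }
class PtfOp extends Op { PtfOp() { super("PTF"); } }

public class PtfGuardSketch {
  // Mirrors the patch's loop: breadth-first walk over ancestors, climbing
  // past FilterOperators only. If a PTFOperator is reached, the identity
  // SELECT must not be removed.
  static boolean feedsFromPtf(SelectOp sel) {
    Deque<Op> pending = new ArrayDeque<>(sel.parents);
    while (!pending.isEmpty()) {
      Op cur = pending.remove();
      if (cur instanceof PtfOp) {
        return true;                  // keep the SELECT
      }
      if (cur instanceof FilterOp) {
        pending.addAll(cur.parents);  // look through filters, nothing else
      }
    }
    return false;                     // safe to remove if it is an identity project
  }

  public static void main(String[] args) {
    // PTF -> FIL -> SEL: the shape produced with predicate pushdown enabled.
    PtfOp ptf = new PtfOp();
    FilterOp fil = new FilterOp();
    SelectOp sel = new SelectOp();
    fil.parents.add(ptf);
    sel.parents.add(fil);
    System.out.println(feedsFromPtf(sel)); // prints: true
  }
}

With this guard in place, the ptfgroupbyjoin.q plans above retain the Select Operator between the PTF Operator and the downstream stages, which is what the new golden files assert.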