diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 51385cf..d9ae7f3 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -1074,6 +1074,8 @@ spark.query.files=add_part_multiple.q, \
   load_dyn_part8.q, \
   load_dyn_part9.q, \
   louter_join_ppr.q, \
+  lvj_mapjoin.q, \
+  lvj_ptf.q, \
   mapjoin1.q, \
   mapjoin_addjar.q, \
   mapjoin_decimal.q, \
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
index afbeccb..34a97c5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
@@ -108,9 +108,16 @@ public Object process(Node nd, Stack<Node> stack,
       // b) multi insert. Mux/Demux will hit the same leaf again, multi insert
       // will result into a vertex with multiple FS or RS operators.
-      // At this point we don't have to do anything special in this case. Just
-      // run through the regular paces w/o creating a new task.
-      work = context.rootToWorkMap.get(root);
+      // If it is the mux/demux case (e.g., lateral view), we should exit now.
+      if (context.childToWorkMap.containsKey(operator)) {
+        context.currentMapJoinOperators.clear();
+        context.currentUnionOperators.clear();
+        return null;
+      } else {
+        // At this point we don't have to do anything special in this case. Just
+        // run through the regular paces w/o creating a new task.
+        work = context.rootToWorkMap.get(root);
+      }
     } else {
       // create a new vertex
       if (context.preceedingWork == null) {
@@ -247,29 +254,11 @@ public Object process(Node nd, Stack<Node> stack,
         SparkEdgeProperty edgeProp = childWorkInfo.getFirst();
         ReduceWork childWork = childWorkInfo.getSecond();

-        LOG.debug("Second pass. Leaf operator: " + operator + " has common downstream work:" + childWork);
+        LOG.debug("Second pass. Leaf operator: " + operator + " has common downstream work: " + childWork);

-        // We may have already connected `work` with `childWork`, in case, for example, lateral view:
-        // TS
-        //  |
-        // ...
-        //  |
-        // LVF
-        //  |  \
-        // SEL  SEL
-        //  |    |
-        //  LVJ-UDTF
-        //    |
-        //   SEL
-        //    |
-        //    RS
-        // Here, RS can be reached from TS via two different paths. If there is any child work after RS,
-        // we don't want to connect them with the work associated with TS more than once.
-        if (sparkWork.getEdgeProperty(work, childWork) == null) {
-          sparkWork.connect(work, childWork, edgeProp);
-        } else {
-          LOG.debug("work " + work.getName() + " is already connected to " + childWork.getName() + " before");
-        }
+        Preconditions.checkState(sparkWork.getEdgeProperty(work, childWork) == null,
+            "Work " + work.getName() + " is already connected to " + childWork.getName() + "!");
+        sparkWork.connect(work, childWork, edgeProp);
       } else {
         LOG.debug("First pass. Leaf operator: " + operator);
       }
diff --git ql/src/test/queries/clientpositive/lvj_ptf.q ql/src/test/queries/clientpositive/lvj_ptf.q
new file mode 100644
index 0000000..7ef7c56
--- /dev/null
+++ ql/src/test/queries/clientpositive/lvj_ptf.q
@@ -0,0 +1,35 @@
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask.size=4;
+
+create table t1 (a string);
+create table t2 (a array<string>);
+create table dummy (a string);
+
+insert into table dummy values ("a");
+insert into t1 values ("1"), ("2");
+insert into t2 select array("1", "2", "3", "4") from dummy;
+
+explain
+with tt1 as (
+  select a as id, count(*) over () as count
+  from t1
+),
+tt2 as (
+  select id
+  from t2
+  lateral view outer explode(a) a_tbl as id
+)
+select tt1.count
+from tt1 join tt2 on tt1.id = tt2.id;
+
+with tt1 as (
+  select a as id, count(*) over () as count
+  from t1
+),
+tt2 as (
+  select id
+  from t2
+  lateral view outer explode(a) a_tbl as id
+)
+select tt1.count
+from tt1 join tt2 on tt1.id = tt2.id;
diff --git ql/src/test/results/clientpositive/spark/lvj_mapjoin.q.out ql/src/test/results/clientpositive/spark/lvj_mapjoin.q.out
new file mode 100644
index 0000000..4031d42
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/lvj_mapjoin.q.out
@@ -0,0 +1,339 @@
+PREHOOK: query: drop table sour1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table sour1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table sour2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table sour2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table expod1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table expod1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table expod2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table expod2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table sour1(id int, av1 string, av2 string, av3 string) row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sour1
+POSTHOOK: query: create table sour1(id int, av1 string, av2 string, av3 string) row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sour1
+PREHOOK: query: create table sour2(id int, bv1 string, bv2 string, bv3 string) row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sour2
+POSTHOOK: query: create table sour2(id int, bv1 string, bv2 string, bv3 string) row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sour2
+PREHOOK: query: load data local inpath '../../data/files/sour1.txt' into table sour1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@sour1
+POSTHOOK: query: load data local inpath '../../data/files/sour1.txt' into table sour1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@sour1
+PREHOOK: query: load data local inpath '../../data/files//sour2.txt' into table sour2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@sour2
+POSTHOOK: query: load data local inpath '../../data/files//sour2.txt' into table sour2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@sour2
+PREHOOK: query: create table expod1(aid int, av array<string>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@expod1
+POSTHOOK: query: create table expod1(aid int, av array<string>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@expod1
+PREHOOK: query: create table expod2(bid int, bv array<string>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@expod2
+POSTHOOK: query: create table expod2(bid int, bv array<string>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@expod2
+PREHOOK: query: insert overwrite table expod1 select id, array(av1,av2,av3) from sour1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sour1
+PREHOOK: Output: default@expod1
+POSTHOOK: query: insert overwrite table expod1 select id, array(av1,av2,av3) from sour1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sour1
+POSTHOOK: Output: default@expod1
+POSTHOOK: Lineage: expod1.aid SIMPLE [(sour1)sour1.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: expod1.av EXPRESSION [(sour1)sour1.FieldSchema(name:av1, type:string, comment:null), (sour1)sour1.FieldSchema(name:av2, type:string, comment:null), (sour1)sour1.FieldSchema(name:av3, type:string, comment:null), ]
+PREHOOK: query: insert overwrite table expod2 select id, array(bv1,bv2,bv3) from sour2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sour2
+PREHOOK: Output: default@expod2
+POSTHOOK: query: insert overwrite table expod2 select id, array(bv1,bv2,bv3) from sour2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sour2
+POSTHOOK: Output: default@expod2
+POSTHOOK: Lineage: expod2.bid SIMPLE [(sour2)sour2.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: expod2.bv EXPRESSION [(sour2)sour2.FieldSchema(name:bv1, type:string, comment:null), (sour2)sour2.FieldSchema(name:bv2, type:string, comment:null), (sour2)sour2.FieldSchema(name:bv3, type:string, comment:null), ]
+PREHOOK: query: explain with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: expod2
+                  Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: bid is not null (type: boolean)
+                    Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                    Lateral View Forward
+                      Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: bid (type: int)
+                        outputColumnNames: bid
+                        Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                        Lateral View Join Operator
+                          outputColumnNames: _col0, _col5
+                          Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col0 (type: int), _col5 (type: string)
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                            Spark HashTable Sink Operator
+                              keys:
+                                0 _col0 (type: int)
+                                1 _col0 (type: int)
+                      Select Operator
+                        expressions: bv (type: array<string>)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                        UDTF Operator
+                          Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                          function name: explode
+                          Lateral View Join Operator
+                            outputColumnNames: _col0, _col5
+                            Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                            Select Operator
+                              expressions: _col0 (type: int), _col5 (type: string)
+                              outputColumnNames: _col0, _col1
+                              Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                              Spark HashTable Sink Operator
+                                keys:
+                                  0 _col0 (type: int)
+                                  1 _col0 (type: int)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: expod1
+                  Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: aid is not null (type: boolean)
+                    Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                    Lateral View Forward
+                      Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: aid (type: int)
+                        outputColumnNames: aid
+                        Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                        Lateral View Join Operator
+                          outputColumnNames: _col0, _col5
+                          Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col0 (type: int), _col5 (type: string)
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                            Map Join Operator
+                              condition map:
+                                   Inner Join 0 to 1
+                              keys:
+                                0 _col0 (type: int)
+                                1 _col0 (type: int)
+                              outputColumnNames: _col0, _col1, _col2, _col3
+                              input vertices:
+                                1 Map 2
+                              Statistics: Num rows: 6 Data size: 85 Basic stats: COMPLETE Column stats: NONE
+                              Filter Operator
+                                predicate: (_col0 = _col2) (type: boolean)
+                                Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+                                Select Operator
+                                  expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+                                  outputColumnNames: _col0, _col1, _col2
+                                  Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+                                  File Output Operator
+                                    compressed: false
+                                    Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+                                    table:
+                                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      Select Operator
+                        expressions: av (type: array<string>)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                        UDTF Operator
+                          Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                          function name: explode
+                          Lateral View Join Operator
+                            outputColumnNames: _col0, _col5
+                            Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                            Select Operator
+                              expressions: _col0 (type: int), _col5 (type: string)
+                              outputColumnNames: _col0, _col1
+                              Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                              Map Join Operator
+                                condition map:
+                                     Inner Join 0 to 1
+                                keys:
+                                  0 _col0 (type: int)
+                                  1 _col0 (type: int)
+                                outputColumnNames: _col0, _col1, _col2, _col3
+                                input vertices:
+                                  1 Map 2
+                                Statistics: Num rows: 6 Data size: 85 Basic stats: COMPLETE Column stats: NONE
+                                Filter Operator
+                                  predicate: (_col0 = _col2) (type: boolean)
+                                  Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+                                  Select Operator
+                                    expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+                                    outputColumnNames: _col0, _col1, _col2
+                                    Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+                                    File Output Operator
+                                      compressed: false
+                                      Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+                                      table:
+                                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@expod1
+PREHOOK: Input: default@expod2
+#### A masked pattern was here ####
+POSTHOOK: query: with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@expod1
+POSTHOOK: Input: default@expod2
+#### A masked pattern was here ####
+1	a1	b1
+1	a1	b11
+1	a1	b111
+1	a11	b1
+1	a11	b11
+1	a11	b111
+1	a111	b1
+1	a111	b11
+1	a111	b111
+2	a2	b2
+2	a2	b22
+2	a2	b222
+2	a22	b2
+2	a22	b22
+2	a22	b222
+2	a222	b2
+2	a222	b22
+2	a222	b222
+PREHOOK: query: create temporary table tmp_lateral_view(
+  arst array<struct<age:int,name:string>>
+  ) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_lateral_view
+POSTHOOK: query: create temporary table tmp_lateral_view(
+  arst array<struct<age:int,name:string>>
+  ) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_lateral_view
+PREHOOK: query: insert into table tmp_lateral_view
+  select array(named_struct('age',cint,'name',cstring1))
+  from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@tmp_lateral_view
+POSTHOOK: query: insert into table tmp_lateral_view
+  select array(named_struct('age',cint,'name',cstring1))
+  from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@tmp_lateral_view
+POSTHOOK: Lineage: tmp_lateral_view.arst EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: select arst.name, arst.age
+  from tmp_lateral_view
+  lateral view inline(arst) arst
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_lateral_view
+#### A masked pattern was here ####
+POSTHOOK: query: select arst.name, arst.age
+  from tmp_lateral_view
+  lateral view inline(arst) arst
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_lateral_view
+#### A masked pattern was here ####
+cvLH6Eat2yFsyy7p	528534767
+cvLH6Eat2yFsyy7p	528534767
+cvLH6Eat2yFsyy7p	528534767
+cvLH6Eat2yFsyy7p	528534767
+cvLH6Eat2yFsyy7p	528534767
+cvLH6Eat2yFsyy7p	528534767
+cvLH6Eat2yFsyy7p	528534767
+cvLH6Eat2yFsyy7p	528534767
+cvLH6Eat2yFsyy7p	528534767
+cvLH6Eat2yFsyy7p	528534767
diff --git ql/src/test/results/clientpositive/spark/lvj_ptf.q.out ql/src/test/results/clientpositive/spark/lvj_ptf.q.out
new file mode 100644
index 0000000..17222c4
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/lvj_ptf.q.out
@@ -0,0 +1,257 @@
+PREHOOK: query: create table t1 (a string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (a array<string>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (a array<string>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: create table dummy (a string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dummy
+POSTHOOK: query: create table dummy (a string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dummy
+PREHOOK: query: insert into table dummy values ("a")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@dummy
+POSTHOOK: query: insert into table dummy values ("a")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@dummy
+POSTHOOK: Lineage: dummy.a SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t1 values ("1"), ("2")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1 values ("1"), ("2")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into t2 select array("1", "2", "3", "4") from dummy
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dummy
+PREHOOK: Output: default@t2
+POSTHOOK: query: insert into t2 select array("1", "2", "3", "4") from dummy
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dummy
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.a EXPRESSION []
+PREHOOK: query: explain
+with tt1 as (
+  select a as id, count(*) over () as count
+  from t1
+),
+tt2 as (
+  select id
+  from t2
+  lateral view outer explode(a) a_tbl as id
+)
+select tt1.count
+from tt1 join tt2 on tt1.id = tt2.id
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with tt1 as (
+  select a as id, count(*) over () as count
+  from t1
+),
+tt2 as (
+  select id
+  from t2
+  lateral view outer explode(a) a_tbl as id
+)
+select tt1.count
+from tt1 join tt2 on tt1.id = tt2.id
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: 0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: 0 (type: int)
+                    Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: a (type: string)
+        Reducer 2 
+            Local Work:
+              Map Reduce Local Work
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col0: string
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: 0 ASC NULLS FIRST
+                        partition by: 0
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: count_window_0
+                              name: count
+                              window function: GenericUDAFCountEvaluator
+                              window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                              isStar: true
+                  Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: _col0 is not null (type: boolean)
+                    Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), count_window_0 (type: bigint)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col0 (type: string)
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                  Lateral View Forward
+                    Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                      Lateral View Join Operator
+                        outputColumnNames: _col4
+                        Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: _col4 (type: string)
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+                          Map Join Operator
+                            condition map:
+                                 Inner Join 0 to 1
+                            keys:
+                              0 _col0 (type: string)
+                              1 _col0 (type: string)
+                            outputColumnNames: _col1
+                            input vertices:
+                              0 Reducer 2
+                            Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                            Select Operator
+                              expressions: _col1 (type: bigint)
+                              outputColumnNames: _col0
+                              Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                              File Output Operator
+                                compressed: false
+                                Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                                table:
+                                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    Select Operator
+                      expressions: a (type: array<string>)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                      UDTF Operator
+                        Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                        function name: explode
+                        outer lateral view: true
+                        Filter Operator
+                          predicate: col is not null (type: boolean)
+                          Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                          Lateral View Join Operator
+                            outputColumnNames: _col4
+                            Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+                            Select Operator
+                              expressions: _col4 (type: string)
+                              outputColumnNames: _col0
+                              Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+                              Map Join Operator
+                                condition map:
+                                     Inner Join 0 to 1
+                                keys:
+                                  0 _col0 (type: string)
+                                  1 _col0 (type: string)
+                                outputColumnNames: _col1
+                                input vertices:
+                                  0 Reducer 2
+                                Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                                Select Operator
+                                  expressions: _col1 (type: bigint)
+                                  outputColumnNames: _col0
+                                  Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                                  File Output Operator
+                                    compressed: false
+                                    Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                                    table:
+                                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: with tt1 as (
+  select a as id, count(*) over () as count
+  from t1
+),
+tt2 as (
+  select id
+  from t2
+  lateral view outer explode(a) a_tbl as id
+)
+select tt1.count
+from tt1 join tt2 on tt1.id = tt2.id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: with tt1 as (
+  select a as id, count(*) over () as count
+  from t1
+),
+tt2 as (
+  select id
+  from t2
+  lateral view outer explode(a) a_tbl as id
+)
+select tt1.count
+from tt1 join tt2 on tt1.id = tt2.id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2
+2