------------------------------------------------------------------------------- Test set: com.cloudera.impala.planner.PlannerTest ------------------------------------------------------------------------------- Tests run: 38, Failures: 2, Errors: 0, Skipped: 0, Time elapsed: 28.064 sec <<< FAILURE! - in com.cloudera.impala.planner.PlannerTest testTpchNested(com.cloudera.impala.planner.PlannerTest) Time elapsed: 1.049 sec <<< FAILURE! java.lang.AssertionError: Section PLAN of query: select s_name, count(*) as numwait from supplier s, customer c, c.c_orders o, o.o_lineitems l1, region.r_nations n where s_suppkey = l1.l_suppkey and o_orderstatus = 'F' and l1.l_receiptdate > l1.l_commitdate and exists ( select * from o.o_lineitems l2 where l2.l_suppkey <> l1.l_suppkey ) and not exists ( select * from o.o_lineitems l3 where l3.l_suppkey <> l1.l_suppkey and l3.l_receiptdate > l3.l_commitdate ) and s_nationkey = n_nationkey and n_name = 'SAUDI ARABIA' group by s_name order by numwait desc, s_name limit 100 Actual does not match expected result: 20:TOP-N [LIMIT=100] | order by: count(*) DESC, s_name ASC | 19:AGGREGATE [FINALIZE] | output: count(*) | group by: s_name | 18:SUBPLAN | |--16:NESTED LOOP JOIN [RIGHT ANTI JOIN] | | join predicates: l3.l_suppkey != l1.l_suppkey | | | |--14:SINGULAR ROW SRC | | | 15:UNNEST [o.o_lineitems l3] | 17:HASH JOIN [INNER JOIN] | hash predicates: s_nationkey = n_nationkey | runtime filters: RF000 <- n_nationkey | |--12:SCAN HDFS [tpch_nested_parquet.region.r_nations n] | partitions=1/1 files=1 size=4.18KB | predicates: n_name = 'SAUDI ARABIA' | 13:HASH JOIN [INNER JOIN] | hash predicates: l1.l_suppkey = s_suppkey | |--00:SCAN HDFS [tpch_nested_parquet.supplier s] | partitions=1/1 files=1 size=111.08MB | runtime filters: RF000 -> s_nationkey | 02:SUBPLAN | |--11:NESTED LOOP JOIN [CROSS JOIN] | | | |--03:SINGULAR ROW SRC | | | 05:SUBPLAN | | | |--10:NESTED LOOP JOIN [LEFT SEMI JOIN] | | | join predicates: l2.l_suppkey != l1.l_suppkey | | | | | |--08:UNNEST [o.o_lineitems l2] | | | | | 09:NESTED LOOP JOIN [CROSS JOIN] | | | | | |--06:SINGULAR ROW SRC | | | | | 07:UNNEST [o.o_lineitems l1] | | | 04:UNNEST [c.c_orders o] | 01:SCAN HDFS [tpch_nested_parquet.customer c] partitions=1/1 files=4 size=577.87MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderstatus = 'F' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ predicates on l1: l1.l_receiptdate > l1.l_commitdate predicates on l3: l3.l_receiptdate > l3.l_commitdate Expected: 20:TOP-N [LIMIT=100] | order by: count(*) DESC, s_name ASC | 19:AGGREGATE [FINALIZE] | output: count(*) | group by: s_name | 18:SUBPLAN | |--16:NESTED LOOP JOIN [RIGHT ANTI JOIN] | | join predicates: l3.l_suppkey != l1.l_suppkey | | | |--14:SINGULAR ROW SRC | | | 15:UNNEST [o.o_lineitems l3] | 17:HASH JOIN [INNER JOIN] | hash predicates: s_nationkey = n_nationkey | runtime filters: RF000 <- n_nationkey | |--12:SCAN HDFS [tpch_nested_parquet.region.r_nations n] | partitions=1/1 files=1 size=4.18KB | predicates: n_name = 'SAUDI ARABIA' | 13:HASH JOIN [INNER JOIN] | hash predicates: l1.l_suppkey = s_suppkey | |--00:SCAN HDFS [tpch_nested_parquet.supplier s] | partitions=1/1 files=1 size=111.08MB | runtime filters: RF000 -> s_nationkey | 02:SUBPLAN | |--11:NESTED LOOP JOIN [CROSS JOIN] | | | |--03:SINGULAR ROW SRC | | | 05:SUBPLAN | | | |--10:NESTED LOOP JOIN [LEFT SEMI JOIN] | | | join predicates: l2.l_suppkey != l1.l_suppkey | | | | | |--08:UNNEST [o.o_lineitems l2] | | | | | 09:NESTED LOOP JOIN [CROSS JOIN] | | | | | |--06:SINGULAR ROW SRC | | | | | 07:UNNEST [o.o_lineitems l1] | | | 04:UNNEST [c.c_orders o] | 01:SCAN HDFS [tpch_nested_parquet.customer c] partitions=1/1 files=4 size=577.87MB predicates: !empty(c.c_orders) predicates on o: o_orderstatus = 'F', !empty(o.o_lineitems) predicates on l1: l1.l_receiptdate > l1.l_commitdate predicates on l3: l3.l_receiptdate > l3.l_commitdate Verbose plan: F00:PLAN FRAGMENT [UNPARTITIONED] 20:TOP-N [LIMIT=100] | order by: count(*) DESC, s_name ASC | hosts=3 per-host-mem=unavailable | tuple-ids=10 row-size=42B cardinality=100 | 19:AGGREGATE [FINALIZE] | output: count(*) | group by: s_name | hosts=3 per-host-mem=unavailable | tuple-ids=9 row-size=42B cardinality=9965 | 18:SUBPLAN | hosts=3 per-host-mem=unavailable | tuple-ids=3,2,1,0,4 row-size=182B cardinality=15000000 | |--16:NESTED LOOP JOIN [RIGHT ANTI JOIN] | | join predicates: l3.l_suppkey != l1.l_suppkey | | hosts=3 per-host-mem=unavailable | | tuple-ids=3,2,1,0,4 row-size=182B cardinality=1 | | | |--14:SINGULAR ROW SRC | | parent-subplan=18 | | hosts=3 per-host-mem=unavailable | | tuple-ids=3,2,1,0,4 row-size=182B cardinality=1 | | | 15:UNNEST [o.o_lineitems l3] | parent-subplan=18 | hosts=3 per-host-mem=unavailable | tuple-ids=7 row-size=40B cardinality=10 | 17:HASH JOIN [INNER JOIN] | hash predicates: s_nationkey = n_nationkey | runtime filters: RF000 <- n_nationkey | hosts=3 per-host-mem=unavailable | tuple-ids=3,2,1,0,4 row-size=182B cardinality=15000000 | |--12:SCAN HDFS [tpch_nested_parquet.region.r_nations n] | partitions=1/1 files=1 size=4.18KB | predicates: n_name = 'SAUDI ARABIA' | table stats: 5 rows total | column stats: all | hosts=1 per-host-mem=unavailable | tuple-ids=4 row-size=18B cardinality=5 | 13:HASH JOIN [INNER JOIN] | hash predicates: l1.l_suppkey = s_suppkey | hosts=3 per-host-mem=unavailable | tuple-ids=3,2,1,0 row-size=164B cardinality=15000000 | |--00:SCAN HDFS [tpch_nested_parquet.supplier s] | partitions=1/1 files=1 size=111.08MB | runtime filters: RF000 -> s_nationkey | table stats: 10000 rows total | column stats: all | hosts=1 per-host-mem=unavailable | tuple-ids=0 row-size=44B cardinality=10000 | 02:SUBPLAN | hosts=3 per-host-mem=unavailable | tuple-ids=3,2,1 row-size=120B cardinality=15000000 | |--11:NESTED LOOP JOIN [CROSS JOIN] | | hosts=3 per-host-mem=unavailable | | tuple-ids=3,2,1 row-size=120B cardinality=100 | | | |--03:SINGULAR ROW SRC | | parent-subplan=02 | | hosts=3 per-host-mem=unavailable | | tuple-ids=1 row-size=16B cardinality=1 | | | 05:SUBPLAN | | hosts=3 per-host-mem=unavailable | | tuple-ids=3,2 row-size=104B cardinality=100 | | | |--10:NESTED LOOP JOIN [LEFT SEMI JOIN] | | | join predicates: l2.l_suppkey != l1.l_suppkey | | | hosts=3 per-host-mem=unavailable | | | tuple-ids=3,2 row-size=104B cardinality=10 | | | | | |--08:UNNEST [o.o_lineitems l2] | | | parent-subplan=05 | | | hosts=3 per-host-mem=unavailable | | | tuple-ids=5 row-size=8B cardinality=10 | | | | | 09:NESTED LOOP JOIN [CROSS JOIN] | | | hosts=3 per-host-mem=unavailable | | | tuple-ids=3,2 row-size=104B cardinality=10 | | | | | |--06:SINGULAR ROW SRC | | | parent-subplan=05 | | | hosts=3 per-host-mem=unavailable | | | tuple-ids=2 row-size=64B cardinality=1 | | | | | 07:UNNEST [o.o_lineitems l1] | | parent-subplan=05 | | hosts=3 per-host-mem=unavailable | | tuple-ids=3 row-size=0B cardinality=10 | | | 04:UNNEST [c.c_orders o] | parent-subplan=02 | hosts=3 per-host-mem=unavailable | tuple-ids=2 row-size=0B cardinality=10 | 01:SCAN HDFS [tpch_nested_parquet.customer c] partitions=1/1 files=4 size=577.87MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderstatus = 'F' predicates on l1: l1.l_receiptdate > l1.l_commitdate predicates on l3: l3.l_receiptdate > l3.l_commitdate table stats: 150000 rows total column stats: unavailable hosts=3 per-host-mem=unavailable tuple-ids=1 row-size=16B cardinality=150000 Section DISTRIBUTEDPLAN of query: select s_name, count(*) as numwait from supplier s, customer c, c.c_orders o, o.o_lineitems l1, region.r_nations n where s_suppkey = l1.l_suppkey and o_orderstatus = 'F' and l1.l_receiptdate > l1.l_commitdate and exists ( select * from o.o_lineitems l2 where l2.l_suppkey <> l1.l_suppkey ) and not exists ( select * from o.o_lineitems l3 where l3.l_suppkey <> l1.l_suppkey and l3.l_receiptdate > l3.l_commitdate ) and s_nationkey = n_nationkey and n_name = 'SAUDI ARABIA' group by s_name order by numwait desc, s_name limit 100 Actual does not match expected result: 25:MERGING-EXCHANGE [UNPARTITIONED] | order by: count(*) DESC, s_name ASC | limit: 100 | 20:TOP-N [LIMIT=100] | order by: count(*) DESC, s_name ASC | 24:AGGREGATE [FINALIZE] | output: count:merge(*) | group by: s_name | 23:EXCHANGE [HASH(s_name)] | 19:AGGREGATE [STREAMING] | output: count(*) | group by: s_name | 18:SUBPLAN | |--16:NESTED LOOP JOIN [RIGHT ANTI JOIN] | | join predicates: l3.l_suppkey != l1.l_suppkey | | | |--14:SINGULAR ROW SRC | | | 15:UNNEST [o.o_lineitems l3] | 17:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: s_nationkey = n_nationkey | runtime filters: RF000 <- n_nationkey | |--22:EXCHANGE [BROADCAST] | | | 12:SCAN HDFS [tpch_nested_parquet.region.r_nations n] | partitions=1/1 files=1 size=4.18KB | predicates: n_name = 'SAUDI ARABIA' | 13:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: l1.l_suppkey = s_suppkey | |--21:EXCHANGE [BROADCAST] | | | 00:SCAN HDFS [tpch_nested_parquet.supplier s] | partitions=1/1 files=1 size=111.08MB | runtime filters: RF000 -> s_nationkey | 02:SUBPLAN | |--11:NESTED LOOP JOIN [CROSS JOIN] | | | |--03:SINGULAR ROW SRC | | | 05:SUBPLAN | | | |--10:NESTED LOOP JOIN [LEFT SEMI JOIN] | | | join predicates: l2.l_suppkey != l1.l_suppkey | | | | | |--08:UNNEST [o.o_lineitems l2] | | | | | 09:NESTED LOOP JOIN [CROSS JOIN] | | | | | |--06:SINGULAR ROW SRC | | | | | 07:UNNEST [o.o_lineitems l1] | | | 04:UNNEST [c.c_orders o] | 01:SCAN HDFS [tpch_nested_parquet.customer c] partitions=1/1 files=4 size=577.87MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderstatus = 'F' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ predicates on l1: l1.l_receiptdate > l1.l_commitdate predicates on l3: l3.l_receiptdate > l3.l_commitdate Expected: 25:MERGING-EXCHANGE [UNPARTITIONED] | order by: count(*) DESC, s_name ASC | limit: 100 | 20:TOP-N [LIMIT=100] | order by: count(*) DESC, s_name ASC | 24:AGGREGATE [FINALIZE] | output: count:merge(*) | group by: s_name | 23:EXCHANGE [HASH(s_name)] | 19:AGGREGATE [STREAMING] | output: count(*) | group by: s_name | 18:SUBPLAN | |--16:NESTED LOOP JOIN [RIGHT ANTI JOIN] | | join predicates: l3.l_suppkey != l1.l_suppkey | | | |--14:SINGULAR ROW SRC | | | 15:UNNEST [o.o_lineitems l3] | 17:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: s_nationkey = n_nationkey | runtime filters: RF000 <- n_nationkey | |--22:EXCHANGE [BROADCAST] | | | 12:SCAN HDFS [tpch_nested_parquet.region.r_nations n] | partitions=1/1 files=1 size=4.18KB | predicates: n_name = 'SAUDI ARABIA' | 13:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: l1.l_suppkey = s_suppkey | |--21:EXCHANGE [BROADCAST] | | | 00:SCAN HDFS [tpch_nested_parquet.supplier s] | partitions=1/1 files=1 size=111.08MB | runtime filters: RF000 -> s_nationkey | 02:SUBPLAN | |--11:NESTED LOOP JOIN [CROSS JOIN] | | | |--03:SINGULAR ROW SRC | | | 05:SUBPLAN | | | |--10:NESTED LOOP JOIN [LEFT SEMI JOIN] | | | join predicates: l2.l_suppkey != l1.l_suppkey | | | | | |--08:UNNEST [o.o_lineitems l2] | | | | | 09:NESTED LOOP JOIN [CROSS JOIN] | | | | | |--06:SINGULAR ROW SRC | | | | | 07:UNNEST [o.o_lineitems l1] | | | 04:UNNEST [c.c_orders o] | 01:SCAN HDFS [tpch_nested_parquet.customer c] partitions=1/1 files=4 size=577.87MB predicates: !empty(c.c_orders) predicates on o: o_orderstatus = 'F', !empty(o.o_lineitems) predicates on l1: l1.l_receiptdate > l1.l_commitdate predicates on l3: l3.l_receiptdate > l3.l_commitdate Verbose plan: F04:PLAN FRAGMENT [UNPARTITIONED] 25:MERGING-EXCHANGE [UNPARTITIONED] order by: count(*) DESC, s_name ASC limit: 100 hosts=3 per-host-mem=unavailable tuple-ids=10 row-size=42B cardinality=100 F03:PLAN FRAGMENT [HASH(s_name)] DATASTREAM SINK [FRAGMENT=F04, EXCHANGE=25, UNPARTITIONED] 20:TOP-N [LIMIT=100] | order by: count(*) DESC, s_name ASC | hosts=3 per-host-mem=4.10KB | tuple-ids=10 row-size=42B cardinality=100 | 24:AGGREGATE [FINALIZE] | output: count:merge(*) | group by: s_name | hosts=3 per-host-mem=10.00MB | tuple-ids=9 row-size=42B cardinality=9965 | 23:EXCHANGE [HASH(s_name)] hosts=3 per-host-mem=0B tuple-ids=9 row-size=42B cardinality=9965 F00:PLAN FRAGMENT [RANDOM] DATASTREAM SINK [FRAGMENT=F03, EXCHANGE=23, HASH(s_name)] 19:AGGREGATE [STREAMING] | output: count(*) | group by: s_name | hosts=3 per-host-mem=10.00MB | tuple-ids=9 row-size=42B cardinality=9965 | 18:SUBPLAN | hosts=3 per-host-mem=0B | tuple-ids=3,2,1,0,4 row-size=182B cardinality=15000000 | |--16:NESTED LOOP JOIN [RIGHT ANTI JOIN] | | join predicates: l3.l_suppkey != l1.l_suppkey | | hosts=3 per-host-mem=182B | | tuple-ids=3,2,1,0,4 row-size=182B cardinality=1 | | | |--14:SINGULAR ROW SRC | | parent-subplan=18 | | hosts=3 per-host-mem=0B | | tuple-ids=3,2,1,0,4 row-size=182B cardinality=1 | | | 15:UNNEST [o.o_lineitems l3] | parent-subplan=18 | hosts=3 per-host-mem=0B | tuple-ids=7 row-size=40B cardinality=10 | 17:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: s_nationkey = n_nationkey | runtime filters: RF000 <- n_nationkey | hosts=3 per-host-mem=100B | tuple-ids=3,2,1,0,4 row-size=182B cardinality=15000000 | |--22:EXCHANGE [BROADCAST] | hosts=1 per-host-mem=0B | tuple-ids=4 row-size=18B cardinality=5 | 13:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: l1.l_suppkey = s_suppkey | hosts=3 per-host-mem=472.66KB | tuple-ids=3,2,1,0 row-size=164B cardinality=15000000 | |--21:EXCHANGE [BROADCAST] | hosts=1 per-host-mem=0B | tuple-ids=0 row-size=44B cardinality=10000 | 02:SUBPLAN | hosts=3 per-host-mem=0B | tuple-ids=3,2,1 row-size=120B cardinality=15000000 | |--11:NESTED LOOP JOIN [CROSS JOIN] | | hosts=3 per-host-mem=16B | | tuple-ids=3,2,1 row-size=120B cardinality=100 | | | |--03:SINGULAR ROW SRC | | parent-subplan=02 | | hosts=3 per-host-mem=0B | | tuple-ids=1 row-size=16B cardinality=1 | | | 05:SUBPLAN | | hosts=3 per-host-mem=0B | | tuple-ids=3,2 row-size=104B cardinality=100 | | | |--10:NESTED LOOP JOIN [LEFT SEMI JOIN] | | | join predicates: l2.l_suppkey != l1.l_suppkey | | | hosts=3 per-host-mem=80B | | | tuple-ids=3,2 row-size=104B cardinality=10 | | | | | |--08:UNNEST [o.o_lineitems l2] | | | parent-subplan=05 | | | hosts=3 per-host-mem=0B | | | tuple-ids=5 row-size=8B cardinality=10 | | | | | 09:NESTED LOOP JOIN [CROSS JOIN] | | | hosts=3 per-host-mem=64B | | | tuple-ids=3,2 row-size=104B cardinality=10 | | | | | |--06:SINGULAR ROW SRC | | | parent-subplan=05 | | | hosts=3 per-host-mem=0B | | | tuple-ids=2 row-size=64B cardinality=1 | | | | | 07:UNNEST [o.o_lineitems l1] | | parent-subplan=05 | | hosts=3 per-host-mem=0B | | tuple-ids=3 row-size=0B cardinality=10 | | | 04:UNNEST [c.c_orders o] | parent-subplan=02 | hosts=3 per-host-mem=0B | tuple-ids=2 row-size=0B cardinality=10 | 01:SCAN HDFS [tpch_nested_parquet.customer c, RANDOM] partitions=1/1 files=4 size=577.87MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderstatus = 'F' predicates on l1: l1.l_receiptdate > l1.l_commitdate predicates on l3: l3.l_receiptdate > l3.l_commitdate table stats: 150000 rows total column stats: unavailable hosts=3 per-host-mem=88.00MB tuple-ids=1 row-size=16B cardinality=150000 F02:PLAN FRAGMENT [RANDOM] DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=22, BROADCAST] 12:SCAN HDFS [tpch_nested_parquet.region.r_nations n, RANDOM] partitions=1/1 files=1 size=4.18KB predicates: n_name = 'SAUDI ARABIA' table stats: 5 rows total column stats: all hosts=1 per-host-mem=32.00MB tuple-ids=4 row-size=18B cardinality=5 F01:PLAN FRAGMENT [RANDOM] DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=21, BROADCAST] 00:SCAN HDFS [tpch_nested_parquet.supplier s, RANDOM] partitions=1/1 files=1 size=111.08MB runtime filters: RF000 -> s_nationkey table stats: 10000 rows total column stats: all hosts=1 per-host-mem=264.00MB tuple-ids=0 row-size=44B cardinality=10000 at org.junit.Assert.fail(Assert.java:88) at com.cloudera.impala.planner.PlannerTestBase.runPlannerTestFile(PlannerTestBase.java:676) at com.cloudera.impala.planner.PlannerTestBase.runPlannerTestFile(PlannerTestBase.java:685) at com.cloudera.impala.planner.PlannerTest.testTpchNested(PlannerTest.java:179) testLineage(com.cloudera.impala.planner.PlannerTest) Time elapsed: 1.034 sec <<< FAILURE! java.lang.AssertionError: section LINEAGE of query: select * from ( select tinyint_col + int_col x from functional.alltypes union all select sum(bigint_col) y from (select bigint_col from functional.alltypes) v1) v2 Output: {"queryText":"select * from (\n select tinyint_col + int_col x from functional.alltypes\n union all\n select sum(bigint_col) y from (select bigint_col from functional.alltypes) v1) v2","hash":"25456c60a2e874a20732f42c7af27553","user":"lv","timestamp":1464539799,"edges":[{"sources":[1,2,3],"targets":[0],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"x"},{"id":1,"vertexType":"COLUMN","vertexId":"functional.alltypes.int_col"},{"id":2,"vertexType":"COLUMN","vertexId":"functional.alltypes.bigint_col"},{"id":3,"vertexType":"COLUMN","vertexId":"functional.alltypes.tinyint_col"}]} Expected: { "queryText":"select * from (\n select tinyint_col + int_col x from functional.alltypes\n union all\n select sum(bigint_col) y from (select bigint_col from functional.alltypes) v1) v2", "hash":"25456c60a2e874a20732f42c7af27553", "user":"dev", "timestamp":1446159271, "edges":[ { "sources":[ 1, 2, 3 ], "targets":[ 0 ], "edgeType":"PROJECTION" } ], "vertices":[ { "id":0, "vertexType":"COLUMN", "vertexId":"x" }, { "id":1, "vertexType":"COLUMN", "vertexId":"functional.alltypes.int_col" }, { "id":2, "vertexType":"COLUMN", "vertexId":"functional.alltypes.tinyint_col" }, { "id":3, "vertexType":"COLUMN", "vertexId":"functional.alltypes.bigint_col" } ] } section LINEAGE of query: select sum(a.tinyint_col) over (partition by a.smallint_col order by a.id), count(b.string_col), b.timestamp_col from functional.alltypes a join functional.alltypessmall b on (a.id = b.id) where a.year = 2010 and b.float_col > 0 group by a.tinyint_col, a.smallint_col, a.id, b.string_col, b.timestamp_col, b.bigint_col having count(a.int_col) > 10 order by b.bigint_col limit 10 Output: {"queryText":"select sum(a.tinyint_col) over (partition by a.smallint_col order by a.id),\n count(b.string_col), b.timestamp_col\nfrom functional.alltypes a join functional.alltypessmall b on (a.id = b.id)\nwhere a.year = 2010 and b.float_col > 0\ngroup by a.tinyint_col, a.smallint_col, a.id, b.string_col, b.timestamp_col, b.bigint_col\nhaving count(a.int_col) > 10\norder by b.bigint_col limit 10","hash":"e0309eeff9811f53c82657d62c1e04eb","user":"lv","timestamp":1464539799,"edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[2,3],"targets":[0],"edgeType":"PREDICATE"},{"sources":[5],"targets":[4],"edgeType":"PROJECTION"},{"sources":[7],"targets":[6],"edgeType":"PROJECTION"},{"sources":[1,2,3,5,7,8,9,10,11,12],"targets":[0,4,6],"edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"sum(a.tinyint_col) OVER(...)"},{"id":1,"vertexType":"COLUMN","vertexId":"functional.alltypes.tinyint_col"},{"id":2,"vertexType":"COLUMN","vertexId":"functional.alltypes.smallint_col"},{"id":3,"vertexType":"COLUMN","vertexId":"functional.alltypes.id"},{"id":4,"vertexType":"COLUMN","vertexId":"count(b.string_col)"},{"id":5,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.string_col"},{"id":6,"vertexType":"COLUMN","vertexId":"timestamp_col"},{"id":7,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.timestamp_col"},{"id":8,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.id"},{"id":9,"vertexType":"COLUMN","vertexId":"functional.alltypes.int_col"},{"id":10,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.bigint_col"},{"id":11,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.float_col"},{"id":12,"vertexType":"COLUMN","vertexId":"functional.alltypes.year"}]} Expected: { "queryText":"select sum(a.tinyint_col) over (partition by a.smallint_col order by a.id),\n count(b.string_col), b.timestamp_col\nfrom functional.alltypes a join functional.alltypessmall b on (a.id = b.id)\nwhere a.year = 2010 and b.float_col > 0\ngroup by a.tinyint_col, a.smallint_col, a.id, b.string_col, b.timestamp_col, b.bigint_col\nhaving count(a.int_col) > 10\norder by b.bigint_col limit 10", "hash":"e0309eeff9811f53c82657d62c1e04eb", "user":"dev", "timestamp":1446159271, "edges":[ { "sources":[ 1 ], "targets":[ 0 ], "edgeType":"PROJECTION" }, { "sources":[ 2, 3 ], "targets":[ 0 ], "edgeType":"PREDICATE" }, { "sources":[ 5 ], "targets":[ 4 ], "edgeType":"PROJECTION" }, { "sources":[ 7 ], "targets":[ 6 ], "edgeType":"PROJECTION" }, { "sources":[ 1, 2, 3, 5, 7, 8, 9, 10, 11, 12 ], "targets":[ 0, 4, 6 ], "edgeType":"PREDICATE" } ], "vertices":[ { "id":0, "vertexType":"COLUMN", "vertexId":"sum(a.tinyint_col) OVER(...)" }, { "id":1, "vertexType":"COLUMN", "vertexId":"functional.alltypes.tinyint_col" }, { "id":2, "vertexType":"COLUMN", "vertexId":"functional.alltypes.id" }, { "id":3, "vertexType":"COLUMN", "vertexId":"functional.alltypes.smallint_col" }, { "id":4, "vertexType":"COLUMN", "vertexId":"count(b.string_col)" }, { "id":5, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.string_col" }, { "id":6, "vertexType":"COLUMN", "vertexId":"timestamp_col" }, { "id":7, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.timestamp_col" }, { "id":8, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.id" }, { "id":9, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.float_col" }, { "id":10, "vertexType":"COLUMN", "vertexId":"functional.alltypes.int_col" }, { "id":11, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.bigint_col" }, { "id":12, "vertexType":"COLUMN", "vertexId":"functional.alltypes.year" } ] } section LINEAGE of query: select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col from functional.alltypessmall c join ( select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day, a.int_col int_col, a.month month, b.float_col float_col, b.id id from ( select * from functional.alltypesagg a where month=1 ) a join functional.alltypessmall b on (a.smallint_col = b.id) ) x on (x.tinyint_col = c.id) where x.day=1 and x.int_col > 899 and x.float_col > 4.5 and c.string_col < '7' and x.int_col + x.float_col + cast(c.string_col as float) < 1000 Output: {"queryText":"select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col\nfrom functional.alltypessmall c\njoin (\n select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,\n a.int_col int_col, a.month month, b.float_col float_col, b.id id\n from ( select * from functional.alltypesagg a where month=1 ) a\n join functional.alltypessmall b on (a.smallint_col = b.id)\n ) x on (x.tinyint_col = c.id)\nwhere x.day=1\nand x.int_col > 899\nand x.float_col > 4.5\nand c.string_col < '7'\nand x.int_col + x.float_col + cast(c.string_col as float) < 1000","hash":"4edf165aed5982ede63f7c91074f4b44","user":"lv","timestamp":1464539800,"edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[2],"edgeType":"PROJECTION"},{"sources":[5],"targets":[4],"edgeType":"PROJECTION"},{"sources":[3],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[6],"edgeType":"PROJECTION"},{"sources":[9],"targets":[8],"edgeType":"PROJECTION"},{"sources":[11],"targets":[10],"edgeType":"PROJECTION"},{"sources":[1,3,5,7,9,11,12,13],"targets":[0,2,4,6,8,10],"edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"smallint_col"},{"id":1,"vertexType":"COLUMN","vertexId":"functional.alltypesagg.smallint_col"},{"id":2,"vertexType":"COLUMN","vertexId":"id"},{"id":3,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.id"},{"id":4,"vertexType":"COLUMN","vertexId":"tinyint_col"},{"id":5,"vertexType":"COLUMN","vertexId":"functional.alltypesagg.tinyint_col"},{"id":6,"vertexType":"COLUMN","vertexId":"int_col"},{"id":7,"vertexType":"COLUMN","vertexId":"functional.alltypesagg.int_col"},{"id":8,"vertexType":"COLUMN","vertexId":"float_col"},{"id":9,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.float_col"},{"id":10,"vertexType":"COLUMN","vertexId":"string_col"},{"id":11,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.string_col"},{"id":12,"vertexType":"COLUMN","vertexId":"functional.alltypesagg.day"},{"id":13,"vertexType":"COLUMN","vertexId":"functional.alltypesagg.month"}]} Expected: { "queryText":"select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col\nfrom functional.alltypessmall c\njoin (\n select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,\n a.int_col int_col, a.month month, b.float_col float_col, b.id id\n from ( select * from functional.alltypesagg a where month=1 ) a\n join functional.alltypessmall b on (a.smallint_col = b.id)\n ) x on (x.tinyint_col = c.id)\nwhere x.day=1\nand x.int_col > 899\nand x.float_col > 4.5\nand c.string_col < '7'\nand x.int_col + x.float_col + cast(c.string_col as float) < 1000", "hash":"4edf165aed5982ede63f7c91074f4b44", "user":"dev", "timestamp":1446159272, "edges":[ { "sources":[ 1 ], "targets":[ 0 ], "edgeType":"PROJECTION" }, { "sources":[ 3 ], "targets":[ 2 ], "edgeType":"PROJECTION" }, { "sources":[ 5 ], "targets":[ 4 ], "edgeType":"PROJECTION" }, { "sources":[ 3 ], "targets":[ 2 ], "edgeType":"PROJECTION" }, { "sources":[ 7 ], "targets":[ 6 ], "edgeType":"PROJECTION" }, { "sources":[ 9 ], "targets":[ 8 ], "edgeType":"PROJECTION" }, { "sources":[ 11 ], "targets":[ 10 ], "edgeType":"PROJECTION" }, { "sources":[ 1, 3, 5, 7, 9, 11, 12, 13 ], "targets":[ 0, 2, 4, 6, 8, 10 ], "edgeType":"PREDICATE" } ], "vertices":[ { "id":0, "vertexType":"COLUMN", "vertexId":"smallint_col" }, { "id":1, "vertexType":"COLUMN", "vertexId":"functional.alltypesagg.smallint_col" }, { "id":2, "vertexType":"COLUMN", "vertexId":"id" }, { "id":3, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.id" }, { "id":4, "vertexType":"COLUMN", "vertexId":"tinyint_col" }, { "id":5, "vertexType":"COLUMN", "vertexId":"functional.alltypesagg.tinyint_col" }, { "id":6, "vertexType":"COLUMN", "vertexId":"int_col" }, { "id":7, "vertexType":"COLUMN", "vertexId":"functional.alltypesagg.int_col" }, { "id":8, "vertexType":"COLUMN", "vertexId":"float_col" }, { "id":9, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.float_col" }, { "id":10, "vertexType":"COLUMN", "vertexId":"string_col" }, { "id":11, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.string_col" }, { "id":12, "vertexType":"COLUMN", "vertexId":"functional.alltypesagg.month" }, { "id":13, "vertexType":"COLUMN", "vertexId":"functional.alltypesagg.day" } ] } section LINEAGE of query: select int_col + 1, tinyint_col - 1 from functional.alltypes a where a.int_col < (select max(int_col) from functional.alltypesagg g where g.bool_col = true) and a.bigint_col > 10 Output: {"queryText":"select int_col + 1, tinyint_col - 1\nfrom functional.alltypes a\nwhere a.int_col <\n (select max(int_col) from functional.alltypesagg g where g.bool_col = true)\nand a.bigint_col > 10","hash":"5e6227f323793ea4441e2a3119af2f09","user":"lv","timestamp":1464539800,"edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[2],"edgeType":"PROJECTION"},{"sources":[1,4,5,6],"targets":[0,2],"edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"int_col + 1"},{"id":1,"vertexType":"COLUMN","vertexId":"functional.alltypes.int_col"},{"id":2,"vertexType":"COLUMN","vertexId":"tinyint_col - 1"},{"id":3,"vertexType":"COLUMN","vertexId":"functional.alltypes.tinyint_col"},{"id":4,"vertexType":"COLUMN","vertexId":"functional.alltypesagg.int_col"},{"id":5,"vertexType":"COLUMN","vertexId":"functional.alltypes.bigint_col"},{"id":6,"vertexType":"COLUMN","vertexId":"functional.alltypesagg.bool_col"}]} Expected: { "queryText":"select int_col + 1, tinyint_col - 1\nfrom functional.alltypes a\nwhere a.int_col <\n (select max(int_col) from functional.alltypesagg g where g.bool_col = true)\nand a.bigint_col > 10", "hash":"5e6227f323793ea4441e2a3119af2f09", "user":"dev", "timestamp":1446159272, "edges":[ { "sources":[ 1 ], "targets":[ 0 ], "edgeType":"PROJECTION" }, { "sources":[ 3 ], "targets":[ 2 ], "edgeType":"PROJECTION" }, { "sources":[ 1, 4, 5, 6 ], "targets":[ 0, 2 ], "edgeType":"PREDICATE" } ], "vertices":[ { "id":0, "vertexType":"COLUMN", "vertexId":"int_col + 1" }, { "id":1, "vertexType":"COLUMN", "vertexId":"functional.alltypes.int_col" }, { "id":2, "vertexType":"COLUMN", "vertexId":"tinyint_col - 1" }, { "id":3, "vertexType":"COLUMN", "vertexId":"functional.alltypes.tinyint_col" }, { "id":4, "vertexType":"COLUMN", "vertexId":"functional.alltypesagg.int_col" }, { "id":5, "vertexType":"COLUMN", "vertexId":"functional.alltypesagg.bool_col" }, { "id":6, "vertexType":"COLUMN", "vertexId":"functional.alltypes.bigint_col" } ] } section LINEAGE of query: select lead(a) over (partition by b order by c) from (select lead(id) over (partition by int_col order by bigint_col) as a, max(id) over (partition by tinyint_col order by int_col) as b, min(int_col) over (partition by string_col order by bool_col) as c from functional.alltypes) v Output: {"queryText":"select lead(a) over (partition by b order by c)\nfrom\n (select lead(id) over (partition by int_col order by bigint_col) as a,\n max(id) over (partition by tinyint_col order by int_col) as b,\n min(int_col) over (partition by string_col order by bool_col) as c\n from functional.alltypes) v","hash":"aa95e5e6f39fc80bb3c318a2515dc77d","user":"lv","timestamp":1464539800,"edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[1,2,3,4,5,6],"targets":[0],"edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"lead(a, 1, NULL) OVER(...)"},{"id":1,"vertexType":"COLUMN","vertexId":"functional.alltypes.id"},{"id":2,"vertexType":"COLUMN","vertexId":"functional.alltypes.int_col"},{"id":3,"vertexType":"COLUMN","vertexId":"functional.alltypes.bool_col"},{"id":4,"vertexType":"COLUMN","vertexId":"functional.alltypes.string_col"},{"id":5,"vertexType":"COLUMN","vertexId":"functional.alltypes.bigint_col"},{"id":6,"vertexType":"COLUMN","vertexId":"functional.alltypes.tinyint_col"}]} Expected: { "queryText":"select lead(a) over (partition by b order by c)\nfrom\n (select lead(id) over (partition by int_col order by bigint_col) as a,\n max(id) over (partition by tinyint_col order by int_col) as b,\n min(int_col) over (partition by string_col order by bool_col) as c\n from functional.alltypes) v", "hash":"aa95e5e6f39fc80bb3c318a2515dc77d", "user":"dev", "timestamp":1446159272, "edges":[ { "sources":[ 1 ], "targets":[ 0 ], "edgeType":"PROJECTION" }, { "sources":[ 1, 2, 3, 4, 5, 6 ], "targets":[ 0 ], "edgeType":"PREDICATE" } ], "vertices":[ { "id":0, "vertexType":"COLUMN", "vertexId":"lead(a, 1, NULL) OVER(...)" }, { "id":1, "vertexType":"COLUMN", "vertexId":"functional.alltypes.id" }, { "id":2, "vertexType":"COLUMN", "vertexId":"functional.alltypes.int_col" }, { "id":3, "vertexType":"COLUMN", "vertexId":"functional.alltypes.tinyint_col" }, { "id":4, "vertexType":"COLUMN", "vertexId":"functional.alltypes.bigint_col" }, { "id":5, "vertexType":"COLUMN", "vertexId":"functional.alltypes.string_col" }, { "id":6, "vertexType":"COLUMN", "vertexId":"functional.alltypes.bool_col" } ] } section LINEAGE of query: create view test_view_lineage (a1, a2, a3, a4, a5, a6, a7) as select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col from functional.alltypessmall c join ( select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day, a.int_col int_col, a.month month, b.float_col float_col, b.id id from ( select * from functional.alltypesagg a where month=1 ) a join functional.alltypessmall b on (a.smallint_col = b.id) ) x on (x.tinyint_col = c.id) where x.day=1 and x.int_col > 899 and x.float_col > 4.5 and c.string_col < '7' and x.int_col + x.float_col + cast(c.string_col as float) < 1000 Output: {"queryText":"create view test_view_lineage (a1, a2, a3, a4, a5, a6, a7) as\n select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col\n from functional.alltypessmall c\n join (\n select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,\n a.int_col int_col, a.month month, b.float_col float_col, b.id id\n from ( select * from functional.alltypesagg a where month=1 ) a\n join functional.alltypessmall b on (a.smallint_col = b.id)\n ) x on (x.tinyint_col = c.id)\n where x.day=1\n and x.int_col > 899\n and x.float_col > 4.5\n and c.string_col < '7'\n and x.int_col + x.float_col + cast(c.string_col as float) < 1000","hash":"ffbe643df8f26e92907fb45de1aeda36","user":"lv","timestamp":1464539800,"edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[2],"edgeType":"PROJECTION"},{"sources":[5],"targets":[4],"edgeType":"PROJECTION"},{"sources":[3],"targets":[6],"edgeType":"PROJECTION"},{"sources":[8],"targets":[7],"edgeType":"PROJECTION"},{"sources":[10],"targets":[9],"edgeType":"PROJECTION"},{"sources":[12],"targets":[11],"edgeType":"PROJECTION"},{"sources":[1,3,5,8,10,12,13,14],"targets":[0,2,4,6,7,9,11],"edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.test_view_lineage.a1"},{"id":1,"vertexType":"COLUMN","vertexId":"functional.alltypesagg.smallint_col"},{"id":2,"vertexType":"COLUMN","vertexId":"default.test_view_lineage.a2"},{"id":3,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.test_view_lineage.a3"},{"id":5,"vertexType":"COLUMN","vertexId":"functional.alltypesagg.tinyint_col"},{"id":6,"vertexType":"COLUMN","vertexId":"default.test_view_lineage.a4"},{"id":7,"vertexType":"COLUMN","vertexId":"default.test_view_lineage.a5"},{"id":8,"vertexType":"COLUMN","vertexId":"functional.alltypesagg.int_col"},{"id":9,"vertexType":"COLUMN","vertexId":"default.test_view_lineage.a6"},{"id":10,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.float_col"},{"id":11,"vertexType":"COLUMN","vertexId":"default.test_view_lineage.a7"},{"id":12,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.string_col"},{"id":13,"vertexType":"COLUMN","vertexId":"functional.alltypesagg.day"},{"id":14,"vertexType":"COLUMN","vertexId":"functional.alltypesagg.month"}]} Expected: { "queryText":"create view test_view_lineage (a1, a2, a3, a4, a5, a6, a7) as\n select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col\n from functional.alltypessmall c\n join (\n select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,\n a.int_col int_col, a.month month, b.float_col float_col, b.id id\n from ( select * from functional.alltypesagg a where month=1 ) a\n join functional.alltypessmall b on (a.smallint_col = b.id)\n ) x on (x.tinyint_col = c.id)\n where x.day=1\n and x.int_col > 899\n and x.float_col > 4.5\n and c.string_col < '7'\n and x.int_col + x.float_col + cast(c.string_col as float) < 1000", "hash":"ffbe643df8f26e92907fb45de1aeda36", "user":"dev", "timestamp":1446159272, "edges":[ { "sources":[ 1 ], "targets":[ 0 ], "edgeType":"PROJECTION" }, { "sources":[ 3 ], "targets":[ 2 ], "edgeType":"PROJECTION" }, { "sources":[ 5 ], "targets":[ 4 ], "edgeType":"PROJECTION" }, { "sources":[ 3 ], "targets":[ 6 ], "edgeType":"PROJECTION" }, { "sources":[ 8 ], "targets":[ 7 ], "edgeType":"PROJECTION" }, { "sources":[ 10 ], "targets":[ 9 ], "edgeType":"PROJECTION" }, { "sources":[ 12 ], "targets":[ 11 ], "edgeType":"PROJECTION" }, { "sources":[ 1, 3, 5, 8, 10, 12, 13, 14 ], "targets":[ 0, 2, 4, 6, 7, 9, 11 ], "edgeType":"PREDICATE" } ], "vertices":[ { "id":0, "vertexType":"COLUMN", "vertexId":"default.test_view_lineage.a1" }, { "id":1, "vertexType":"COLUMN", "vertexId":"functional.alltypesagg.smallint_col" }, { "id":2, "vertexType":"COLUMN", "vertexId":"default.test_view_lineage.a2" }, { "id":3, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.id" }, { "id":4, "vertexType":"COLUMN", "vertexId":"default.test_view_lineage.a3" }, { "id":5, "vertexType":"COLUMN", "vertexId":"functional.alltypesagg.tinyint_col" }, { "id":6, "vertexType":"COLUMN", "vertexId":"default.test_view_lineage.a4" }, { "id":7, "vertexType":"COLUMN", "vertexId":"default.test_view_lineage.a5" }, { "id":8, "vertexType":"COLUMN", "vertexId":"functional.alltypesagg.int_col" }, { "id":9, "vertexType":"COLUMN", "vertexId":"default.test_view_lineage.a6" }, { "id":10, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.float_col" }, { "id":11, "vertexType":"COLUMN", "vertexId":"default.test_view_lineage.a7" }, { "id":12, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.string_col" }, { "id":13, "vertexType":"COLUMN", "vertexId":"functional.alltypesagg.month" }, { "id":14, "vertexType":"COLUMN", "vertexId":"functional.alltypesagg.day" } ] } section LINEAGE of query: create view test_view_lineage as select * from ( select sum(a.tinyint_col) over (partition by a.smallint_col order by a.id), count(b.string_col), b.timestamp_col from functional.alltypes a join functional.alltypessmall b on (a.id = b.id) where a.year = 2010 and b.float_col > 0 group by a.tinyint_col, a.smallint_col, a.id, b.string_col, b.timestamp_col, b.bigint_col having count(a.int_col) > 10 order by b.bigint_col limit 10) t Output: {"queryText":"create view test_view_lineage as\n select * from (\n select sum(a.tinyint_col) over (partition by a.smallint_col order by a.id),\n count(b.string_col), b.timestamp_col\n from functional.alltypes a join functional.alltypessmall b on (a.id = b.id)\n where a.year = 2010 and b.float_col > 0\n group by a.tinyint_col, a.smallint_col, a.id, b.string_col, b.timestamp_col, b.bigint_col\n having count(a.int_col) > 10\n order by b.bigint_col limit 10) t","hash":"d4b9e2d63548088f911816b2ae29d7c2","user":"lv","timestamp":1464539800,"edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[2,3],"targets":[0],"edgeType":"PREDICATE"},{"sources":[5],"targets":[4],"edgeType":"PROJECTION"},{"sources":[7],"targets":[6],"edgeType":"PROJECTION"},{"sources":[1,2,3,5,7,8,9,10,11,12],"targets":[0,4,6],"edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.test_view_lineage._c0"},{"id":1,"vertexType":"COLUMN","vertexId":"functional.alltypes.tinyint_col"},{"id":2,"vertexType":"COLUMN","vertexId":"functional.alltypes.smallint_col"},{"id":3,"vertexType":"COLUMN","vertexId":"functional.alltypes.id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.test_view_lineage._c1"},{"id":5,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.string_col"},{"id":6,"vertexType":"COLUMN","vertexId":"default.test_view_lineage.timestamp_col"},{"id":7,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.timestamp_col"},{"id":8,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.id"},{"id":9,"vertexType":"COLUMN","vertexId":"functional.alltypes.int_col"},{"id":10,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.bigint_col"},{"id":11,"vertexType":"COLUMN","vertexId":"functional.alltypessmall.float_col"},{"id":12,"vertexType":"COLUMN","vertexId":"functional.alltypes.year"}]} Expected: { "queryText":"create view test_view_lineage as\n select * from (\n select sum(a.tinyint_col) over (partition by a.smallint_col order by a.id),\n count(b.string_col), b.timestamp_col\n from functional.alltypes a join functional.alltypessmall b on (a.id = b.id)\n where a.year = 2010 and b.float_col > 0\n group by a.tinyint_col, a.smallint_col, a.id, b.string_col, b.timestamp_col, b.bigint_col\n having count(a.int_col) > 10\n order by b.bigint_col limit 10) t", "hash":"d4b9e2d63548088f911816b2ae29d7c2", "user":"dev", "timestamp":1446159272, "edges":[ { "sources":[ 1 ], "targets":[ 0 ], "edgeType":"PROJECTION" }, { "sources":[ 2, 3 ], "targets":[ 0 ], "edgeType":"PREDICATE" }, { "sources":[ 5 ], "targets":[ 4 ], "edgeType":"PROJECTION" }, { "sources":[ 7 ], "targets":[ 6 ], "edgeType":"PROJECTION" }, { "sources":[ 1, 2, 3, 5, 7, 8, 9, 10, 11, 12 ], "targets":[ 0, 4, 6 ], "edgeType":"PREDICATE" } ], "vertices":[ { "id":0, "vertexType":"COLUMN", "vertexId":"default.test_view_lineage._c0" }, { "id":1, "vertexType":"COLUMN", "vertexId":"functional.alltypes.tinyint_col" }, { "id":2, "vertexType":"COLUMN", "vertexId":"functional.alltypes.id" }, { "id":3, "vertexType":"COLUMN", "vertexId":"functional.alltypes.smallint_col" }, { "id":4, "vertexType":"COLUMN", "vertexId":"default.test_view_lineage._c1" }, { "id":5, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.string_col" }, { "id":6, "vertexType":"COLUMN", "vertexId":"default.test_view_lineage.timestamp_col" }, { "id":7, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.timestamp_col" }, { "id":8, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.id" }, { "id":9, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.float_col" }, { "id":10, "vertexType":"COLUMN", "vertexId":"functional.alltypes.int_col" }, { "id":11, "vertexType":"COLUMN", "vertexId":"functional.alltypessmall.bigint_col" }, { "id":12, "vertexType":"COLUMN", "vertexId":"functional.alltypes.year" } ] } section LINEAGE of query: select * from ( select int_struct_col.f1 + int_struct_col.f2 x from functional.allcomplextypes where year = 2000 order by nested_struct_col.f2.f12.f21 limit 10 union all select sum(f1) y from (select complex_struct_col.f1 f1 from functional.allcomplextypes group by 1) v1) v2 Output: {"queryText":"select * from (\n select int_struct_col.f1 + int_struct_col.f2 x from functional.allcomplextypes\n where year = 2000\n order by nested_struct_col.f2.f12.f21 limit 10\n union all\n select sum(f1) y from\n (select complex_struct_col.f1 f1 from functional.allcomplextypes\n group by 1) v1) v2","hash":"4fb3ceddbf596097335af607d528f5a7","user":"lv","timestamp":1464539800,"edges":[{"sources":[1,2,3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4,5],"targets":[0],"edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"x"},{"id":1,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.int_struct_col.f1"},{"id":2,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.int_struct_col.f2"},{"id":3,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.complex_struct_col.f1"},{"id":4,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.nested_struct_col.f2.f12.f21"},{"id":5,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.year"}]} Expected: { "queryText":"select * from (\n select int_struct_col.f1 + int_struct_col.f2 x from functional.allcomplextypes\n where year = 2000\n order by nested_struct_col.f2.f12.f21 limit 10\n union all\n select sum(f1) y from\n (select complex_struct_col.f1 f1 from functional.allcomplextypes\n group by 1) v1) v2", "hash":"4fb3ceddbf596097335af607d528f5a7", "user":"dev", "timestamp":1446159272, "edges":[ { "sources":[ 1, 2, 3 ], "targets":[ 0 ], "edgeType":"PROJECTION" }, { "sources":[ 4, 5 ], "targets":[ 0 ], "edgeType":"PREDICATE" } ], "vertices":[ { "id":0, "vertexType":"COLUMN", "vertexId":"x" }, { "id":1, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.complex_struct_col.f1" }, { "id":2, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.int_struct_col.f2" }, { "id":3, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.int_struct_col.f1" }, { "id":4, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.nested_struct_col.f2.f12.f21" }, { "id":5, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.year" } ] } section LINEAGE of query: select * from functional.allcomplextypes t, t.int_array_col a, t.struct_map_col m where a.item = m.f1 Output: {"queryText":"select * from functional.allcomplextypes t, t.int_array_col a, t.struct_map_col m\n where a.item = m.f1","hash":"1b0db371b32e90d33629ed7779332cf7","user":"lv","timestamp":1464539800,"edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[2],"edgeType":"PROJECTION"},{"sources":[5],"targets":[4],"edgeType":"PROJECTION"},{"sources":[7],"targets":[6],"edgeType":"PROJECTION"},{"sources":[9],"targets":[8],"edgeType":"PROJECTION"},{"sources":[11],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[12],"edgeType":"PROJECTION"},{"sources":[7,11,14,15],"targets":[0,2,4,6,8,10,12],"edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"id"},{"id":1,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.id"},{"id":2,"vertexType":"COLUMN","vertexId":"year"},{"id":3,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.year"},{"id":4,"vertexType":"COLUMN","vertexId":"month"},{"id":5,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.month"},{"id":6,"vertexType":"COLUMN","vertexId":"item"},{"id":7,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.int_array_col.item"},{"id":8,"vertexType":"COLUMN","vertexId":"key"},{"id":9,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.struct_map_col.key"},{"id":10,"vertexType":"COLUMN","vertexId":"f1"},{"id":11,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.struct_map_col.value.f1"},{"id":12,"vertexType":"COLUMN","vertexId":"f2"},{"id":13,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.struct_map_col.value.f2"},{"id":14,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.int_array_col"},{"id":15,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.struct_map_col"}]} Expected: { "queryText":"select * from functional.allcomplextypes t, t.int_array_col a, t.struct_map_col m\n where a.item = m.f1", "hash":"1b0db371b32e90d33629ed7779332cf7", "user":"dev", "timestamp":1446159272, "edges":[ { "sources":[ 1 ], "targets":[ 0 ], "edgeType":"PROJECTION" }, { "sources":[ 3 ], "targets":[ 2 ], "edgeType":"PROJECTION" }, { "sources":[ 5 ], "targets":[ 4 ], "edgeType":"PROJECTION" }, { "sources":[ 7 ], "targets":[ 6 ], "edgeType":"PROJECTION" }, { "sources":[ 9 ], "targets":[ 8 ], "edgeType":"PROJECTION" }, { "sources":[ 11 ], "targets":[ 10 ], "edgeType":"PROJECTION" }, { "sources":[ 13 ], "targets":[ 12 ], "edgeType":"PROJECTION" }, { "sources":[ 7, 11, 14, 15 ], "targets":[ 0, 2, 4, 6, 8, 10, 12 ], "edgeType":"PREDICATE" } ], "vertices":[ { "id":0, "vertexType":"COLUMN", "vertexId":"id" }, { "id":1, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.id" }, { "id":2, "vertexType":"COLUMN", "vertexId":"year" }, { "id":3, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.year" }, { "id":4, "vertexType":"COLUMN", "vertexId":"month" }, { "id":5, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.month" }, { "id":6, "vertexType":"COLUMN", "vertexId":"item" }, { "id":7, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.int_array_col.item" }, { "id":8, "vertexType":"COLUMN", "vertexId":"key" }, { "id":9, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.struct_map_col.key" }, { "id":10, "vertexType":"COLUMN", "vertexId":"f1" }, { "id":11, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.struct_map_col.value.f1" }, { "id":12, "vertexType":"COLUMN", "vertexId":"f2" }, { "id":13, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.struct_map_col.value.f2" }, { "id":14, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.struct_map_col" }, { "id":15, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.int_array_col" } ] } section LINEAGE of query: select a + b as ab, c, d, e from functional.allcomplextypes t, (select sum(item) a from t.int_array_col where item < 10) v1, (select count(f1) b from t.struct_map_col group by key) v2, (select avg(value) over(partition by key) c from t.map_map_col.value) v3, (select item d from t.int_array_col union all select value from t.int_map_col) v4, (select f21 e from t.complex_nested_struct_col.f2.f12 order by key limit 10) v5 Output: {"queryText":"select a + b as ab, c, d, e from functional.allcomplextypes t,\n (select sum(item) a from t.int_array_col\n where item < 10) v1,\n (select count(f1) b from t.struct_map_col\n group by key) v2,\n (select avg(value) over(partition by key) c from t.map_map_col.value) v3,\n (select item d from t.int_array_col\n union all\n select value from t.int_map_col) v4,\n (select f21 e from t.complex_nested_struct_col.f2.f12 order by key limit 10) v5","hash":"4affc0d1e384475d1ff2fc2e19643064","user":"lv","timestamp":1464539800,"edges":[{"sources":[1,2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[3],"edgeType":"PREDICATE"},{"sources":[1,7],"targets":[6],"edgeType":"PROJECTION"},{"sources":[9],"targets":[8],"edgeType":"PROJECTION"},{"sources":[1,10,11],"targets":[0,3,6,8],"edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"ab"},{"id":1,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.int_array_col.item"},{"id":2,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.struct_map_col.value.f1"},{"id":3,"vertexType":"COLUMN","vertexId":"c"},{"id":4,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.map_map_col.value.value"},{"id":5,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.map_map_col.value.key"},{"id":6,"vertexType":"COLUMN","vertexId":"d"},{"id":7,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.int_map_col.value"},{"id":8,"vertexType":"COLUMN","vertexId":"e"},{"id":9,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.complex_nested_struct_col.f2.item.f12.value.f21"},{"id":10,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.complex_nested_struct_col.f2.item.f12.key"},{"id":11,"vertexType":"COLUMN","vertexId":"functional.allcomplextypes.struct_map_col.key"}]} Expected: { "queryText":"select a + b as ab, c, d, e from functional.allcomplextypes t,\n (select sum(item) a from t.int_array_col\n where item < 10) v1,\n (select count(f1) b from t.struct_map_col\n group by key) v2,\n (select avg(value) over(partition by key) c from t.map_map_col.value) v3,\n (select item d from t.int_array_col\n union all\n select value from t.int_map_col) v4,\n (select f21 e from t.complex_nested_struct_col.f2.f12 order by key limit 10) v5", "hash":"4affc0d1e384475d1ff2fc2e19643064", "user":"dev", "timestamp":1446159272, "edges":[ { "sources":[ 1, 2 ], "targets":[ 0 ], "edgeType":"PROJECTION" }, { "sources":[ 4 ], "targets":[ 3 ], "edgeType":"PROJECTION" }, { "sources":[ 5 ], "targets":[ 3 ], "edgeType":"PREDICATE" }, { "sources":[ 2, 7 ], "targets":[ 6 ], "edgeType":"PROJECTION" }, { "sources":[ 9 ], "targets":[ 8 ], "edgeType":"PROJECTION" }, { "sources":[ 2, 10, 11 ], "targets":[ 0, 3, 6, 8 ], "edgeType":"PREDICATE" } ], "vertices":[ { "id":0, "vertexType":"COLUMN", "vertexId":"ab" }, { "id":1, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.struct_map_col.value.f1" }, { "id":2, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.int_array_col.item" }, { "id":3, "vertexType":"COLUMN", "vertexId":"c" }, { "id":4, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.map_map_col.value.value" }, { "id":5, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.map_map_col.value.key" }, { "id":6, "vertexType":"COLUMN", "vertexId":"d" }, { "id":7, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.int_map_col.value" }, { "id":8, "vertexType":"COLUMN", "vertexId":"e" }, { "id":9, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.complex_nested_struct_col.f2.item.f12.value.f21" }, { "id":10, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.complex_nested_struct_col.f2.item.f12.key" }, { "id":11, "vertexType":"COLUMN", "vertexId":"functional.allcomplextypes.struct_map_col.key" } ] } at org.junit.Assert.fail(Assert.java:88) at com.cloudera.impala.planner.PlannerTestBase.runPlannerTestFile(PlannerTestBase.java:676) at com.cloudera.impala.planner.PlannerTestBase.runPlannerTestFile(PlannerTestBase.java:681) at com.cloudera.impala.planner.PlannerTest.testLineage(PlannerTest.java:164)