Index: build.xml =================================================================== --- build.xml (revision 1467194) +++ build.xml (working copy) @@ -136,7 +136,6 @@ - @@ -510,14 +509,6 @@ - - - - - - - - @@ -766,9 +756,6 @@ - - @@ -962,7 +949,6 @@ - @@ -979,7 +965,6 @@ - @@ -1081,8 +1066,6 @@ todir="${mvn.jar.dir}" /> - - - - - @@ -1373,16 +1351,6 @@ output.file="${mvn.jar.dir}/hive-metastore-${version}.pom.asc" gpg.passphrase="${gpg.passphrase}"/> - - - - - @@ -198,7 +197,6 @@ - Index: ql/src/test/results/clientpositive/multi_insert_lateral_view.q.out =================================================================== --- ql/src/test/results/clientpositive/multi_insert_lateral_view.q.out (revision 0) +++ ql/src/test/results/clientpositive/multi_insert_lateral_view.q.out (working copy) @@ -0,0 +1,2314 @@ +PREHOOK: query: create table src_10 as select * from src limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table src_10 as select * from src limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_10 +PREHOOK: query: create table src_lv1 (key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table src_lv1 (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@src_lv1 +PREHOOK: query: create table src_lv2 (key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table src_lv2 (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@src_lv2 +PREHOOK: query: create table src_lv3 (key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table src_lv3 (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@src_lv3 +PREHOOK: query: -- 2LV +-- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[11]-FS[12] +-- -SEL[3]-UDTF[4]-LVJ[5] +-- -LVF[6]-SEL[7]-LVJ[10]-SEL[13]-FS[14] +-- -SEL[8]-UDTF[9]-LVJ[10] +explain +from src_10 +insert overwrite table src_lv1 select key, C lateral view explode(array(key+1, key+2)) A as C +insert overwrite table src_lv2 select key, C lateral view explode(array(key+3, key+4)) A as C +PREHOOK: type: QUERY +POSTHOOK: query: -- 2LV +-- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[11]-FS[12] +-- -SEL[3]-UDTF[4]-LVJ[5] +-- -LVF[6]-SEL[7]-LVJ[10]-SEL[13]-FS[14] +-- -SEL[8]-UDTF[9]-LVJ[10] +explain +from src_10 +insert overwrite table src_lv1 select key, C lateral view explode(array(key+1, key+2)) A as C +insert overwrite table src_lv2 select key, C lateral view explode(array(key+3, key+4)) A as C +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL C))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 1) (+ (TOK_TABLE_OR_COL key) 2))) C (TOK_TABALIAS A))))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL C))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 3) (+ (TOK_TABLE_OR_COL key) 4))) C (TOK_TABALIAS A)))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 + Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 + Stage-11 + Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-9 depends on stages: Stage-1 + Stage-10 + Stage-12 + Stage-13 depends on stages: Stage-12 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src_10 + TableScan + alias: src_10 + Lateral View Forward + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + Select Operator + expressions: + expr: array((key + 1),(key + 2)) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + Lateral View Forward + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + Select Operator + expressions: + expr: array((key + 3),(key + 4)) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-8 + Conditional Operator + + Stage: Stage-5 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-14 + Conditional Operator + + Stage: Stage-11 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-9 + Stats-Aggr Operator + + Stage: Stage-10 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-12 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-13 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + +PREHOOK: query: from src_10 +insert overwrite table src_lv1 select key, C lateral view explode(array(key+1, key+2)) A as C +insert overwrite table src_lv2 select key, C lateral view explode(array(key+3, key+4)) A as C +PREHOOK: type: QUERY +PREHOOK: Input: default@src_10 +PREHOOK: Output: default@src_lv1 +PREHOOK: Output: default@src_lv2 +POSTHOOK: query: from src_10 +insert overwrite table src_lv1 select key, C lateral view explode(array(key+1, key+2)) A as C +insert overwrite table src_lv2 select key, C lateral view explode(array(key+3, key+4)) A as C +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_10 +POSTHOOK: Output: default@src_lv1 +POSTHOOK: Output: default@src_lv2 +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from src_lv1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv1 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +238 239.0 +238 240.0 +86 87.0 +86 88.0 +311 312.0 +311 313.0 +27 28.0 +27 29.0 +165 166.0 +165 167.0 +409 410.0 +409 411.0 +255 256.0 +255 257.0 +278 279.0 +278 280.0 +98 99.0 +98 100.0 +484 485.0 +484 486.0 +PREHOOK: query: select * from src_lv2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv2 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv2 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +238 241.0 +238 242.0 +86 89.0 +86 90.0 +311 314.0 +311 315.0 +27 30.0 +27 31.0 +165 168.0 +165 169.0 +409 412.0 +409 413.0 +255 258.0 +255 259.0 +278 281.0 +278 282.0 +98 101.0 +98 102.0 +484 487.0 +484 488.0 +PREHOOK: query: -- 2(LV+GBY) +-- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[11]-GBY[12]-RS[13]-GBY[14]-SEL[15]-FS[16] +-- -SEL[3]-UDTF[4]-LVJ[5] +-- -LVF[6]-SEL[7]-LVJ[10]-SEL[17]-GBY[18]-RS[19]-GBY[20]-SEL[21]-FS[22] +-- -SEL[8]-UDTF[9]-LVJ[10] +explain +from src_10 +insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, sum(C) lateral view explode(array(key+3, key+4)) A as C group by key +PREHOOK: type: QUERY +POSTHOOK: query: -- 2(LV+GBY) +-- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[11]-GBY[12]-RS[13]-GBY[14]-SEL[15]-FS[16] +-- -SEL[3]-UDTF[4]-LVJ[5] +-- -LVF[6]-SEL[7]-LVJ[10]-SEL[17]-GBY[18]-RS[19]-GBY[20]-SEL[21]-FS[22] +-- -SEL[8]-UDTF[9]-LVJ[10] +explain +from src_10 +insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, sum(C) lateral view explode(array(key+3, key+4)) A as C group by key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL C)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 1) (+ (TOK_TABLE_OR_COL key) 2))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL C)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 3) (+ (TOK_TABLE_OR_COL key) 4))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src_10 + TableScan + alias: src_10 + Lateral View Forward + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: + expr: sum(_col2) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Select Operator + expressions: + expr: array((key + 1),(key + 2)) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: + expr: sum(_col2) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Lateral View Forward + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: + expr: sum(_col2) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: array((key + 3),(key + 4)) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: + expr: sum(_col2) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-5 + Stats-Aggr Operator + + +PREHOOK: query: from src_10 +insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, sum(C) lateral view explode(array(key+3, key+4)) A as C group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src_10 +PREHOOK: Output: default@src_lv1 +PREHOOK: Output: default@src_lv2 +POSTHOOK: query: from src_10 +insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, sum(C) lateral view explode(array(key+3, key+4)) A as C group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_10 +POSTHOOK: Output: default@src_lv1 +POSTHOOK: Output: default@src_lv2 +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from src_lv1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv1 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +165 333.0 +238 479.0 +255 513.0 +27 57.0 +278 559.0 +311 625.0 +409 821.0 +484 971.0 +86 175.0 +98 199.0 +PREHOOK: query: select * from src_lv2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv2 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv2 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +165 337.0 +238 483.0 +255 517.0 +27 61.0 +278 563.0 +311 629.0 +409 825.0 +484 975.0 +86 179.0 +98 203.0 +PREHOOK: query: -- (LV+GBY) + RS:2GBY +-- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[6]-GBY[7]-RS[8]-GBY[9]-SEL[10]-FS[11] +-- -SEL[3]-UDTF[4]-LVJ[5] +-- -FIL[12]-SEL[13]-RS[14]-FOR[15]-FIL[16]-GBY[17]-SEL[18]-FS[19] +-- -FIL[20]-GBY[21]-SEL[22]-FS[23] +explain +from src_10 +insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, count(value) where key > 200 group by key +insert overwrite table src_lv3 select key, count(value) where key < 200 group by key +PREHOOK: type: QUERY +POSTHOOK: query: -- (LV+GBY) + RS:2GBY +-- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[6]-GBY[7]-RS[8]-GBY[9]-SEL[10]-FS[11] +-- -SEL[3]-UDTF[4]-LVJ[5] +-- -FIL[12]-SEL[13]-RS[14]-FOR[15]-FIL[16]-GBY[17]-SEL[18]-FS[19] +-- -FIL[20]-GBY[21]-SEL[22]-FS[23] +explain +from src_10 +insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, count(value) where key > 200 group by key +insert overwrite table src_lv3 select key, count(value) where key < 200 group by key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL C)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 1) (+ (TOK_TABLE_OR_COL key) 2))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value)))) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 200)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value)))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 200)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + src_10 + TableScan + alias: src_10 + Lateral View Forward + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: + expr: sum(_col2) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Select Operator + expressions: + expr: array((key + 1),(key + 2)) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: + expr: sum(_col2) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Filter Operator + predicate: + expr: ((key > 200.0) or (key < 200.0)) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: -1 + value expressions: + expr: value + type: string + Reduce Operator Tree: + Forward + Filter Operator + predicate: + expr: (KEY._col0 > 200.0) + type: boolean + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + Filter Operator + predicate: + expr: (KEY._col0 < 200.0) + type: boolean + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 + + Stage: Stage-7 + Stats-Aggr Operator + + +PREHOOK: query: from src_10 +insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, count(value) where key > 200 group by key +insert overwrite table src_lv3 select key, count(value) where key < 200 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src_10 +PREHOOK: Output: default@src_lv1 +PREHOOK: Output: default@src_lv2 +PREHOOK: Output: default@src_lv3 +POSTHOOK: query: from src_10 +insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, count(value) where key > 200 group by key +insert overwrite table src_lv3 select key, count(value) where key < 200 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_10 +POSTHOOK: Output: default@src_lv1 +POSTHOOK: Output: default@src_lv2 +POSTHOOK: Output: default@src_lv3 +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +PREHOOK: query: select * from src_lv1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv1 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +165 333.0 +238 479.0 +255 513.0 +27 57.0 +278 559.0 +311 625.0 +409 821.0 +484 971.0 +86 175.0 +98 199.0 +PREHOOK: query: select * from src_lv2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv2 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv2 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +238 1 +255 1 +278 1 +311 1 +409 1 +484 1 +PREHOOK: query: select * from src_lv3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv3 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv3 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +165 1 +27 1 +86 1 +98 1 +PREHOOK: query: -- todo: shared distinct columns (should work with hive.optimize.multigroupby.common.distincts) +-- 2(LV+GBY) + RS:2GBY +-- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[11]-GBY[12]-RS[13]-GBY[14]-SEL[15]-FS[16] +-- -SEL[3]-UDTF[4]-LVJ[5] +-- -LVF[6]-SEL[7]-LVJ[10]-SEL[17]-GBY[18]-RS[19]-GBY[20]-SEL[21]-FS[22] +-- -SEL[8]-UDTF[9]-LVJ[10] +-- -SEL[23]-GBY[24]-RS[25]-GBY[26]-SEL[27]-FS[28] +explain +from src_10 +insert overwrite table src_lv1 select C, sum(distinct key) lateral view explode(array(key+1, key+2)) A as C group by C +insert overwrite table src_lv2 select C, sum(distinct key) lateral view explode(array(key+3, key+4)) A as C group by C +insert overwrite table src_lv3 select value, sum(distinct key) group by value +PREHOOK: type: QUERY +POSTHOOK: query: -- todo: shared distinct columns (should work with hive.optimize.multigroupby.common.distincts) +-- 2(LV+GBY) + RS:2GBY +-- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[11]-GBY[12]-RS[13]-GBY[14]-SEL[15]-FS[16] +-- -SEL[3]-UDTF[4]-LVJ[5] +-- -LVF[6]-SEL[7]-LVJ[10]-SEL[17]-GBY[18]-RS[19]-GBY[20]-SEL[21]-FS[22] +-- -SEL[8]-UDTF[9]-LVJ[10] +-- -SEL[23]-GBY[24]-RS[25]-GBY[26]-SEL[27]-FS[28] +explain +from src_10 +insert overwrite table src_lv1 select C, sum(distinct key) lateral view explode(array(key+1, key+2)) A as C group by C +insert overwrite table src_lv2 select C, sum(distinct key) lateral view explode(array(key+3, key+4)) A as C group by C +insert overwrite table src_lv3 select value, sum(distinct key) group by value +POSTHOOK: type: QUERY +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL C)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 1) (+ (TOK_TABLE_OR_COL key) 2))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL C))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL C)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 3) (+ (TOK_TABLE_OR_COL key) 4))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL C))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + src_10 + TableScan + alias: src_10 + Lateral View Forward + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col2 + type: double + expr: _col0 + type: string + outputColumnNames: _col2, _col0 + Group By Operator + aggregations: + expr: sum(DISTINCT _col0) + bucketGroup: false + keys: + expr: _col2 + type: double + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: double + tag: -1 + value expressions: + expr: _col2 + type: double + Select Operator + expressions: + expr: array((key + 1),(key + 2)) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col2 + type: double + expr: _col0 + type: string + outputColumnNames: _col2, _col0 + Group By Operator + aggregations: + expr: sum(DISTINCT _col0) + bucketGroup: false + keys: + expr: _col2 + type: double + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: double + tag: -1 + value expressions: + expr: _col2 + type: double + Lateral View Forward + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col2 + type: double + expr: _col0 + type: string + outputColumnNames: _col2, _col0 + Group By Operator + aggregations: + expr: sum(DISTINCT _col0) + bucketGroup: false + keys: + expr: _col2 + type: double + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: array((key + 3),(key + 4)) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col2 + type: double + expr: _col0 + type: string + outputColumnNames: _col2, _col0 + Group By Operator + aggregations: + expr: sum(DISTINCT _col0) + bucketGroup: false + keys: + expr: _col2 + type: double + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: value, key + Group By Operator + aggregations: + expr: sum(DISTINCT key) + bucketGroup: false + keys: + expr: value + type: string + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: double + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: double + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: double + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: double + tag: -1 + value expressions: + expr: _col2 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: double + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: double + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 + + Stage: Stage-8 + Stats-Aggr Operator + + +PREHOOK: query: from src_10 +insert overwrite table src_lv1 select C, sum(distinct key) lateral view explode(array(key+1, key+2)) A as C group by C +insert overwrite table src_lv2 select C, sum(distinct key) lateral view explode(array(key+3, key+4)) A as C group by C +insert overwrite table src_lv3 select value, sum(distinct key) group by value +PREHOOK: type: QUERY +PREHOOK: Input: default@src_10 +PREHOOK: Output: default@src_lv1 +PREHOOK: Output: default@src_lv2 +PREHOOK: Output: default@src_lv3 +POSTHOOK: query: from src_10 +insert overwrite table src_lv1 select C, sum(distinct key) lateral view explode(array(key+1, key+2)) A as C group by C +insert overwrite table src_lv2 select C, sum(distinct key) lateral view explode(array(key+3, key+4)) A as C group by C +insert overwrite table src_lv3 select value, sum(distinct key) group by value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_10 +POSTHOOK: Output: default@src_lv1 +POSTHOOK: Output: default@src_lv2 +POSTHOOK: Output: default@src_lv3 +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv2.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: select * from src_lv1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv1 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv2.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +28.0 27.0 +29.0 27.0 +87.0 86.0 +88.0 86.0 +99.0 98.0 +100.0 98.0 +166.0 165.0 +167.0 165.0 +239.0 238.0 +240.0 238.0 +256.0 255.0 +257.0 255.0 +279.0 278.0 +280.0 278.0 +312.0 311.0 +313.0 311.0 +410.0 409.0 +411.0 409.0 +485.0 484.0 +486.0 484.0 +PREHOOK: query: select * from src_lv2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv2 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv2 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv2.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +30.0 27.0 +31.0 27.0 +89.0 86.0 +90.0 86.0 +101.0 98.0 +102.0 98.0 +168.0 165.0 +169.0 165.0 +241.0 238.0 +242.0 238.0 +258.0 255.0 +259.0 255.0 +281.0 278.0 +282.0 278.0 +314.0 311.0 +315.0 311.0 +412.0 409.0 +413.0 409.0 +487.0 484.0 +488.0 484.0 +PREHOOK: query: select * from src_lv3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv3 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv3 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv2.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +val_165 165.0 +val_238 238.0 +val_255 255.0 +val_27 27.0 +val_278 278.0 +val_311 311.0 +val_409 409.0 +val_484 484.0 +val_86 86.0 +val_98 98.0 +PREHOOK: query: create table src_lv4 (key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table src_lv4 (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@src_lv4 +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv2.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: -- Common distincts optimization works across non-lateral view queries, but not across lateral view multi inserts +explain +from src_10 +insert overwrite table src_lv1 select key, sum(distinct C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, sum(distinct C) lateral view explode(array(key+3, key+4)) A as C group by key +insert overwrite table src_lv3 select value, sum(distinct key) where key > 200 group by value +insert overwrite table src_lv4 select value, sum(distinct key) where key < 200 group by value +PREHOOK: type: QUERY +POSTHOOK: query: -- Common distincts optimization works across non-lateral view queries, but not across lateral view multi inserts +explain +from src_10 +insert overwrite table src_lv1 select key, sum(distinct C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, sum(distinct C) lateral view explode(array(key+3, key+4)) A as C group by key +insert overwrite table src_lv3 select value, sum(distinct key) where key > 200 group by value +insert overwrite table src_lv4 select value, sum(distinct key) where key < 200 group by value +POSTHOOK: type: QUERY +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv2.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src_10))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL C)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 1) (+ (TOK_TABLE_OR_COL key) 2))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL C)))) (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (+ (TOK_TABLE_OR_COL key) 3) (+ (TOK_TABLE_OR_COL key) 4))) C (TOK_TABALIAS A)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 200)) (TOK_GROUPBY (TOK_TABLE_OR_COL value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_lv4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_TABLE_OR_COL key)))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 200)) (TOK_GROUPBY (TOK_TABLE_OR_COL value)))) + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + src_10 + TableScan + alias: src_10 + Lateral View Forward + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: + expr: sum(DISTINCT _col2) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col2 + type: double + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: double + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: double + Select Operator + expressions: + expr: array((key + 1),(key + 2)) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: + expr: sum(DISTINCT _col2) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col2 + type: double + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: double + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: double + Lateral View Forward + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: + expr: sum(DISTINCT _col2) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col2 + type: double + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: array((key + 3),(key + 4)) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: double + outputColumnNames: _col0, _col2 + Group By Operator + aggregations: + expr: sum(DISTINCT _col2) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col2 + type: double + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Filter Operator + predicate: + expr: ((key > 200.0) or (key < 200.0)) + type: boolean + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: value, key + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: double + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + + Stage: Stage-7 + Stats-Aggr Operator + + Stage: Stage-8 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: value + type: string + expr: key + type: string + sort order: ++ + Map-reduce partition columns: + expr: value + type: string + tag: -1 + Reduce Operator Tree: + Forward + Filter Operator + predicate: + expr: (KEY._col1:0._col0 > 200.0) + type: boolean + Group By Operator + aggregations: + expr: sum(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 + Filter Operator + predicate: + expr: (KEY._col1:0._col0 < 200.0) + type: boolean + Group By Operator + aggregations: + expr: sum(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 4 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv4 + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 + + Stage: Stage-9 + Stats-Aggr Operator + + Stage: Stage-3 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv4 + + Stage: Stage-10 + Stats-Aggr Operator + + +PREHOOK: query: from src_10 +insert overwrite table src_lv1 select key, sum(distinct C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, sum(distinct C) lateral view explode(array(key+3, key+4)) A as C group by key +insert overwrite table src_lv3 select value, sum(distinct key) where key > 200 group by value +insert overwrite table src_lv4 select value, sum(distinct key) where key < 200 group by value +PREHOOK: type: QUERY +PREHOOK: Input: default@src_10 +PREHOOK: Output: default@src_lv1 +PREHOOK: Output: default@src_lv2 +PREHOOK: Output: default@src_lv3 +PREHOOK: Output: default@src_lv4 +POSTHOOK: query: from src_10 +insert overwrite table src_lv1 select key, sum(distinct C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, sum(distinct C) lateral view explode(array(key+3, key+4)) A as C group by key +insert overwrite table src_lv3 select value, sum(distinct key) where key > 200 group by value +insert overwrite table src_lv4 select value, sum(distinct key) where key < 200 group by value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_10 +POSTHOOK: Output: default@src_lv1 +POSTHOOK: Output: default@src_lv2 +POSTHOOK: Output: default@src_lv3 +POSTHOOK: Output: default@src_lv4 +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv2.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [] +POSTHOOK: Lineage: src_lv4.value EXPRESSION [] +PREHOOK: query: select * from src_lv1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv1 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv2.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [] +POSTHOOK: Lineage: src_lv4.value EXPRESSION [] +165 333.0 +238 479.0 +255 513.0 +27 57.0 +278 559.0 +311 625.0 +409 821.0 +484 971.0 +86 175.0 +98 199.0 +PREHOOK: query: select * from src_lv2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv2 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv2 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv2.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [] +POSTHOOK: Lineage: src_lv4.value EXPRESSION [] +165 337.0 +238 483.0 +255 517.0 +27 61.0 +278 563.0 +311 629.0 +409 825.0 +484 975.0 +86 179.0 +98 203.0 +PREHOOK: query: select * from src_lv3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv3 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv3 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv2.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [] +POSTHOOK: Lineage: src_lv4.value EXPRESSION [] +val_238 238.0 +val_255 255.0 +val_278 278.0 +val_311 311.0 +val_409 409.0 +val_484 484.0 +PREHOOK: query: select * from src_lv4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_lv4 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_lv4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_lv4 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv1.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.key SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv2.value SCRIPT [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv2.value EXPRESSION [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.key SIMPLE [(src_10)src_10.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.null, ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [(src_10)src_10.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: src_lv3.value EXPRESSION [] +POSTHOOK: Lineage: src_lv4.value EXPRESSION [] +val_165 165.0 +val_27 27.0 +val_86 86.0 +val_98 98.0 Index: ql/src/test/queries/clientpositive/multi_insert_lateral_view.q =================================================================== --- ql/src/test/queries/clientpositive/multi_insert_lateral_view.q (revision 0) +++ ql/src/test/queries/clientpositive/multi_insert_lateral_view.q (working copy) @@ -0,0 +1,102 @@ +create table src_10 as select * from src limit 10; + +create table src_lv1 (key string, value string); +create table src_lv2 (key string, value string); +create table src_lv3 (key string, value string); + +-- 2LV +-- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[11]-FS[12] +-- -SEL[3]-UDTF[4]-LVJ[5] +-- -LVF[6]-SEL[7]-LVJ[10]-SEL[13]-FS[14] +-- -SEL[8]-UDTF[9]-LVJ[10] +explain +from src_10 +insert overwrite table src_lv1 select key, C lateral view explode(array(key+1, key+2)) A as C +insert overwrite table src_lv2 select key, C lateral view explode(array(key+3, key+4)) A as C; + +from src_10 +insert overwrite table src_lv1 select key, C lateral view explode(array(key+1, key+2)) A as C +insert overwrite table src_lv2 select key, C lateral view explode(array(key+3, key+4)) A as C; + +select * from src_lv1; +select * from src_lv2; + +-- 2(LV+GBY) +-- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[11]-GBY[12]-RS[13]-GBY[14]-SEL[15]-FS[16] +-- -SEL[3]-UDTF[4]-LVJ[5] +-- -LVF[6]-SEL[7]-LVJ[10]-SEL[17]-GBY[18]-RS[19]-GBY[20]-SEL[21]-FS[22] +-- -SEL[8]-UDTF[9]-LVJ[10] +explain +from src_10 +insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, sum(C) lateral view explode(array(key+3, key+4)) A as C group by key; + +from src_10 +insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, sum(C) lateral view explode(array(key+3, key+4)) A as C group by key; + +select * from src_lv1; +select * from src_lv2; + +-- (LV+GBY) + RS:2GBY +-- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[6]-GBY[7]-RS[8]-GBY[9]-SEL[10]-FS[11] +-- -SEL[3]-UDTF[4]-LVJ[5] +-- -FIL[12]-SEL[13]-RS[14]-FOR[15]-FIL[16]-GBY[17]-SEL[18]-FS[19] +-- -FIL[20]-GBY[21]-SEL[22]-FS[23] +explain +from src_10 +insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, count(value) where key > 200 group by key +insert overwrite table src_lv3 select key, count(value) where key < 200 group by key; + +from src_10 +insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, count(value) where key > 200 group by key +insert overwrite table src_lv3 select key, count(value) where key < 200 group by key; + +select * from src_lv1; +select * from src_lv2; +select * from src_lv3; + +-- todo: shared distinct columns (should work with hive.optimize.multigroupby.common.distincts) +-- 2(LV+GBY) + RS:2GBY +-- TS[0]-LVF[1]-SEL[2]-LVJ[5]-SEL[11]-GBY[12]-RS[13]-GBY[14]-SEL[15]-FS[16] +-- -SEL[3]-UDTF[4]-LVJ[5] +-- -LVF[6]-SEL[7]-LVJ[10]-SEL[17]-GBY[18]-RS[19]-GBY[20]-SEL[21]-FS[22] +-- -SEL[8]-UDTF[9]-LVJ[10] +-- -SEL[23]-GBY[24]-RS[25]-GBY[26]-SEL[27]-FS[28] +explain +from src_10 +insert overwrite table src_lv1 select C, sum(distinct key) lateral view explode(array(key+1, key+2)) A as C group by C +insert overwrite table src_lv2 select C, sum(distinct key) lateral view explode(array(key+3, key+4)) A as C group by C +insert overwrite table src_lv3 select value, sum(distinct key) group by value; + +from src_10 +insert overwrite table src_lv1 select C, sum(distinct key) lateral view explode(array(key+1, key+2)) A as C group by C +insert overwrite table src_lv2 select C, sum(distinct key) lateral view explode(array(key+3, key+4)) A as C group by C +insert overwrite table src_lv3 select value, sum(distinct key) group by value; + +select * from src_lv1; +select * from src_lv2; +select * from src_lv3; + +create table src_lv4 (key string, value string); + +-- Common distincts optimization works across non-lateral view queries, but not across lateral view multi inserts +explain +from src_10 +insert overwrite table src_lv1 select key, sum(distinct C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, sum(distinct C) lateral view explode(array(key+3, key+4)) A as C group by key +insert overwrite table src_lv3 select value, sum(distinct key) where key > 200 group by value +insert overwrite table src_lv4 select value, sum(distinct key) where key < 200 group by value; + +from src_10 +insert overwrite table src_lv1 select key, sum(distinct C) lateral view explode(array(key+1, key+2)) A as C group by key +insert overwrite table src_lv2 select key, sum(distinct C) lateral view explode(array(key+3, key+4)) A as C group by key +insert overwrite table src_lv3 select value, sum(distinct key) where key > 200 group by value +insert overwrite table src_lv4 select value, sum(distinct key) where key < 200 group by value; + +select * from src_lv1; +select * from src_lv2; +select * from src_lv3; +select * from src_lv4; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (revision 1467194) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (working copy) @@ -1549,6 +1549,7 @@ start++; } builder.append(name); + start += name.length(); if (added) { if (op.getNumChild() > 0) { List> children = op.getChildOperators(); @@ -1559,7 +1560,7 @@ builder.append(' '); } } - toString(builder, visited, children.get(i), start += name.length()); + toString(builder, visited, children.get(i), start); } } return true; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1467194) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -1023,7 +1023,13 @@ .getMsg(partition.toString())); } } - + skipRecursion = false; + break; + case HiveParser.TOK_LATERAL_VIEW: + // todo: nested LV + assert ast.getChildCount() == 1; + qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast); + break; default: skipRecursion = false; break; @@ -3989,7 +3995,7 @@ } @SuppressWarnings({"nls"}) - private Operator genGroupByPlan1MRMultiReduceGB(List dests, QB qb, Operator input) + private Operator genGroupByPlan1ReduceMultiGBY(List dests, QB qb, Operator input) throws SemanticException { QBParseInfo parseInfo = qb.getParseInfo(); @@ -6811,9 +6817,14 @@ // Return the common distinct expression // There should be more than 1 destination, with group bys in all of them. private List getCommonDistinctExprs(QB qb, Operator input) { - RowResolver inputRR = opParseCtx.get(input).getRowResolver(); QBParseInfo qbp = qb.getParseInfo(); + // If a grouping set aggregation is present, common processing is not possible + if (!qbp.getDestCubes().isEmpty() || !qbp.getDestRollups().isEmpty() + || !qbp.getDestToLateralView().isEmpty()) { + return null; + } + RowResolver inputRR = opParseCtx.get(input).getRowResolver(); TreeSet ks = new TreeSet(); ks.addAll(qbp.getClauseNames()); @@ -6822,15 +6833,10 @@ return null; } - List oldList = null; + List oldList = null; List oldASTList = null; for (String dest : ks) { - // If a grouping set aggregation is present, common processing is not possible - if (!qbp.getDestCubes().isEmpty() || !qbp.getDestRollups().isEmpty()) { - return null; - } - // If a filter is present, common processing is not possible if (qbp.getWhrForClause(dest) != null) { return null; @@ -6847,7 +6853,7 @@ return null; } - List currDestList; + List currDestList; try { currDestList = getDistinctExprs(qbp, dest, inputRR); } catch (SemanticException e) { @@ -6968,10 +6974,9 @@ // Groups the clause names into lists so that any two clauses in the same list has the same // group by and distinct keys and no clause appears in more than one list. Returns a list of the // lists of clauses. - private List> getCommonGroupByDestGroups(QB qb, Operator input) - throws SemanticException { + private List> getCommonGroupByDestGroups(QB qb, + Map> inputs) throws SemanticException { - RowResolver inputRR = opParseCtx.get(input).getRowResolver(); QBParseInfo qbp = qb.getParseInfo(); TreeSet ks = new TreeSet(); @@ -6989,29 +6994,31 @@ return commonGroupByDestGroups; } - List> sprayKeyLists = - new ArrayList>(ks.size()); + List> inputOperators = + new ArrayList>(ks.size()); + List> sprayKeyLists = new ArrayList>(ks.size()); // Iterate over each clause for (String dest : ks) { + Operator input = inputs.get(dest); + RowResolver inputRR = opParseCtx.get(input).getRowResolver(); + List sprayKeys = getDistinctExprs(qbp, dest, inputRR); - List sprayKeys = - getDistinctExprs(qbp, dest, inputRR); - // Add the group by expressions List grpByExprs = getGroupByForClause(qbp, dest); for (ASTNode grpByExpr : grpByExprs) { - ExprNodeDesc.ExprNodeDescEqualityWrapper grpByExprWrapper = - new ExprNodeDesc.ExprNodeDescEqualityWrapper(genExprNodeDesc(grpByExpr, inputRR)); - if (!sprayKeys.contains(grpByExprWrapper)) { - sprayKeys.add(grpByExprWrapper); + ExprNodeDesc exprDesc = genExprNodeDesc(grpByExpr, inputRR); + if (ExprNodeDescUtils.indexOf(exprDesc, sprayKeys) < 0) { + sprayKeys.add(exprDesc); } } // Loop through each of the lists of exprs, looking for a match boolean found = false; for (int i = 0; i < sprayKeyLists.size(); i++) { - + if (!input.equals(inputOperators.get(i))) { + continue; + } if (!matchExprLists(sprayKeyLists.get(i), sprayKeys)) { continue; } @@ -7024,6 +7031,7 @@ // No match was found, so create new entries if (!found) { + inputOperators.add(input); sprayKeyLists.add(sprayKeys); List destGroup = new ArrayList(); destGroup.add(dest); @@ -7035,15 +7043,13 @@ } // Returns whether or not two lists contain the same elements independent of order - private boolean matchExprLists(List list1, - List list2) { + private boolean matchExprLists(List list1, List list2) { if (list1.size() != list2.size()) { return false; } - - for (ExprNodeDesc.ExprNodeDescEqualityWrapper exprNodeDesc : list1) { - if (!list2.contains(exprNodeDesc)) { + for (ExprNodeDesc exprNodeDesc : list1) { + if (ExprNodeDescUtils.indexOf(exprNodeDesc, list2) < 0) { return false; } } @@ -7051,23 +7057,20 @@ return true; } - // Returns a list of the distinct exprs for a given clause name as - // ExprNodeDesc.ExprNodeDescEqualityWrapper without duplicates - private List - getDistinctExprs(QBParseInfo qbp, String dest, RowResolver inputRR) throws SemanticException { + // Returns a list of the distinct exprs without duplicates for a given clause name + private List getDistinctExprs(QBParseInfo qbp, String dest, RowResolver inputRR) + throws SemanticException { List distinctAggExprs = qbp.getDistinctFuncExprsForClause(dest); - List distinctExprs = - new ArrayList(); + List distinctExprs = new ArrayList(); for (ASTNode distinctAggExpr : distinctAggExprs) { // 0 is function name for (int i = 1; i < distinctAggExpr.getChildCount(); i++) { ASTNode parameter = (ASTNode) distinctAggExpr.getChild(i); - ExprNodeDesc.ExprNodeDescEqualityWrapper distinctExpr = - new ExprNodeDesc.ExprNodeDescEqualityWrapper(genExprNodeDesc(parameter, inputRR)); - if (!distinctExprs.contains(distinctExpr)) { - distinctExprs.add(distinctExpr); + ExprNodeDesc expr = genExprNodeDesc(parameter, inputRR); + if (ExprNodeDescUtils.indexOf(expr, distinctExprs) < 0) { + distinctExprs.add(expr); } } } @@ -7096,6 +7099,7 @@ QBParseInfo qbp = qb.getParseInfo(); TreeSet ks = new TreeSet(qbp.getClauseNames()); + Map> inputs = createInputForDests(qb, input, ks); // For multi-group by with the same distinct, we ignore all user hints // currently. It doesnt matter whether he has asked to do // map-side aggregation or not. Map side aggregation is turned off @@ -7148,7 +7152,7 @@ // expressions, otherwise treat all the expressions as a single group if (conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEREDUCER)) { try { - commonGroupByDestGroups = getCommonGroupByDestGroups(qb, curr); + commonGroupByDestGroups = getCommonGroupByDestGroups(qb, inputs); } catch (SemanticException e) { LOG.error("Failed to group clauses by common spray keys.", e); } @@ -7168,6 +7172,8 @@ } String firstDest = commonGroupByDestGroup.get(0); + input = inputs.get(firstDest); + // Constructs a standard group by plan if: // There is no other subquery with the same group by/distinct keys or // (There are no aggregations in a representative query for the group and @@ -7182,7 +7188,7 @@ // Go over all the destination tables for (String dest : commonGroupByDestGroup) { - curr = input; + curr = inputs.get(dest); if (qbp.getWhrForClause(dest) != null) { curr = genFilterPlan(dest, qb, curr); @@ -7215,7 +7221,7 @@ curr = genPostGroupByBodyPlan(curr, dest, qb); } } else { - curr = genGroupByPlan1MRMultiReduceGB(commonGroupByDestGroup, qb, input); + curr = genGroupByPlan1ReduceMultiGBY(commonGroupByDestGroup, qb, input); } } } @@ -7228,6 +7234,16 @@ return curr; } + private Map> createInputForDests(QB qb, + Operator input, Set dests) throws SemanticException { + Map> inputs = + new HashMap>(); + for (String dest : dests) { + inputs.put(dest, genLateralViewPlanForDest(dest, qb, input)); + } + return inputs; + } + private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb) throws SemanticException { @@ -8037,76 +8053,91 @@ // -> LateralViewJoinOperator // - RowResolver lvForwardRR = new RowResolver(); - RowResolver source = opParseCtx.get(op).getRowResolver(); - for (ColumnInfo col : source.getColumnInfos()) { - if (col.getIsVirtualCol() && col.isHiddenVirtualCol()) { - continue; - } - String[] tabCol = source.reverseLookup(col.getInternalName()); - lvForwardRR.put(tabCol[0], tabCol[1], col); - } + Operator lateralViewJoin = genLateralViewPlan(qb, op, lateralViewTree); + op = lateralViewJoin; + } + e.setValue(op); + } + } + } - Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild( - new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()), - op), lvForwardRR); + private Operator genLateralViewPlanForDest(String dest, QB qb, Operator op) + throws SemanticException { + ASTNode lateralViewTree = qb.getParseInfo().getDestToLateralView().get(dest); + if (lateralViewTree != null) { + return genLateralViewPlan(qb, op, lateralViewTree); + } + return op; + } - // The order in which the two paths are added is important. The - // lateral view join operator depends on having the select operator - // give it the row first. + private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) + throws SemanticException { + RowResolver lvForwardRR = new RowResolver(); + RowResolver source = opParseCtx.get(op).getRowResolver(); + for (ColumnInfo col : source.getColumnInfos()) { + if (col.getIsVirtualCol() && col.isHiddenVirtualCol()) { + continue; + } + String[] tabCol = source.reverseLookup(col.getInternalName()); + lvForwardRR.put(tabCol[0], tabCol[1], col); + } - // Get the all path by making a select(*). - RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver(); - // Operator allPath = op; - Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild( - new SelectDesc(true), new RowSchema(allPathRR.getColumnInfos()), - lvForward), allPathRR); - // Get the UDTF Path - QB blankQb = new QB(null, null, false); - Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree - .getChild(0), blankQb, lvForward); - // add udtf aliases to QB - for (String udtfAlias : blankQb.getAliases()) { - qb.addAlias(udtfAlias); - } - RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver(); + Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild( + new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()), + op), lvForwardRR); - // Merge the two into the lateral view join - // The cols of the merged result will be the combination of both the - // cols of the UDTF path and the cols of the all path. The internal - // names have to be changed to avoid conflicts + // The order in which the two paths are added is important. The + // lateral view join operator depends on having the select operator + // give it the row first. - RowResolver lateralViewRR = new RowResolver(); - ArrayList outputInternalColNames = new ArrayList(); + // Get the all path by making a select(*). + RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver(); + // Operator allPath = op; + Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild( + new SelectDesc(true), new RowSchema(allPathRR.getColumnInfos()), + lvForward), allPathRR); + // Get the UDTF Path + QB blankQb = new QB(null, null, false); + Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree + .getChild(0), blankQb, lvForward); + // add udtf aliases to QB + for (String udtfAlias : blankQb.getAliases()) { + qb.addAlias(udtfAlias); + } + RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver(); - LVmergeRowResolvers(allPathRR, lateralViewRR, outputInternalColNames); - LVmergeRowResolvers(udtfPathRR, lateralViewRR, outputInternalColNames); + // Merge the two into the lateral view join + // The cols of the merged result will be the combination of both the + // cols of the UDTF path and the cols of the all path. The internal + // names have to be changed to avoid conflicts - // For PPD, we need a column to expression map so that during the walk, - // the processor knows how to transform the internal col names. - // Following steps are dependant on the fact that we called - // LVmerge.. in the above order - Map colExprMap = new HashMap(); + RowResolver lateralViewRR = new RowResolver(); + ArrayList outputInternalColNames = new ArrayList(); - int i = 0; - for (ColumnInfo c : allPathRR.getColumnInfos()) { - String internalName = getColumnInternalName(i); - i++; - colExprMap.put(internalName, - new ExprNodeColumnDesc(c.getType(), c.getInternalName(), - c.getTabAlias(), c.getIsVirtualCol())); - } + LVmergeRowResolvers(allPathRR, lateralViewRR, outputInternalColNames); + LVmergeRowResolvers(udtfPathRR, lateralViewRR, outputInternalColNames); - Operator lateralViewJoin = putOpInsertMap(OperatorFactory - .getAndMakeChild(new LateralViewJoinDesc(outputInternalColNames), - new RowSchema(lateralViewRR.getColumnInfos()), allPath, - udtfPath), lateralViewRR); - lateralViewJoin.setColumnExprMap(colExprMap); - op = lateralViewJoin; - } - e.setValue(op); - } + // For PPD, we need a column to expression map so that during the walk, + // the processor knows how to transform the internal col names. + // Following steps are dependant on the fact that we called + // LVmerge.. in the above order + Map colExprMap = new HashMap(); + + int i = 0; + for (ColumnInfo c : allPathRR.getColumnInfos()) { + String internalName = getColumnInternalName(i); + i++; + colExprMap.put(internalName, + new ExprNodeColumnDesc(c.getType(), c.getInternalName(), + c.getTabAlias(), c.getIsVirtualCol())); } + + Operator lateralViewJoin = putOpInsertMap(OperatorFactory + .getAndMakeChild(new LateralViewJoinDesc(outputInternalColNames), + new RowSchema(lateralViewRR.getColumnInfos()), allPath, + udtfPath), lateralViewRR); + lateralViewJoin.setColumnExprMap(colExprMap); + return lateralViewJoin; } /** @@ -8704,7 +8735,7 @@ } if (LOG.isDebugEnabled()) { - LOG.debug("\n" + Operator.toString(pCtx.getTopOps().values())); + LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values())); } Optimizer optm = new Optimizer(); @@ -8713,7 +8744,7 @@ pCtx = optm.optimize(); if (LOG.isDebugEnabled()) { - LOG.debug("\n" + Operator.toString(pCtx.getTopOps().values())); + LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values())); } // Generate column access stats if required - wait until column pruning takes place Index: ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (revision 1467194) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (working copy) @@ -1828,6 +1828,7 @@ : insertClause selectClause + lateralView? whereClause? groupByClause? havingClause? @@ -1836,11 +1837,12 @@ distributeByClause? sortByClause? window_clause? - limitClause? -> ^(TOK_INSERT insertClause? - selectClause whereClause? groupByClause? havingClause? orderByClause? clusterByClause? + limitClause? -> ^(TOK_INSERT insertClause + selectClause lateralView? whereClause? groupByClause? havingClause? orderByClause? clusterByClause? distributeByClause? sortByClause? window_clause? limitClause?) | selectClause + lateralView? whereClause? groupByClause? havingClause? @@ -1850,7 +1852,7 @@ sortByClause? window_clause? limitClause? -> ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) - selectClause whereClause? groupByClause? havingClause? orderByClause? clusterByClause? + selectClause lateralView? whereClause? groupByClause? havingClause? orderByClause? clusterByClause? distributeByClause? sortByClause? window_clause? limitClause?) ; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (revision 1467194) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (working copy) @@ -90,6 +90,8 @@ */ private final HashMap> aliasToLateralViews; + private final HashMap destToLateralView; + /* Order by clause */ private final HashMap destToOrderby; private final HashMap destToLimit; @@ -111,6 +113,7 @@ nameToDest = new HashMap(); nameToSample = new HashMap(); exprToColumnAlias = new HashMap(); + destToLateralView = new HashMap(); destToSelExpr = new LinkedHashMap(); destToWhereExpr = new HashMap(); destToGroupby = new HashMap(); @@ -552,6 +555,9 @@ return nameToSample; } + public HashMap getDestToLateralView() { + return destToLateralView; + } protected static enum ClauseType { CLUSTER_BY_CLAUSE,