Index: ql/src/test/results/clientpositive/smb_mapjoin_14.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_14.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_mapjoin_14.q.out (working copy) @@ -0,0 +1,1772 @@ +PREHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert overwrite table tbl1 +select * from src where key < 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl1 +POSTHOOK: query: insert overwrite table tbl1 +select * from src where key < 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl2 +select * from src where key < 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +POSTHOOK: query: insert overwrite table tbl2 +select * from src where key < 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a) subq1 + join + (select a.key as key, a.value as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a) subq1 + join + (select a.key as key, a.value as value from tbl2 a) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +4 val_4 4 val_4 +8 val_8 8 val_8 +10 val_10 10 val_10 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +9 val_9 9 val_9 +11 val_11 11 val_11 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +17 val_17 17 val_17 +19 val_19 19 val_19 +PREHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +4 val_4 4 val_4 +8 val_8 8 val_8 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +9 val_9 9 val_9 +PREHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a) subq1 + join + (select a.key as key, a.value as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a) subq1 + join + (select a.key as key, a.value as value from tbl2 a) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +4 val_4 4 val_4 +8 val_8 8 val_8 +10 val_10 10 val_10 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +9 val_9 9 val_9 +11 val_11 11 val_11 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +17 val_17 17 val_17 +19 val_19 19 val_19 +PREHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +4 val_4 4 val_4 +8 val_8 8 val_8 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +9 val_9 9 val_9 +PREHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a) subq1 + join + (select a.key as key, a.value as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a) subq1 + join + (select a.key as key, a.value as value from tbl2 a) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +4 val_4 4 val_4 +8 val_8 8 val_8 +10 val_10 10 val_10 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +9 val_9 9 val_9 +11 val_11 11 val_11 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +17 val_17 17 val_17 +19 val_19 19 val_19 +PREHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +4 val_4 4 val_4 +8 val_8 8 val_8 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +9 val_9 9 val_9 +PREHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a) subq1 + join + (select a.key as key, a.value as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a) subq1 + join + (select a.key as key, a.value as value from tbl2 a) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +4 val_4 4 val_4 +8 val_8 8 val_8 +10 val_10 10 val_10 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +9 val_9 9 val_9 +11 val_11 11 val_11 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +17 val_17 17 val_17 +19 val_19 19 val_19 +PREHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +4 val_4 4 val_4 +8 val_8 8 val_8 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +9 val_9 9 val_9 +PREHOOK: query: select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +2 val_2 val_2 +4 val_4 val_4 +8 val_8 val_8 +10 val_10 val_10 +12 val_12 val_12 +12 val_12 val_12 +12 val_12 val_12 +12 val_12 val_12 +18 val_18 val_18 +18 val_18 val_18 +18 val_18 val_18 +18 val_18 val_18 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +9 val_9 val_9 +11 val_11 val_11 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +17 val_17 val_17 +19 val_19 val_19 +PREHOOK: query: -- The mapjoin is being performed as part of sub-query. It should be still converted to a sort-merge join +explain +select count(*) from ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +POSTHOOK: query: -- The mapjoin is being performed as part of sub-query. It should be still converted to a sort-merge join +explain +select count(*) from ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq1:b + TableScan + alias: b + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Select Operator + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(*) from ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +38 +PREHOOK: query: -- The mapjoin is being performed as part of sub-query. It should be still converted to a sort-merge join +-- Add a order by at the end to make the results deterministic. +explain +select key, count(*) from +( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +order by key +PREHOOK: type: QUERY +POSTHOOK: query: -- The mapjoin is being performed as part of sub-query. It should be still converted to a sort-merge join +-- Add a order by at the end to make the results deterministic. +explain +select key, count(*) from +( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +order by key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq1:b + TableScan + alias: b + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: int + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: bigint + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select key, count(*) from +( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from +( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 9 +2 1 +4 1 +5 9 +8 1 +9 1 +10 1 +11 1 +12 4 +15 4 +17 1 +18 4 +19 1 +PREHOOK: query: -- The mapjoin is being performed as part of more than one sub-query. It should be still converted to a sort-merge join +explain +select count(*) from +( + select key, count(*) from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +PREHOOK: type: QUERY +POSTHOOK: query: -- The mapjoin is being performed as part of more than one sub-query. It should be still converted to a sort-merge join +explain +select count(*) from +( + select key, count(*) from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq2:subq1:b + TableScan + alias: b + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: int + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(*) from +( + select key, count(*) from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +( + select key, count(*) from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +13 +PREHOOK: query: -- A join is being performed across different sub-queries, where a mapjoin is being performed in each of them. +explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- A join is being performed across different sub-queries, where a mapjoin is being performed in each of them. +explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-5 is a root stage + Stage-6 depends on stages: Stage-5 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src1:subq1:b + TableScan + alias: b + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: int + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: 0 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: bigint + $INTNAME1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: 1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: bigint + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + src2:subq2:b + TableScan + alias: b + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: int + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 9 9 +2 1 1 +4 1 1 +5 9 9 +8 1 1 +9 1 1 +10 1 1 +11 1 1 +12 4 4 +15 4 4 +17 1 1 +18 4 4 +19 1 1 +PREHOOK: query: explain +select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq2:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key < 10) + type: boolean + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0, _col1, _col2, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +4 val_4 4 val_4 +8 val_8 8 val_8 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +9 val_9 9 val_9 +PREHOOK: query: explain +select /*+mapjoin(subq1)*/ * from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(subq1)*/ * from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq2:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key < 10) + type: boolean + Select Operator + expressions: + expr: key + type: int + expr: concat(value, value) + type: string + outputColumnNames: _col0, _col1 + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0, _col1, _col2, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0val_0 0 val_0val_0 +0 val_0val_0 0 val_0val_0 +0 val_0val_0 0 val_0val_0 +0 val_0val_0 0 val_0val_0 +0 val_0val_0 0 val_0val_0 +0 val_0val_0 0 val_0val_0 +0 val_0val_0 0 val_0val_0 +0 val_0val_0 0 val_0val_0 +0 val_0val_0 0 val_0val_0 +2 val_2val_2 2 val_2val_2 +4 val_4val_4 4 val_4val_4 +8 val_8val_8 8 val_8val_8 +5 val_5val_5 5 val_5val_5 +5 val_5val_5 5 val_5val_5 +5 val_5val_5 5 val_5val_5 +5 val_5val_5 5 val_5val_5 +5 val_5val_5 5 val_5val_5 +5 val_5val_5 5 val_5val_5 +5 val_5val_5 5 val_5val_5 +5 val_5val_5 5 val_5val_5 +5 val_5val_5 5 val_5val_5 +9 val_9val_9 9 val_9val_9 +PREHOOK: query: explain +select /*+mapjoin(subq1)*/ * from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a where key < 10) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(subq1)*/ * from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a where key < 10) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) 1) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) 1) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce Local Work + Alias -> Map Local Tables: + subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key < 10) + type: boolean + Select Operator + expressions: + expr: (key + 1) + type: int + expr: concat(value, value) + type: string + outputColumnNames: _col0, _col1 + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 1 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq2:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key < 10) + type: boolean + Select Operator + expressions: + expr: (key + 1) + type: int + expr: concat(value, value) + type: string + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0, _col1, _col2, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a where key < 10) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ * from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a where key < 10) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +1 val_0val_0 1 val_0val_0 +1 val_0val_0 1 val_0val_0 +1 val_0val_0 1 val_0val_0 +1 val_0val_0 1 val_0val_0 +1 val_0val_0 1 val_0val_0 +1 val_0val_0 1 val_0val_0 +1 val_0val_0 1 val_0val_0 +1 val_0val_0 1 val_0val_0 +1 val_0val_0 1 val_0val_0 +3 val_2val_2 3 val_2val_2 +5 val_4val_4 5 val_4val_4 +9 val_8val_8 9 val_8val_8 +6 val_5val_5 6 val_5val_5 +6 val_5val_5 6 val_5val_5 +6 val_5val_5 6 val_5val_5 +6 val_5val_5 6 val_5val_5 +6 val_5val_5 6 val_5val_5 +6 val_5val_5 6 val_5val_5 +6 val_5val_5 6 val_5val_5 +6 val_5val_5 6 val_5val_5 +6 val_5val_5 6 val_5val_5 +10 val_9val_9 10 val_9val_9 Index: ql/src/test/queries/clientpositive/smb_mapjoin_14.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_14.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_14.q (working copy) @@ -0,0 +1,191 @@ +set hive.enforce.bucketing = true; +set hive.enforce.sorting = true; +set hive.exec.reducers.max = 1; + +CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; + +insert overwrite table tbl1 +select * from src where key < 20; + +insert overwrite table tbl2 +select * from src where key < 20; + +select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a) subq1 + join + (select a.key as key, a.value as value from tbl2 a) subq2 + on subq1.key = subq2.key; + +select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key; + +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; + +select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a) subq1 + join + (select a.key as key, a.value as value from tbl2 a) subq2 + on subq1.key = subq2.key; + +select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key; + +set hive.optimize.bucketmapjoin = true; + +select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a) subq1 + join + (select a.key as key, a.value as value from tbl2 a) subq2 + on subq1.key = subq2.key; + +select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key; + +set hive.optimize.bucketmapjoin.sortedmerge = true; +select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a) subq1 + join + (select a.key as key, a.value as value from tbl2 a) subq2 + on subq1.key = subq2.key; + +select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key; + +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; + +select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key; + +-- The mapjoin is being performed as part of sub-query. It should be still converted to a sort-merge join +explain +select count(*) from ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1; + +select count(*) from ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1; + +-- The mapjoin is being performed as part of sub-query. It should be still converted to a sort-merge join +-- Add a order by at the end to make the results deterministic. +explain +select key, count(*) from +( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +order by key; + +select key, count(*) from +( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +order by key; + +-- The mapjoin is being performed as part of more than one sub-query. It should be still converted to a sort-merge join +explain +select count(*) from +( + select key, count(*) from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2; + +select count(*) from +( + select key, count(*) from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2; + +-- A join is being performed across different sub-queries, where a mapjoin is being performed in each of them. +explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key; + +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key; + +explain +select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key; + +select /*+mapjoin(subq1)*/ * from + (select a.key as key, a.value as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key; + +explain +select /*+mapjoin(subq1)*/ * from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key; + +select /*+mapjoin(subq1)*/ * from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 10) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key; + +explain +select /*+mapjoin(subq1)*/ * from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a where key < 10) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key; + +select /*+mapjoin(subq1)*/ * from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a where key < 10) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a where key < 10) subq2 + on subq1.key = subq2.key; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java (revision 1411097) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java (working copy) @@ -30,7 +30,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; @@ -52,12 +52,11 @@ import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.QBJoinTree; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.TableAccessAnalyzer; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; //try to replace a bucket map join with a sorted merge map join public class SortedMergeBucketMapJoinOptimizer implements Transform { @@ -134,6 +133,13 @@ return false; } String[] srcs = joinCxt.getBaseSrc(); + String[] newSrcs = new String[srcs.length]; + for (int srcPos = 0; srcPos < srcs.length; srcPos++) { + newSrcs[srcPos] = + joinCxt.getId() == null ? srcs[srcPos] : joinCxt.getId() + ":" + srcs[srcPos]; + } + + srcs = newSrcs; int pos = 0; // All the tables/partitions columns should be sorted in the same order @@ -147,9 +153,9 @@ && isTableSorted(this.pGraphContext, mapJoinOp, joinCxt, - src, pos, - sortColumnsFirstTable); + sortColumnsFirstTable, + srcs); pos++; } if (!tableSorted) { @@ -196,13 +202,37 @@ this.pGraphContext.getListMapJoinOpsNoReducer().add(indexInListMapJoinNoReducer, smbJop); } + Map aliasToSink = + new HashMap(); + // For all parents (other than the big table), insert a dummy store operator List parentOperators = mapJoinOp.getParentOperators(); for (int i = 0; i < parentOperators.size(); i++) { Operator par = parentOperators.get(i); int index = par.getChildOperators().indexOf(mapJoinOp); par.getChildOperators().remove(index); - par.getChildOperators().add(index, smbJop); + if (i == smbJoinDesc.getPosBigTable()) { + par.getChildOperators().add(index, smbJop); + } + else { + DummyStoreOperator dummyStoreOp = new DummyStoreOperator(); + par.getChildOperators().add(index, dummyStoreOp); + + List> childrenOps = + new ArrayList>(); + childrenOps.add(smbJop); + dummyStoreOp.setChildOperators(childrenOps); + + List> parentOps = + new ArrayList>(); + parentOps.add(par); + dummyStoreOp.setParentOperators(parentOps); + + aliasToSink.put(srcs[i], dummyStoreOp); + smbJop.getParentOperators().remove(i); + smbJop.getParentOperators().add(i, dummyStoreOp); + } } + smbJoinDesc.setAliasToSink(aliasToSink); List childOps = mapJoinOp.getChildOperators(); for (int i = 0; i < childOps.size(); i++) { Operator child = childOps.get(i); @@ -213,6 +243,17 @@ return smbJop; } + public List toColumns(List keys) { + List columns = new ArrayList(); + for (ExprNodeDesc key : keys) { + if (!(key instanceof ExprNodeColumnDesc)) { + return null; + } + columns.add(((ExprNodeColumnDesc) key).getColumn()); + } + return columns; + } + /** * Whether this table is eligible for a sort-merge join. * @@ -229,40 +270,44 @@ private boolean isTableSorted(ParseContext pctx, MapJoinOperator op, QBJoinTree joinTree, - String alias, int pos, - List sortColumnsFirstTable) + List sortColumnsFirstTable, + String[] aliases) throws SemanticException { - - Map> topOps = this.pGraphContext - .getTopOps(); + String alias = aliases[pos]; Map topToTable = this.pGraphContext .getTopToTable(); - TableScanOperator tso = (TableScanOperator) topOps.get(alias); + + Operator topOp = joinTree.getAliasToOpInfo().get(alias); + if (topOp == null) { + return false; + } + List joinCols = toColumns(op.getConf().getKeys().get((byte) pos)); + if (joinCols == null || joinCols.isEmpty()) { + return false; + } + TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(topOp, joinCols); if (tso == null) { return false; } - List keys = op.getConf().getKeys().get((byte) pos); - // get all join columns from join keys stored in MapJoinDesc - List joinCols = new ArrayList(); - List joinKeys = new ArrayList(); - joinKeys.addAll(keys); - while (joinKeys.size() > 0) { - ExprNodeDesc node = joinKeys.remove(0); - if (node instanceof ExprNodeColumnDesc) { - joinCols.addAll(node.getCols()); - } else if (node instanceof ExprNodeGenericFuncDesc) { - ExprNodeGenericFuncDesc udfNode = ((ExprNodeGenericFuncDesc) node); - GenericUDF udf = udfNode.getGenericUDF(); - if (!FunctionRegistry.isDeterministic(udf)) { - return false; + // For nested sub-queries, the alias mapping is not maintained in QB currently. + if (pGraphContext.getTopOps().containsValue(tso)) { + for (Map.Entry> topOpEntry : + this.pGraphContext.getTopOps().entrySet()) { + if (topOpEntry.getValue() == tso) { + alias = topOpEntry.getKey(); + aliases[pos] = alias; + break; } - joinKeys.addAll(0, udfNode.getChildExprs()); } } + else { + return false; + } Table tbl = topToTable.get(tso); + if (tbl.isPartitioned()) { PrunedPartitionList prunedParts = null; try { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (revision 1411097) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (working copy) @@ -40,10 +40,10 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.QBJoinTree; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.TableAccessAnalyzer; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; @@ -170,19 +171,26 @@ String[] left = joinCxt.getLeftAliases(); List mapAlias = joinCxt.getMapAliases(); String baseBigAlias = null; - for(String s : left) { - if(s != null && !joinAliases.contains(s)) { - joinAliases.add(s); - if(!mapAlias.contains(s)) { - baseBigAlias = s; + for (String s : left) { + if (s != null) { + String subQueryAlias = joinCxt.getId() == null ? s : joinCxt.getId() + ":" + s; + if (!joinAliases.contains(subQueryAlias)) { + joinAliases.add(subQueryAlias); + if(!mapAlias.contains(s)) { + baseBigAlias = subQueryAlias; + } } } } - for(String s : srcs) { - if(s != null && !joinAliases.contains(s)) { - joinAliases.add(s); - if(!mapAlias.contains(s)) { - baseBigAlias = s; + + for (String s : srcs) { + if (s != null) { + String subQueryAlias = joinCxt.getId() == null ? s : joinCxt.getId() + ":" + s; + if (!joinAliases.contains(subQueryAlias)) { + joinAliases.add(subQueryAlias); + if(!mapAlias.contains(s)) { + baseBigAlias = subQueryAlias; + } } } } @@ -206,14 +214,43 @@ boolean bigTablePartitioned = true; for (int index = 0; index < joinAliases.size(); index++) { String alias = joinAliases.get(index); - TableScanOperator tso = (TableScanOperator) topOps.get(alias); - if (tso == null) { + Operator topOp = joinCxt.getAliasToOpInfo().get(alias); + if (topOp == null) { return false; } List keys = toColumns(mjDesc.getKeys().get((byte) index)); if (keys == null || keys.isEmpty()) { return false; } + int keysSize = keys.size(); + TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(topOp, keys); + if (tso == null) { + return false; + } + + // For nested sub-queries, the alias mapping is not maintained in QB currently. + if (pGraphContext.getTopOps().containsValue(tso)) { + for (Map.Entry> topOpEntry : topOps.entrySet()) { + if (topOpEntry.getValue() == tso) { + String newAlias = topOpEntry.getKey(); + joinAliases.set(index, newAlias); + if (baseBigAlias.equals(alias)) { + baseBigAlias = newAlias; + } + alias = newAlias; + break; + } + } + } + else { + return false; + } + + // In case of a constant join key, return false + if (keys.size() != keysSize) { + return false; + } + if (orders == null) { orders = new Integer[keys.size()]; } @@ -374,7 +411,7 @@ return null; } - private List toColumns(List keys) { + public List toColumns(List keys) { List columns = new ArrayList(); for (ExprNodeDesc key : keys) { if (!(key instanceof ExprNodeColumnDesc)) { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (revision 1411097) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (working copy) @@ -22,6 +22,7 @@ import java.util.List; import org.apache.hadoop.hive.ql.plan.CollectDesc; +import org.apache.hadoop.hive.ql.plan.DummyStoreDesc; import org.apache.hadoop.hive.ql.plan.ExtractDesc; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; @@ -91,6 +92,8 @@ HashTableDummyOperator.class)); opvec.add(new OpTuple(HashTableSinkDesc.class, HashTableSinkOperator.class)); + opvec.add(new OpTuple(DummyStoreDesc.class, + DummyStoreOperator.class)); } public static Operator get(Class opClass) { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (revision 1411097) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (working copy) @@ -140,7 +140,7 @@ super.initializeLocalWork(hconf); } - public void initializeMapredLocalWork(MapJoinDesc conf, Configuration hconf, + public void initializeMapredLocalWork(MapJoinDesc mjConf, Configuration hconf, MapredLocalWork localWork, Log l4j) throws HiveException { if (localWork == null || localWorkInited) { return; @@ -152,6 +152,7 @@ // create map local operators Map aliasToFetchWork = localWork.getAliasToFetchWork(); Map> aliasToWork = localWork.getAliasToWork(); + Map aliasToSinkWork = conf.getAliasToSink(); for (Map.Entry entry : aliasToFetchWork.entrySet()) { String alias = entry.getKey(); @@ -165,8 +166,10 @@ forwardOp.initialize(jobClone, new ObjectInspector[]{fetchOp.getOutputObjectInspector()}); fetchOp.clearFetchContext(); - MergeQueue mergeQueue = new MergeQueue(alias, fetchWork, jobClone); + DummyStoreOperator sinkOp = aliasToSinkWork.get(alias); + MergeQueue mergeQueue = new MergeQueue(alias, fetchWork, jobClone, forwardOp, sinkOp); + aliasToMergeQueue.put(alias, mergeQueue); l4j.info("fetch operators for " + alias + " initialized"); } @@ -515,13 +518,14 @@ Operator forwardOp = localWork.getAliasToWork() .get(table); + forwardOp = conf.getAliasToSink().get(table).getChildOperators().get(0); try { InspectableObject row = mergeQueue.getNextRow(); if (row == null) { fetchDone[tag] = true; return; } - forwardOp.process(row.o, 0); + forwardOp.process(row.o, tag); // check if any operator had a fatal error or early exit during // execution if (forwardOp.getDone()) { @@ -622,15 +626,21 @@ transient FetchOperator[] segments; transient List keyFields; transient List keyFieldOIs; + transient Operator forwardOp; + transient DummyStoreOperator sinkOp; // index of FetchOperator which is providing smallest one transient Integer currentMinSegment; transient ObjectPair, InspectableObject>[] keys; - public MergeQueue(String alias, FetchWork fetchWork, JobConf jobConf) { + public MergeQueue(String alias, FetchWork fetchWork, JobConf jobConf, + Operator forwardOp, + DummyStoreOperator sinkOp) { this.alias = alias; this.fetchWork = fetchWork; this.jobConf = jobConf; + this.forwardOp = forwardOp; + this.sinkOp = sinkOp; } // paths = bucket files of small table for current bucket file of big table @@ -682,6 +692,7 @@ } } + @Override protected boolean lessThan(Object a, Object b) { return compareKeys(keys[(Integer) a].getFirst(), keys[(Integer)b].getFirst()) < 0; } @@ -735,13 +746,21 @@ } InspectableObject nextRow = segments[current].getNextRow(); if (nextRow != null) { + sinkOp.reset(); if (keys[current] == null) { keys[current] = new ObjectPair, InspectableObject>(); } - // todo this should be changed to be evaluated lazily, especially for single segment case - keys[current].setFirst(JoinUtil.computeKeys(nextRow.o, keyFields, keyFieldOIs)); - keys[current].setSecond(nextRow); - return true; + + // Pass the row though the operator tree. It is guaranteed that not more than 1 row can + // be produced from a input row. + forwardOp.process(nextRow.o, 0); + nextRow = sinkOp.getResult(); + if (nextRow.o != null) { + // todo this should be changed to be evaluated lazily, especially for single segment case + keys[current].setFirst(JoinUtil.computeKeys(nextRow.o, keyFields, keyFieldOIs)); + keys[current].setSecond(nextRow); + return true; + } } keys[current] = null; return false; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DummyStoreOperator.java (working copy) @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import java.io.Serializable; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.DummyStoreDesc; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; + +/** + * For fetch task with operator tree, row read from FetchOperator is processed via operator tree + * and finally arrives to this operator. + */ +public class DummyStoreOperator extends Operator implements Serializable { + + + private transient InspectableObject result; + + public DummyStoreOperator() { + super(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + outputObjInspector = inputObjInspectors[0]; + result = new InspectableObject(null, outputObjInspector); + initializeChildren(hconf); + } + + @Override + public void processOp(Object row, int tag) throws HiveException { + // Store the row + result.o = row; + } + + @Override + public void reset() { + result.o = null; + } + + public InspectableObject getResult() { + return result; + } + + @Override + public OperatorType getType() { + return OperatorType.FORWARD; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DummyStoreDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DummyStoreDesc.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DummyStoreDesc.java (working copy) @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + + +/** + * ForwardDesc. + * + */ +@Explain(displayName = "Dummy Store") +public class DummyStoreDesc extends AbstractOperatorDesc { + private static final long serialVersionUID = 1L; + + public DummyStoreDesc() { + } + + @Override + public DummyStoreDesc clone() { + return new DummyStoreDesc(); + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/SMBJoinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/SMBJoinDesc.java (revision 1411097) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SMBJoinDesc.java (working copy) @@ -20,16 +20,20 @@ import java.io.Serializable; import java.util.HashMap; +import java.util.Map; +import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; + @Explain(displayName = "Sorted Merge Bucket Map Join Operator") public class SMBJoinDesc extends MapJoinDesc implements Serializable { private static final long serialVersionUID = 1L; - + private MapredLocalWork localWork; - - //keep a mapping from tag to the fetch operator alias + + //keep a mapping from tag to the fetch operator alias private HashMap tagToAlias; + private Map aliasToSink; public SMBJoinDesc(MapJoinDesc conf) { super(conf); @@ -53,5 +57,12 @@ public void setTagToAlias(HashMap tagToAlias) { this.tagToAlias = tagToAlias; } - + + public Map getAliasToSink() { + return aliasToSink; + } + + public void setAliasToSink(Map aliasToSink) { + this.aliasToSink = aliasToSink; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java (revision 1411097) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java (working copy) @@ -22,8 +22,12 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Map.Entry; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; + /** * Internal representation of the join tree. * @@ -39,7 +43,12 @@ private JoinCond[] joinCond; private boolean noOuterJoin; private boolean noSemiJoin; + private Map> aliasToOpInfo; + // The subquery identifier from QB. + // It is of the form topSubQuery:innerSubQuery:....:innerMostSubQuery + private String id; + // keeps track of the right-hand-side table name of the left-semi-join, and // its list of join keys private final HashMap> rhsSemijoin; @@ -74,6 +83,7 @@ noOuterJoin = true; noSemiJoin = true; rhsSemijoin = new HashMap>(); + aliasToOpInfo = new HashMap>(); } /** @@ -320,4 +330,20 @@ public void setFilterMap(int[][] filterMap) { this.filterMap = filterMap; } + + public Map> getAliasToOpInfo() { + return aliasToOpInfo; + } + + public void setAliasToOpInfo(Map> aliasToOpInfo) { + this.aliasToOpInfo = aliasToOpInfo; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1411097) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -5568,7 +5568,7 @@ } private Operator genJoinOperator(QB qb, QBJoinTree joinTree, - HashMap map) throws SemanticException { + Map map) throws SemanticException { QBJoinTree leftChild = joinTree.getJoinSrc(); Operator joinSrcOp = null; if (leftChild != null) { @@ -5767,7 +5767,7 @@ } } - private Operator genJoinPlan(QB qb, HashMap map) + private Operator genJoinPlan(QB qb, Map map) throws SemanticException { QBJoinTree joinTree = qb.getQbJoinTree(); Operator joinOp = genJoinOperator(qb, joinTree, map); @@ -5779,7 +5779,7 @@ * source operators. This procedure traverses the query tree recursively, */ private void pushJoinFilters(QB qb, QBJoinTree joinTree, - HashMap map) throws SemanticException { + Map map) throws SemanticException { if (joinTree.getJoinSrc() != null) { pushJoinFilters(qb, joinTree.getJoinSrc(), map); } @@ -5819,7 +5819,8 @@ return cols; } - private QBJoinTree genUniqueJoinTree(QB qb, ASTNode joinParseTree) + private QBJoinTree genUniqueJoinTree(QB qb, ASTNode joinParseTree, + Map aliasToOpInfo) throws SemanticException { QBJoinTree joinTree = new QBJoinTree(); joinTree.setNoOuterJoin(false); @@ -5858,6 +5859,9 @@ } else { rightAliases.add(alias); } + joinTree.getAliasToOpInfo().put( + qb.getId() == null ? alias : qb.getId() + ":" + alias, aliasToOpInfo.get(alias)); + joinTree.setId(qb.getId()); baseSrc.add(alias); preserved.add(lastPreserved); @@ -5915,7 +5919,8 @@ return joinTree; } - private QBJoinTree genJoinTree(QB qb, ASTNode joinParseTree) + private QBJoinTree genJoinTree(QB qb, ASTNode joinParseTree, + Map aliasToOpInfo) throws SemanticException { QBJoinTree joinTree = new QBJoinTree(); JoinCond[] condn = new JoinCond[1]; @@ -5962,8 +5967,11 @@ String[] children = new String[2]; children[0] = alias; joinTree.setBaseSrc(children); + joinTree.setId(qb.getId()); + joinTree.getAliasToOpInfo().put( + qb.getId() == null ? alias : qb.getId() + ":" + alias, aliasToOpInfo.get(alias)); } else if (isJoinToken(left)) { - QBJoinTree leftTree = genJoinTree(qb, left); + QBJoinTree leftTree = genJoinTree(qb, left, aliasToOpInfo); joinTree.setJoinSrc(leftTree); String[] leftChildAliases = leftTree.getLeftAliases(); String leftAliases[] = new String[leftChildAliases.length + 1]; @@ -5992,6 +6000,10 @@ } children[1] = alias; joinTree.setBaseSrc(children); + aliasToOpInfo.get(alias); + joinTree.setId(qb.getId()); + joinTree.getAliasToOpInfo().put( + qb.getId() == null ? alias : qb.getId() + ":" + alias, aliasToOpInfo.get(alias)); // remember rhs table for semijoin if (joinTree.getNoSemiJoin() == false) { joinTree.addRHSSemijoin(alias); @@ -6096,6 +6108,7 @@ rightAliases[i + trgtRightAliases.length] = nodeRightAliases[i]; } target.setRightAliases(rightAliases); + target.getAliasToOpInfo().putAll(node.getAliasToOpInfo()); String[] nodeBaseSrc = node.getBaseSrc(); String[] trgtBaseSrc = target.getBaseSrc(); @@ -7407,7 +7420,7 @@ public Operator genPlan(QB qb) throws SemanticException { // First generate all the opInfos for the elements in the from clause - HashMap aliasToOpInfo = new HashMap(); + Map aliasToOpInfo = new HashMap(); // Recurse over the subqueries to fill the subquery part of the plan for (String alias : qb.getSubqAliases()) { @@ -7433,10 +7446,10 @@ ASTNode joinExpr = qb.getParseInfo().getJoinExpr(); if (joinExpr.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) { - QBJoinTree joinTree = genUniqueJoinTree(qb, joinExpr); + QBJoinTree joinTree = genUniqueJoinTree(qb, joinExpr, aliasToOpInfo); qb.setQbJoinTree(joinTree); } else { - QBJoinTree joinTree = genJoinTree(qb, joinExpr); + QBJoinTree joinTree = genJoinTree(qb, joinExpr, aliasToOpInfo); qb.setQbJoinTree(joinTree); mergeJoinTree(qb); } @@ -7472,7 +7485,7 @@ * @throws SemanticException */ - void genLateralViewPlans(HashMap aliasToOpInfo, QB qb) + void genLateralViewPlans(Map aliasToOpInfo, QB qb) throws SemanticException { Map> aliasToLateralViews = qb.getParseInfo() .getAliasToLateralViews(); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java (revision 1411097) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java (working copy) @@ -226,10 +226,9 @@ * names on that table that map to the keys used for the input * operator (which is currently only a join or group by). */ - private static TableScanOperator genRootTableScan( + public static TableScanOperator genRootTableScan( Operator op, List keyNames) { - boolean complexTree = false; Operator currOp = op; List currColNames = keyNames; List> parentOps = null; @@ -238,26 +237,24 @@ // along the way that changes the rows from the table through // joins or aggregations. Only allowed operators are selects // and filters. - while (!complexTree) { + while (true) { parentOps = currOp.getParentOperators(); if (parentOps == null) { - break; + return (TableScanOperator) currOp; } if (parentOps.size() > 1 || !(currOp.columnNamesRowResolvedCanBeObtained())) { - complexTree = true; + return null; } else { // Generate the map of the input->output column name for the keys // we are about if (!TableAccessAnalyzer.genColNameMap(currOp, currColNames)) { - complexTree = true; + return null; } currOp = parentOps.get(0); } } - - return complexTree? null: (TableScanOperator) currOp; } /*