Index: conf/hive-default.xml.template =================================================================== --- conf/hive-default.xml.template (revision 1438474) +++ conf/hive-default.xml.template (working copy) @@ -814,8 +814,25 @@ Whether Hive enable the optimization about converting common join into mapjoin based on the input file size + + hive.auto.convert.join.aggressivemapjoin + false + Whether Hive enables the optimization of converting a common join into a mapjoin based on the input file + size. If this parameter is on, and the size of n-1 of the tables/partitions for an n-way join is smaller than the + specified size, the join is directly converted to a mapjoin (there is no conditional task). + + + hive.auto.convert.join.aggressivemapjoin.size + 10000 + If hive.auto.convert.join.aggressivemapjoin is off, this parameter does not take effect. However, if it + is on, and the size of n-1 of the tables/partitions for an n-way join is smaller than this size, the join is directly + converted to a mapjoin (there is no conditional task). + + + + hive.script.auto.progress false Whether Hive Tranform/Map/Reduce Clause should automatically send progress information to TaskTracker to avoid the task getting killed because of inactivity. Hive sends progress information when the script is outputting to stderr. This option removes the need of periodically producing stderr messages, but users should be cautious because this may prevent infinite loops in the scripts to be killed by TaskTracker. Index: build.properties =================================================================== --- build.properties (revision 1438474) +++ build.properties (working copy) @@ -79,7 +79,7 @@ # (measured in milliseconds). Ignored if fork is disabled. When running # multiple tests inside the same Java VM (see forkMode), timeout # applies to the time that all tests use together, not to an individual test.
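An illustrative usage sketch (not part of the patch itself): the two new properties are meant to be used alongside the existing hive.auto.convert.join flag. Assuming the same tables the updated join29.q test below relies on (src, src1, dest_j1), a session could enable the aggressive conversion as shown; per the new property descriptions, the join should then be planned directly as a mapjoin with no conditional task, provided the size of n-1 of the join inputs stays under the configured threshold (10000 is only the patch's default value).

set hive.auto.convert.join=true;
set hive.auto.convert.join.aggressivemapjoin=true;
set hive.auto.convert.join.aggressivemapjoin.size=10000;

INSERT OVERWRITE TABLE dest_j1
SELECT subq1.key, subq1.cnt, subq2.cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1
JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2
ON (subq1.key = subq2.key);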
-test.junit.timeout=43200000 +test.junit.timeout=432000000 # Use this property to selectively disable tests from the command line: # ant test -Dtest.junit.exclude="**/TestCliDriver.class" Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1438474) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -37,8 +37,6 @@ import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.LogUtils; -import org.apache.hadoop.hive.common.LogUtils.LogInitializationException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.mapred.JobConf; @@ -471,6 +469,8 @@ HIVESKEWJOIN("hive.optimize.skewjoin", false), HIVECONVERTJOIN("hive.auto.convert.join", false), + HIVECONVERTJOINAGGMAPJOIN("hive.auto.convert.join.aggressivemapjoin", false), + HIVECONVERTJOINAGGMAPJOINSIZE("hive.auto.convert.join.aggressivemapjoin.size", 10000L), HIVESKEWJOINKEY("hive.skewjoin.key", 100000), HIVESKEWJOINMAPJOINNUMMAPTASK("hive.skewjoin.mapjoin.map.tasks", 10000), HIVESKEWJOINMAPJOINMINSPLIT("hive.skewjoin.mapjoin.min.split", 33554432L), //32M Index: ql/src/test/results/clientpositive/join29.q.out =================================================================== --- ql/src/test/results/clientpositive/join29.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/join29.q.out (working copy) @@ -3,33 +3,36 @@ POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt +SELECT subq1.key, subq1.cnt, subq2.cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt +SELECT subq1.key, subq1.cnt, subq2.cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) cnt))))) + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) cnt))))) STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-11 depends on stages: Stage-1, Stage-9 - Stage-2 depends on stages: Stage-11 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2 + Stage-8 has a backup stage: Stage-2 + Stage-5 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-2, Stage-5, Stage-6 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-9 is a root stage + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-2 + Stage-4 is a root stage STAGE PLANS: Stage: Stage-1 @@ -88,14 +91,17 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-11 + Stage: Stage-7 + Conditional Operator + + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: -#### A masked pattern was here #### + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: -#### A masked pattern was here #### + $INTNAME HashTable Sink Operator condition expressions: 0 {_col0} {_col1} @@ -104,12 +110,12 @@ keys: 0 [Column[_col0]] 1 [Column[_col0]] - Position of Big Table: 1 + Position of Big Table: 0 - Stage: Stage-2 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: -#### A masked pattern was here #### + $INTNAME1 Map Join Operator condition map: Inner Join 0 to 1 @@ -121,7 +127,7 @@ 0 [Column[_col0]] 1 [Column[_col0]] outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 1 + Position of Big Table: 0 Select Operator expressions: expr: _col0 @@ -130,45 +136,27 @@ type: bigint expr: _col3 type: bigint - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: string - expr: _col1 - type: bigint - expr: _col3 - type: bigint + expr: UDFToInteger(_col1) + type: int + expr: UDFToInteger(_col2) + type: int outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
default.dest_j1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-0 Move Operator tables: @@ -182,39 +170,137 @@ Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 1 + + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + $INTNAME + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work - Stage: Stage-6 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col1 + type: bigint + $INTNAME1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + 
type: string + expr: UDFToInteger(_col1) + type: int + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-9 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: subq1:x @@ -272,7 +358,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt +SELECT subq1.key, subq1.cnt, subq2.cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) PREHOOK: type: QUERY @@ -280,7 +366,7 @@ PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_j1 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt +SELECT subq1.key, subq1.cnt, subq2.cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) POSTHOOK: type: QUERY Index: ql/src/test/results/clientpositive/smb_mapjoin_14.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_14.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/smb_mapjoin_14.q.out (working copy) @@ -55,7 +55,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -76,32 +75,21 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -175,7 +163,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: @@ -197,53 +184,42 @@ 1 [Column[key]] outputColumnNames: _col0 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 Select Operator expressions: expr: _col0 type: int 
outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: + Select Operator + expressions: expr: _col0 type: int - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: + outputColumnNames: _col0 + Select Operator + expressions: expr: _col0 type: int - sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: int + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col1 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -268,7 +244,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-3 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -359,7 +335,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: @@ -381,53 +356,42 @@ 1 [Column[key]] outputColumnNames: _col0 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 Select Operator expressions: expr: _col0 type: int outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: + Select Operator + expressions: expr: _col0 type: int - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: + outputColumnNames: _col0 + Select Operator + expressions: expr: _col0 type: int - sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col0 + type: int + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col1 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -453,7 +417,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-3 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -516,382 +480,6 @@ POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 6 -PREHOOK: query: -- A join is being performed across different sub-queries, where a mapjoin is being performed in each of them. 
--- Each sub-query should be converted to a sort-merge join. -explain -select src1.key, src1.cnt1, src2.cnt1 from -( - select key, count(*) as cnt1 from - ( - select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq1 group by key -) src1 -join -( - select key, count(*) as cnt1 from - ( - select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq2 group by key -) src2 -on src1.key = src2.key -order by src1.key, src1.cnt1, src2.cnt1 -PREHOOK: type: QUERY -POSTHOOK: query: -- A join is being performed across different sub-queries, where a mapjoin is being performed in each of them. --- Each sub-query should be converted to a sort-merge join. -explain -select src1.key, src1.cnt1, src2.cnt1 from -( - select key, count(*) as cnt1 from - ( - select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq1 group by key -) src1 -join -( - select key, count(*) as cnt1 from - ( - select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq2 group by key -) src2 -on src1.key = src2.key -order by src1.key, src1.cnt1, src2.cnt1 -POSTHOOK: type: QUERY -POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2, Stage-7 - Stage-4 depends on stages: Stage-3 - Stage-6 is a root stage - Stage-7 depends on stages: Stage-6 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - src1:subq1:b - TableScan - alias: b - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: - $INTNAME - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 0 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - $INTNAME1 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: 1 - value expressions: - expr: _col1 - type: bigint - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col1} - handleSkewJoin: false - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col3 - type: bigint - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-4 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Reduce Output Operator - key expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint - sort order: +++ - tag: -1 - value expressions: - expr: _col0 
- type: int - expr: _col1 - type: bigint - expr: _col2 - type: bigint - Reduce Operator Tree: - Extract - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-6 - Map Reduce - Alias -> Map Operator Tree: - src2:subq2:b - TableScan - alias: b - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-7 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col0 - type: int - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - Map-reduce partition columns: - expr: _col0 - type: int - tag: -1 - value expressions: - expr: _col1 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: int - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from -( - select key, count(*) as cnt1 from - ( - select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq1 group by key -) src1 -join -( - select key, count(*) as cnt1 from - ( - select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq2 group by key -) src2 -on src1.key = src2.key -order by src1.key, src1.cnt1, src2.cnt1 -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl1 -PREHOOK: Input: default@tbl2 -#### A masked pattern was here #### -POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from -( - select key, count(*) as cnt1 from - ( - select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq1 group by key -) src1 -join -( - select key, count(*) as cnt1 from - ( - select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq2 group by key -) src2 -on src1.key = src2.key -order by src1.key, src1.cnt1, src2.cnt1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl1 -POSTHOOK: Input: default@tbl2 -#### A masked pattern was here #### -POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tbl1.value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -0 9 9 -2 1 1 -4 1 1 -5 9 9 -8 1 1 -9 1 1 PREHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join. explain @@ -919,7 +507,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -949,31 +536,20 @@ 0 [Column[_col0]] 1 [Column[_col0]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1058,7 +634,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -1079,31 +654,20 @@ 0 [Column[_col0]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1212,7 +776,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -1247,31 +810,20 @@ 0 [Column[_col0]] 1 [Column[_col0]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash 
+ outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1370,7 +922,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -1400,31 +951,20 @@ 0 [Column[_col0]] 1 [Column[_col0]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1498,13 +1038,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) 1) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) 1) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. 
(TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: subq1:a @@ -1551,33 +1090,22 @@ 0 [Column[_col0]] 1 [Column[_col0]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1648,7 +1176,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -1669,31 +1196,20 @@ 0 [Column[_col0]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1760,7 +1276,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -1790,31 +1305,20 @@ 0 [Column[_col0]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 
+ value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1891,7 +1395,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -1924,31 +1427,20 @@ 1 [Column[_col0]] 2 [Column[_col0]] Position of Big Table: 2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -2041,7 +1533,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -2062,32 +1553,21 @@ 0 [Column[_col0]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out (working copy) @@ -71,7 +71,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -93,21 +92,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -160,47 +158,6 @@ serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -231,7 +188,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/join35.q.out =================================================================== --- ql/src/test/results/clientpositive/join35.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/join35.q.out (working copy) @@ -3,9 +3,11 @@ POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt +SELECT x.key, x.value, subq1.cnt FROM ( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key UNION ALL @@ -13,9 +15,11 @@ ) subq1 JOIN src1 x ON (x.key = subq1.key) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt +SELECT x.key, x.value, subq1.cnt FROM ( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key UNION ALL @@ -24,20 +28,19 @@ JOIN src1 x ON (x.key = subq1.key) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. 
(TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt))))) + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt))))) STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-11 depends on stages: Stage-1, Stage-9 - Stage-2 depends on stages: Stage-11 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-8 depends on stages: Stage-1, Stage-4 , consists of Stage-9, Stage-10, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-2, Stage-6, Stage-7 Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-9 is a root stage + Stage-10 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-10 + Stage-2 + Stage-4 is a root stage STAGE PLANS: Stage: Stage-1 @@ -159,7 +162,10 @@ Truncated Path -> Alias: /src [null-subquery1:subq1-subquery1:x] - Stage: Stage-11 + Stage: Stage-8 + Conditional Operator + + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: x @@ -180,7 +186,7 @@ 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-2 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -201,55 +207,46 @@ Position of Big Table: 0 Select Operator expressions: - expr: _col1 - type: bigint expr: _col2 type: string expr: _col3 type: string - outputColumnNames: _col1, _col2, _col3 + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1, _col2 Select Operator expressions: - expr: _col2 + expr: _col0 type: string - expr: _col3 + expr: _col1 type: string - expr: _col1 - type: bigint + expr: UDFToInteger(_col2) + type: int outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 + NumFilesPerFileSink: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types 
string:string:int + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.types string:string:int #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, i32 val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false #### A masked pattern was here #### TableScan GatherStats: false @@ -268,58 +265,49 @@ Position of Big Table: 0 Select Operator expressions: - expr: _col1 - type: bigint expr: _col2 type: string expr: _col3 type: string - outputColumnNames: _col1, _col2, _col3 + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1, _col2 Select Operator expressions: - expr: _col2 + expr: _col0 type: string - expr: _col3 + expr: _col1 type: string - expr: _col1 - type: bigint + expr: UDFToInteger(_col2) + type: int outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 + NumFilesPerFileSink: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.types string:string:int #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, i32 val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work - Needs Tagging: false + Needs Tagging: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -341,7 +329,7 @@ escape.delim \ #### A masked pattern was here #### Partition - base file name: -mr-10004 + base file name: -mr-10003 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -355,18 +343,51 @@ columns _col0,_col1 columns.types string,bigint escape.delim \ +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-0 Move Operator tables: @@ -393,51 +414,161 @@ Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: +#### A masked pattern was here #### + Fetch Operator + limit: -1 +#### A masked pattern was here #### + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: +#### A masked pattern was here #### + TableScan + GatherStats: false + Union + HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + Position of Big Table: 1 +#### A masked pattern was here #### + TableScan + GatherStats: false + Union + HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + Position of Big Table: 1 + + Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: + x + TableScan + alias: x + GatherStats: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col1} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col1, _col2, _col3 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 + NumFilesPerFileSink: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.types string:string:int #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, i32 val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Needs Tagging: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10003 + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ +#### A masked pattern was here #### + Partition + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns key,value,val2 - columns.types string:string:int + columns key,value + columns.types string:string #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} + name default.src1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -445,65 +576,133 @@ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns key,value,val2 - columns.types string:string:int + columns key,value + columns.types string:string #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 
{ string key, string value, i32 val2} + name default.src1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - name: default.dest_j1 + name: default.src1 + name: default.src1 Truncated Path -> Alias: -#### A masked pattern was here #### + /src1 [x] - Stage: Stage-6 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 + TableScan + GatherStats: false + Union + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col1 + type: bigint #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false + TableScan + GatherStats: false + Union + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col1 + type: bigint + x + TableScan + alias: x + GatherStats: false + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + expr: value + type: string + Needs Tagging: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10003 + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ +#### A masked pattern was here #### + Partition + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns key,value,val2 - columns.types string:string:int + columns key,value + columns.types string:string #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} + name default.src1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -511,27 +710,78 @@ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns key,value,val2 - columns.types string:string:int + columns key,value + columns.types string:string #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} + name default.src1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - name: default.dest_j1 + name: default.src1 + name: default.src1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col1, _col2, _col3 + Select Operator + expressions: + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.types string:string:int +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, i32 val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Truncated Path -> Alias: + /src1 [x] #### A masked pattern was here #### - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-9 + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: null-subquery2:subq1-subquery2:x1 @@ -652,7 +902,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt +SELECT x.key, x.value, subq1.cnt FROM ( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key UNION ALL @@ -664,7 +914,7 @@ PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_j1 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt +SELECT x.key, 
x.value, subq1.cnt FROM ( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key UNION ALL Index: ql/src/test/results/clientpositive/mapjoin_subquery2.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_subquery2.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/mapjoin_subquery2.q.out (working copy) @@ -49,64 +49,46 @@ POSTHOOK: query: load data local inpath '../data/files/z.txt' INTO TABLE z POSTHOOK: type: LOAD POSTHOOK: Output: default@z -PREHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name FROM (SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 FROM y JOIN x ON (x.id = y.id)) subq JOIN z ON (subq.key1 = z.id) PREHOOK: type: QUERY -PREHOOK: Input: default@x -PREHOOK: Input: default@y -PREHOOK: Input: default@z -#### A masked pattern was here #### -POSTHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name FROM (SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 FROM y JOIN x ON (x.id = y.id)) subq JOIN z ON (subq.key1 = z.id) POSTHOOK: type: QUERY -POSTHOOK: Input: default@x -POSTHOOK: Input: default@y -POSTHOOK: Input: default@z -#### A masked pattern was here #### -2 Joe 2 Tie 2 Tie -2 Hank 2 Tie 2 Tie -PREHOOK: query: EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id) -POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME y)) (TOK_TABREF (TOK_TABNAME x)) (= (. (TOK_TABLE_OR_COL x) id) (. (TOK_TABLE_OR_COL y) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) id) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) name) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) id) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) name) value2)))) subq) (TOK_TABREF (TOK_TABNAME z)) (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) id)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) name))))) + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME y)) (TOK_TABREF (TOK_TABNAME x)) (= (. (TOK_TABLE_OR_COL x) id) (. 
(TOK_TABLE_OR_COL y) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) id) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) name) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) id) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) name) value2)))) subq) (TOK_TABREF (TOK_TABNAME z)) (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) id)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) name))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - subq:x + subq:y Fetch Operator limit: -1 z Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - subq:x + subq:y TableScan - alias: x + alias: y HashTable Sink Operator condition expressions: 0 {id} {name} @@ -115,7 +97,7 @@ keys: 0 [Column[id]] 1 [Column[id]] - Position of Big Table: 0 + Position of Big Table: 1 z TableScan alias: z @@ -129,12 +111,12 @@ 1 [Column[id]] Position of Big Table: 0 - Stage: Stage-1 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: - subq:y + subq:x TableScan - alias: y + alias: x Map Join Operator condition map: Inner Join 0 to 1 @@ -146,29 +128,18 @@ 0 [Column[id]] 1 [Column[id]] outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 + Position of Big Table: 1 Select Operator expressions: + expr: _col5 + type: int + expr: _col4 + type: string expr: _col0 type: int expr: _col1 type: string - expr: _col4 - type: string - expr: _col5 - type: int - outputColumnNames: _col0, _col1, _col4, _col5 - Select Operator - expressions: - expr: _col5 - type: int - expr: _col4 - type: string - expr: _col0 - type: int - expr: _col1 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3 Map Join Operator condition map: Inner Join 0 to 1 @@ -196,27 +167,12 @@ expr: _col5 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Local Work: Map Reduce Local Work @@ -225,9 +181,9 @@ limit: -1 -PREHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name +PREHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 +(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 FROM y JOIN x ON (x.id = y.id)) subq JOIN z ON (subq.key1 = z.id) PREHOOK: type: QUERY @@ -235,9 
+191,9 @@ PREHOOK: Input: default@y PREHOOK: Input: default@z #### A masked pattern was here #### -POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name +POSTHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 +(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 FROM y JOIN x ON (x.id = y.id)) subq JOIN z ON (subq.key1 = z.id) POSTHOOK: type: QUERY Index: ql/src/test/results/clientpositive/join30.q.out =================================================================== --- ql/src/test/results/clientpositive/join30.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/join30.q.out (working copy) @@ -15,14 +15,13 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: x @@ -60,50 +59,39 @@ 1 [Column[key]] outputColumnNames: _col0 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: + Select Operator + expressions: expr: _col0 type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: + outputColumnNames: _col0 + Select Operator + expressions: expr: _col0 type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: @@ -147,7 +135,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator Index: ql/src/test/results/clientpositive/bucketcontext_4.q.out =================================================================== --- 
ql/src/test/results/clientpositive/bucketcontext_4.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketcontext_4.q.out (working copy) @@ -81,13 +81,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -133,21 +132,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -202,47 +200,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -273,7 +230,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] Stage: Stage-0 Fetch Operator @@ -306,7 +263,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -328,21 +284,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select 
Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -395,47 +350,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -466,7 +380,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin9.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin9.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketmapjoin9.q.out (working copy) @@ -70,13 +70,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -115,21 +114,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -183,47 +181,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -254,7 +211,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator @@ -335,13 +292,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -380,21 +336,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -448,47 +403,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -519,7 +433,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin13.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin13.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketmapjoin13.q.out (working copy) @@ -98,13 +98,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -143,21 +142,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -258,48 +256,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -330,7 +286,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator @@ -388,13 +345,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. 
(TOK_TABLE_OR_COL a) part) '2')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -440,21 +396,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -508,47 +463,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -579,7 +493,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator @@ -649,13 +563,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -701,21 +614,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -769,47 +681,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -840,7 +711,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator @@ -912,13 +783,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -964,21 +834,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -1032,47 +901,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -1103,7 +931,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/join28.q.out =================================================================== --- ql/src/test/results/clientpositive/join28.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/join28.q.out (working copy) @@ -3,50 +3,49 @@ POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN 
srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))))) + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL z) value))))) STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-1 depends on stages: Stage-10 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-8 is a root stage + Stage-7 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - subq:x + subq:y Fetch Operator limit: -1 z Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - subq:x + subq:y TableScan - alias: x + alias: y HashTable Sink Operator condition expressions: 0 {key} @@ -55,7 +54,7 @@ keys: 0 [Column[key]] 1 [Column[key]] - Position of Big Table: 1 + Position of Big Table: 0 z TableScan alias: z @@ -69,12 +68,12 @@ 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-1 + Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: - subq:y + subq:x TableScan - alias: y + alias: x Map Join Operator condition map: Inner Join 0 to 1 @@ -86,17 +85,12 @@ 0 [Column[key]] 1 [Column[key]] outputColumnNames: _col0 - Position of Big Table: 1 + Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 Map Join Operator condition map: Inner Join 0 to 1 @@ -115,34 +109,18 @@ type: string expr: _col5 type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-0 Move Operator tables: @@ -153,46 +131,14 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Stage: Stage-5 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true 
-#### A masked pattern was here #### - - PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) PREHOOK: type: QUERY @@ -202,9 +148,9 @@ PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Output: default@dest_j1 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY Index: ql/src/test/results/clientpositive/smb_mapjoin_13.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_13.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/smb_mapjoin_13.q.out (working copy) @@ -77,7 +77,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -100,21 +99,43 @@ 1 [Column[value]] outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col4, _col5 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -165,70 +186,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table1 name: default.test_table1 - Truncated Path -> Alias: - /test_table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col4, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - 
Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ Reduce Operator Tree: Extract Limit @@ -250,7 +207,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /test_table1 [a] Stage: Stage-0 Fetch Operator @@ -307,13 +264,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table3) a) (TOK_TABREF (TOK_TABNAME test_table4) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -353,21 +309,43 @@ 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[value]()] outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col4, _col5 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string Local Work: Map Reduce Local Work Needs Tagging: false @@ -420,70 +398,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 name: default.test_table3 - Truncated Path -> Alias: - /test_table3 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col4, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - Reduce 
Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ Reduce Operator Tree: Extract Limit @@ -505,7 +419,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /test_table3 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out (working copy) @@ -64,13 +64,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table_desc1) a) (TOK_TABREF (TOK_TABNAME table_desc2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (. 
(TOK_TABLE_OR_COL a) key) 10)))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -116,37 +115,26 @@ 1 [Column[key], Column[value]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/join34.q.out =================================================================== --- ql/src/test/results/clientpositive/join34.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/join34.q.out (working copy) @@ -3,9 +3,11 @@ POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value +SELECT x.key, x.value, subq1.value FROM ( SELECT x.key as key, x.value as value from src x where x.key < 20 UNION ALL @@ -13,9 +15,11 @@ ) subq1 JOIN src1 x ON (x.key = subq1.key) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value +SELECT x.key, x.value, subq1.value FROM ( SELECT x.key as key, x.value as value from src x where x.key < 20 UNION ALL @@ -24,21 +28,16 @@ JOIN src1 x ON (x.key = subq1.key) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) value) value)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. 
(TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value))))) + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) value) value)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value))))) STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-1 depends on stages: Stage-10 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: x @@ -59,7 +58,7 @@ 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-1 + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: null-subquery1:subq1-subquery1:x @@ -93,46 +92,37 @@ Position of Big Table: 0 Select Operator expressions: - expr: _col1 - type: string expr: _col2 type: string expr: _col3 type: string - outputColumnNames: _col1, _col2, _col3 - Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 + expr: _col1 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 + NumFilesPerFileSink: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.types string:string:string #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false null-subquery2:subq1-subquery2:x1 TableScan alias: x1 @@ -164,49 +154,40 @@ Position of Big Table: 0 Select Operator expressions: - expr: _col1 - type: string expr: _col2 type: string expr: _col3 type: string - outputColumnNames: _col1, _col2, _col3 - Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 + expr: _col1 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 + NumFilesPerFileSink: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.types string:string:string #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work - Needs Tagging: false + Needs Tagging: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -252,89 +233,25 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src name: default.src - Truncated Path -> Alias: - /src [null-subquery1:subq1-subquery1:x, null-subquery2:subq1-subquery2:x1] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true #### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 -#### A masked pattern was here #### - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: 
Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### Partition - base file name: -ext-10002 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns key,value,val2 - columns.types string:string:string + columns key,value + columns.types string:string #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} + name default.src1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -342,68 +259,31 @@ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns key,value,val2 - columns.types string:string:string + columns key,value + columns.types string:string #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} + name default.src1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - name: default.dest_j1 + name: default.src1 + name: default.src1 Truncated Path -> Alias: -#### A masked pattern was here #### + /src [null-subquery1:subq1-subquery1:x, null-subquery2:subq1-subquery2:x1] - Stage: Stage-5 - Map Reduce - Alias -> Map Operator Tree: + Stage: Stage-0 + Move Operator + tables: + replace: true #### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - + table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -418,19 +298,15 @@ #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - name: default.dest_j1 - Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 - Move Operator - files: - hdfs directory: true + Stage: Stage-2 + Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value +SELECT x.key, x.value, subq1.value FROM ( SELECT x.key as key, x.value as value from src x where x.key < 20 UNION ALL @@ -442,7 +318,7 @@ PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_j1 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value +SELECT x.key, x.value, subq1.value FROM ( SELECT x.key as key, x.value as value from src x where x.key < 20 UNION ALL Index: ql/src/test/results/clientpositive/skewjoin.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoin.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/skewjoin.q.out (working copy) @@ -1524,13 +1524,12 @@ (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) k) (TOK_TABREF (TOK_TABNAME T1) v) (= (+ (. (TOK_TABLE_OR_COL k) key) 1) (. (TOK_TABLE_OR_COL v) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST v))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL k) key)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. 
(TOK_TABLE_OR_COL v) val))))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: v @@ -1568,48 +1567,37 @@ 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()] outputColumnNames: _col0, _col5 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Group By Operator + aggregations: + expr: sum(hash(_col0)) + expr: sum(hash(_col5)) + bucketGroup: false + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + expr: _col1 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col5)) - bucketGroup: false - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/bucketcontext_8.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_8.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketcontext_8.q.out (working copy) @@ -94,13 +94,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -146,21 +145,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -263,48 +261,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -335,7 +291,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -370,7 +327,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -392,21 +348,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: 
false Path -> Alias: #### A masked pattern was here #### @@ -507,48 +462,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -579,7 +492,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketcontext_3.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_3.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketcontext_3.q.out (working copy) @@ -69,13 +69,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -121,21 +120,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -190,47 +188,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -261,7 +218,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] Stage: Stage-0 Fetch Operator @@ -294,7 +251,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -316,21 +272,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ 
-383,47 +338,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -454,7 +368,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin8.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin8.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketmapjoin8.q.out (working copy) @@ -64,13 +64,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -116,21 +115,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -184,47 +182,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -255,7 +212,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator @@ -307,13 +264,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -359,21 +315,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -427,47 +382,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -498,7 +412,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin12.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin12.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketmapjoin12.q.out (working copy) @@ -92,13 +92,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -144,21 +143,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -212,47 +210,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -283,7 +240,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator @@ -327,13 +284,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_3) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -372,21 +328,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -440,47 +395,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -511,7 +425,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/multiMapJoin1.q.out =================================================================== --- ql/src/test/results/clientpositive/multiMapJoin1.q.out (revision 0) +++ ql/src/test/results/clientpositive/multiMapJoin1.q.out (working copy) @@ -0,0 +1,1131 @@ +PREHOOK: query: -- Join of a big table with 2 small tables on different keys should be performed as a single MR job +create table smallTbl1(key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Join of a big table with 2 small tables on different keys should be performed as a single MR job +create table smallTbl1(key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@smallTbl1 +PREHOOK: query: insert overwrite table smallTbl1 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: 
default@smalltbl1 +POSTHOOK: query: insert overwrite table smallTbl1 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@smalltbl1 +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table smallTbl2(key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table smallTbl2(key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@smallTbl2 +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table smallTbl2 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@smalltbl2 +POSTHOOK: query: insert overwrite table smallTbl2 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@smalltbl2 +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table bigTbl(key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bigTbl(key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bigTbl +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table bigTbl +select * from +( + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src +) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bigtbl +POSTHOOK: query: insert overwrite table bigTbl +select * from +( + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src +) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bigtbl +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, 
comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select count(*) FROM +(select bigTbl.key as key, bigTbl.value as value1, + bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 + on (bigTbl.key = smallTbl1.key) +) firstjoin +JOIN +smallTbl2 on (firstjoin.value1 = smallTbl2.value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) FROM +(select bigTbl.key as key, bigTbl.value as value1, + bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 + on (bigTbl.key = smallTbl1.key) +) firstjoin +JOIN +smallTbl2 on (firstjoin.value1 = smallTbl2.value) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-10 is a root stage , consists of Stage-13, Stage-14, Stage-3 + Stage-13 has a backup stage: Stage-3 + Stage-8 depends on stages: Stage-13 + Stage-7 depends on stages: Stage-3, Stage-8, Stage-9 , consists of Stage-11, Stage-12, Stage-1 + Stage-11 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-11 + Stage-2 depends on stages: Stage-1, Stage-5, Stage-6 + Stage-12 has a backup stage: Stage-1 + Stage-6 depends on stages: Stage-12 + Stage-1 + Stage-14 has a backup stage: Stage-3 + Stage-9 depends on stages: Stage-14 + Stage-3 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-10 + Conditional Operator + + Stage: Stage-13 + Map Reduce Local Work + Alias -> Map Local Tables: + firstjoin:smalltbl1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + firstjoin:smalltbl1 + TableScan + alias: smalltbl1 + HashTable Sink Operator + condition expressions: + 0 {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-8 + Map Reduce + Alias -> Map Operator Tree: + firstjoin:bigtbl + TableScan + alias: bigtbl + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col1 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-11 + Map Reduce Local Work + Alias -> Map Local Tables: + smalltbl2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + smalltbl2 + TableScan + alias: smalltbl2 + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col1]] + 1 [Column[value]] + Position of Big Table: 0 + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col1]] + 1 [Column[value]] + Position of Big Table: 0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col1]] + 1 [Column[value]] + Position of Big Table: 1 + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + smalltbl2 + TableScan + alias: smalltbl2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col1]] + 1 [Column[value]] + Position of Big Table: 1 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col1 + type: string + tag: 0 + smalltbl2 + TableScan + alias: smalltbl2 + Reduce Output Operator + key expressions: + expr: value + type: string + sort order: + + Map-reduce partition columns: + expr: value + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + firstjoin:bigtbl + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + firstjoin:bigtbl + TableScan + alias: bigtbl + HashTable Sink Operator + condition expressions: + 0 {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 1 + + Stage: Stage-9 + Map Reduce + Alias -> Map Operator Tree: + firstjoin:smalltbl1 + TableScan + alias: smalltbl1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col1 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + File Output Operator + compressed: false + 
GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + firstjoin:bigtbl + TableScan + alias: bigtbl + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: value + type: string + firstjoin:smalltbl1 + TableScan + alias: smalltbl1 + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(*) FROM +(select bigTbl.key as key, bigTbl.value as value1, + bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 + on (bigTbl.key = smallTbl1.key) +) firstjoin +JOIN +smallTbl2 on (firstjoin.value1 = smallTbl2.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@bigtbl +PREHOOK: Input: default@smalltbl1 +PREHOOK: Input: default@smalltbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM +(select bigTbl.key as key, bigTbl.value as value1, + bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 + on (bigTbl.key = smallTbl1.key) +) firstjoin +JOIN +smallTbl2 on (firstjoin.value1 = smallTbl2.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bigtbl +POSTHOOK: Input: default@smalltbl1 +POSTHOOK: Input: default@smalltbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: 
smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +580 +PREHOOK: query: explain +select count(*) FROM +(select bigTbl.key as key, bigTbl.value as value1, + bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 + on (bigTbl.key = smallTbl1.key) +) firstjoin +JOIN +smallTbl2 on (firstjoin.value1 = smallTbl2.value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) FROM +(select bigTbl.key as key, bigTbl.value as value1, + bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 + on (bigTbl.key = smallTbl1.key) +) firstjoin +JOIN +smallTbl2 on (firstjoin.value1 = smallTbl2.value) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. 
(TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-2 depends on stages: Stage-6 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + firstjoin:smalltbl1 + Fetch Operator + limit: -1 + smalltbl2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + firstjoin:smalltbl1 + TableScan + alias: smalltbl1 + HashTable Sink Operator + condition expressions: + 0 {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + smalltbl2 + TableScan + alias: smalltbl2 + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col1]] + 1 [Column[value]] + Position of Big Table: 0 + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + firstjoin:bigtbl + TableScan + alias: bigtbl + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col1 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col1]] + 1 [Column[value]] + Position of Big Table: 0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(*) FROM +(select bigTbl.key as key, bigTbl.value as value1, + bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 + on (bigTbl.key = smallTbl1.key) +) firstjoin +JOIN +smallTbl2 on (firstjoin.value1 = smallTbl2.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@bigtbl +PREHOOK: Input: default@smalltbl1 +PREHOOK: Input: default@smalltbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM +(select bigTbl.key as key, bigTbl.value as value1, + bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 + on (bigTbl.key = smallTbl1.key) +) firstjoin +JOIN +smallTbl2 on (firstjoin.value1 = smallTbl2.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bigtbl +POSTHOOK: Input: default@smalltbl1 +POSTHOOK: Input: default@smalltbl2 +#### A masked pattern was here #### +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), 
(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +580 +PREHOOK: query: create table smallTbl3(key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table smallTbl3(key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@smallTbl3 +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table smallTbl3 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@smalltbl3 +POSTHOOK: query: insert overwrite table smallTbl3 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@smalltbl3 +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: drop table bigTbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@bigtbl +PREHOOK: Output: default@bigtbl +POSTHOOK: query: drop table bigTbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@bigtbl +POSTHOOK: Output: default@bigtbl +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, 
type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table bigTbl(key1 string, key2 string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bigTbl(key1 string, key2 string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bigTbl +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table bigTbl +select * from +( + select key as key1, key as key2, value from src 
+ union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src +) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bigtbl +POSTHOOK: query: insert overwrite table bigTbl +select * from +( + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src +) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bigtbl +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), 
(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) FROM + ( + SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, + firstjoin.value1 as value1, firstjoin.value2 as value2 FROM + (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, + bigTbl.value as value1, bigTbl.value as value2 + FROM bigTbl JOIN smallTbl1 + on (bigTbl.key1 = smallTbl1.key) + ) firstjoin + JOIN + smallTbl2 on (firstjoin.value1 = smallTbl2.value) + ) secondjoin + JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@bigtbl +PREHOOK: Input: default@smalltbl1 +PREHOOK: Input: default@smalltbl2 +PREHOOK: Input: default@smalltbl3 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM + ( + SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, + firstjoin.value1 as value1, firstjoin.value2 as value2 FROM + (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, + bigTbl.value as value1, bigTbl.value as value2 + FROM bigTbl JOIN smallTbl1 + on (bigTbl.key1 = smallTbl1.key) + ) firstjoin + JOIN + smallTbl2 on (firstjoin.value1 = smallTbl2.value) + ) secondjoin + JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bigtbl +POSTHOOK: Input: default@smalltbl1 +POSTHOOK: Input: default@smalltbl2 +POSTHOOK: Input: default@smalltbl3 +#### A masked pattern was here #### +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, 
type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: 
Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +1660 +PREHOOK: query: -- join with 4 tables on different keys is also executed as a single MR job +explain +select count(*) FROM + ( + SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, + firstjoin.value1 as value1, firstjoin.value2 as value2 FROM + (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, + bigTbl.value as value1, bigTbl.value as value2 + FROM bigTbl JOIN smallTbl1 + on (bigTbl.key1 = smallTbl1.key) + ) firstjoin + JOIN + smallTbl2 on (firstjoin.value1 = smallTbl2.value) + ) secondjoin + JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- join with 4 tables on different keys is also executed as a single MR job +explain +select count(*) FROM + ( + SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, + firstjoin.value1 as value1, firstjoin.value2 as value2 FROM + (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, + bigTbl.value as value1, bigTbl.value as value2 + FROM bigTbl JOIN smallTbl1 + on (bigTbl.key1 = smallTbl1.key) + ) firstjoin + JOIN + smallTbl2 on (firstjoin.value1 = smallTbl2.value) + ) secondjoin + JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, 
type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) value2) value2)))) secondjoin) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL secondjoin) key2) (. 
(TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-11 is a root stage + Stage-10 depends on stages: Stage-11 + Stage-4 depends on stages: Stage-10 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-11 + Map Reduce Local Work + Alias -> Map Local Tables: + secondjoin:firstjoin:smalltbl1 + Fetch Operator + limit: -1 + secondjoin:smalltbl2 + Fetch Operator + limit: -1 + smalltbl3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + secondjoin:firstjoin:smalltbl1 + TableScan + alias: smalltbl1 + HashTable Sink Operator + condition expressions: + 0 {key2} {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key1]] + 1 [Column[key]] + Position of Big Table: 0 + secondjoin:smalltbl2 + TableScan + alias: smalltbl2 + HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col2]] + 1 [Column[value]] + Position of Big Table: 0 + smalltbl3 + TableScan + alias: smalltbl3 + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col1]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-10 + Map Reduce + Alias -> Map Operator Tree: + secondjoin:firstjoin:bigtbl + TableScan + alias: bigtbl + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key2} {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key1]] + 1 [Column[key]] + outputColumnNames: _col1, _col2 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: string + outputColumnNames: _col1, _col2 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col1} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col2]] + 1 [Column[value]] + outputColumnNames: _col1 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col1]] + 1 [Column[key]] + Position of Big Table: 0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(*) FROM + ( + SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, + firstjoin.value1 as value1, firstjoin.value2 as value2 FROM + (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, + bigTbl.value as value1, 
bigTbl.value as value2 + FROM bigTbl JOIN smallTbl1 + on (bigTbl.key1 = smallTbl1.key) + ) firstjoin + JOIN + smallTbl2 on (firstjoin.value1 = smallTbl2.value) + ) secondjoin + JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@bigtbl +PREHOOK: Input: default@smalltbl1 +PREHOOK: Input: default@smalltbl2 +PREHOOK: Input: default@smalltbl3 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM + ( + SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, + firstjoin.value1 as value1, firstjoin.value2 as value2 FROM + (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, + bigTbl.value as value1, bigTbl.value as value2 + FROM bigTbl JOIN smallTbl1 + on (bigTbl.key1 = smallTbl1.key) + ) firstjoin + JOIN + smallTbl2 on (firstjoin.value1 = smallTbl2.value) + ) secondjoin + JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bigtbl +POSTHOOK: Input: default@smalltbl1 +POSTHOOK: Input: default@smalltbl2 +POSTHOOK: Input: default@smalltbl3 +#### A masked pattern was here #### +POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), 
(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +1660 Index: ql/src/test/results/clientpositive/bucket_map_join_2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucket_map_join_2.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucket_map_join_2.q.out (working copy) @@ -50,13 +50,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table1) a) (TOK_TABREF (TOK_TABNAME table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. 
(TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -95,21 +94,20 @@ 0 [Column[key], Column[value]] 1 [Column[key], Column[value]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -162,47 +160,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.table1 name: default.table1 - Truncated Path -> Alias: - /table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -233,7 +190,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /table1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out (working copy) @@ -67,7 +67,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -93,35 +92,24 @@ 1 [Column[key], Column[value]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - 
Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: + Select Operator + expressions: expr: _col0 - type: bigint + type: string + outputColumnNames: _col0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/join38.q.out =================================================================== --- ql/src/test/results/clientpositive/join38.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/join38.q.out (working copy) @@ -73,13 +73,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME tmp) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) col11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) col5)) (TOK_SELEXPR (TOK_FUNCTION count 1) count)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) col11) 111)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) col5)))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -125,62 +124,51 @@ 1 [Column[col11]] outputColumnNames: _col1, _col9, _col15 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - expr: _col9 - type: string - expr: _col15 - type: string - outputColumnNames: _col1, _col9, _col15 - Select Operator - expressions: - expr: _col1 - type: string - expr: _col9 - type: string - outputColumnNames: _col1, _col9 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col1 - type: string - expr: _col9 - type: string - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: - expr: _col0 - type: string + Select Operator + expressions: expr: _col1 type: string - sort order: ++ - Map-reduce partition columns: - expr: _col0 + expr: _col9 type: string - expr: _col1 + expr: _col15 type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + outputColumnNames: _col1, _col9, _col15 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col9 + type: string + outputColumnNames: _col1, _col9 + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: _col1 + type: string + expr: _col9 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: 
-1 + value expressions: + expr: _col2 + type: bigint + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out (working copy) @@ -56,13 +56,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1)) (TOK_TABREF (TOK_TABNAME src1) src2) (AND (AND (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)) (< (. (TOK_TABLE_OR_COL src1) key) 10)) (> (. (TOK_TABLE_OR_COL src2) key) 10))) (TOK_TABREF (TOK_TABNAME src) src3) (AND (= (. (TOK_TABLE_OR_COL src2) key) (. (TOK_TABLE_OR_COL src3) key)) (< (. (TOK_TABLE_OR_COL src3) key) 300)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src1 src2))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src3) key))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: src1 @@ -146,72 +145,61 @@ 2 [Column[key]] outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 Position of Big Table: 2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: string + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col2 + type: string + expr: _col4 + type: string + sort order: +++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 
- type: string - expr: _col4 - type: string - sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator Index: ql/src/test/results/clientpositive/join33.q.out =================================================================== --- ql/src/test/results/clientpositive/join33.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/join33.q.out (working copy) @@ -3,35 +3,41 @@ POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value +SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value +SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value))))) + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL y) value))))) STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-3 depends on stages: Stage-6 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: x Fetch Operator limit: -1 + z + Fetch Operator + limit: -1 Alias -> Map Local Operator Tree: x TableScan @@ -46,8 +52,21 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 + z + TableScan + alias: z + GatherStats: false + HashTable Sink Operator + condition expressions: + 0 {_col5} {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col1]] + 1 [Column[value]] + Position of Big Table: 0 - Stage: Stage-3 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: y @@ -66,24 +85,54 @@ 1 [Column[key]] outputColumnNames: _col0, _col1, _col5 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col5} {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col1]] + 1 [Column[value]] + outputColumnNames: _col1, _col4, _col9 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col4 + type: string + expr: _col9 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col5 - columns.types string,string,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work - Needs Tagging: false + Needs Tagging: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -129,94 +178,25 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src name: default.src - Truncated Path -> Alias: - /src [y] - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: #### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Reduce Output Operator - key expressions: - expr: _col1 - type: string - sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col5 - type: string - expr: _col0 - type: string - z - TableScan - alias: z - GatherStats: false - Reduce Output Operator - key expressions: - expr: value - type: string - sort 
order: + - Map-reduce partition columns: - expr: value - type: string - tag: 1 - value expressions: - expr: value - type: string - Needs Tagging: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col5 - columns.types string,string,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col5 - columns.types string,string,string - escape.delim \ -#### A masked pattern was here #### - Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: bucket_count -1 columns key,value columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 + numPartitions 0 numRows 0 - partition_columns ds/hr rawDataSize 0 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -227,65 +207,21 @@ columns key,value columns.types string:string #### A masked pattern was here #### - name default.srcpart - numFiles 4 - numPartitions 4 + name default.src1 + numFiles 1 + numPartitions 0 numRows 0 - partition_columns ds/hr rawDataSize 0 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col1} {VALUE._col4} - 1 {VALUE._col1} - handleSkewJoin: false - outputColumnNames: _col1, _col4, _col9 - Select Operator - expressions: - expr: _col4 - type: string - expr: _col9 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 
[z] -#### A masked pattern was here #### + /src [y] Stage: Stage-0 Move Operator @@ -315,7 +251,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value +SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) PREHOOK: type: QUERY @@ -325,7 +261,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Output: default@dest_j1 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value +SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY Index: ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (working copy) @@ -1,24 +1,29 @@ -PREHOOK: query: explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) key))))) + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL srcpart) key))))) STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-1 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-5 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: src Fetch Operator limit: -1 + src1 + Fetch Operator + limit: -1 Alias -> Map Local Operator Tree: src TableScan @@ -32,42 +37,7 @@ 0 [Column[value]] 1 [Column[value]] Position of Big Table: 0 - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - srcpart - TableScan - alias: srcpart - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[value]] - 1 [Column[value]] - outputColumnNames: _col0 - Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: src1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - src1 TableScan alias: src1 HashTable Sink Operator @@ -80,43 +50,47 @@ 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-2 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 + srcpart + TableScan + alias: srcpart Map Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {_col0} + 0 {key} 1 handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] + 0 [Column[value]] + 1 [Column[value]] outputColumnNames: _col0 Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Local Work: Map Reduce Local Work @@ -125,28 +99,29 @@ limit: -1 -PREHOOK: query: explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds +PREHOOK: query: explain select count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds +POSTHOOK: query: explain select count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF 
(TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)))) + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)))) STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-1 depends on stages: Stage-7 - Stage-6 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-7 depends on stages: Stage-8 + Stage-3 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: src Fetch Operator limit: -1 + src1 + Fetch Operator + limit: -1 Alias -> Map Local Operator Tree: src TableScan @@ -160,42 +135,7 @@ 0 [Column[value]] 1 [Column[value]] Position of Big Table: 0 - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - srcpart - TableScan - alias: srcpart - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {ds} - 1 - handleSkewJoin: false - keys: - 0 [Column[value]] - 1 [Column[value]] - outputColumnNames: _col0, _col2 - Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: src1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - src1 TableScan alias: src1 HashTable Sink Operator @@ -208,35 +148,56 @@ 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-2 + Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col2 + srcpart + TableScan + alias: srcpart Map Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {_col2} + 0 {key} {ds} 1 handleSkewJoin: false keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col2 + 0 [Column[value]] + 1 [Column[value]] + outputColumnNames: _col0, _col2 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col2} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col2 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col2 + type: string + outputColumnNames: _col2 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: _col2 + type: string + mode: hash + 
outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work @@ -244,37 +205,18 @@ Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### - Select Operator - expressions: - expr: _col2 - type: string - outputColumnNames: _col2 - Select Operator - expressions: - expr: _col2 + Reduce Output Operator + key expressions: + expr: _col0 type: string - outputColumnNames: _col2 - Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col2 - type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -302,7 +244,7 @@ limit: -1 -PREHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds +PREHOOK: query: select count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@srcpart @@ -311,7 +253,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -POSTHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds +POSTHOOK: query: select count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart Index: ql/src/test/results/clientpositive/bucketcontext_7.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_7.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketcontext_7.q.out (working copy) @@ -94,13 +94,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -146,21 +145,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -263,48 +261,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -335,7 +291,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -370,7 +327,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -392,21 +348,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: 
false Path -> Alias: #### A masked pattern was here #### @@ -507,48 +462,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -579,7 +492,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketcontext_2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_2.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketcontext_2.q.out (working copy) @@ -69,13 +69,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -121,21 +120,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -238,48 +236,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -310,7 +266,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -343,7 +300,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -365,21 +321,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: 
false Path -> Alias: #### A masked pattern was here #### @@ -480,48 +435,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -552,7 +465,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin11.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin11.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketmapjoin11.q.out (working copy) @@ -124,13 +124,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -176,21 +175,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -291,48 +289,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -363,7 +319,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator @@ -407,13 +364,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) (. (TOK_TABLE_OR_COL b) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -459,21 +415,20 @@ 0 [Column[key], Column[part]] 1 [Column[key], Column[part]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -574,48 +529,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -646,7 +559,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/smb_mapjoin_16.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_16.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_mapjoin_16.q.out (working copy) @@ -0,0 +1,120 @@ +PREHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) 
SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Mapjoin followed by a aggregation should be performed in a single MR job +EXPLAIN +SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Mapjoin followed by a aggregation should be performed in a single MR job +EXPLAIN +SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +1028 Index: ql/src/test/results/clientpositive/mapjoin_subquery.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_subquery.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/mapjoin_subquery.q.out (working copy) @@ -1,39 +1,43 @@ -PREHOOK: query: EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key 
and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))))) + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL z) value))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - subq:x + subq:y Fetch Operator limit: -1 z Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - subq:x + subq:y TableScan - alias: x + alias: y HashTable Sink Operator condition expressions: 0 {key} @@ -42,7 +46,7 @@ keys: 0 [Column[key]] 1 [Column[key]] - Position of Big Table: 1 + Position of Big Table: 0 z TableScan alias: z @@ -56,12 +60,12 @@ 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-1 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: - subq:y + subq:x TableScan - alias: y + alias: x Map Join Operator condition map: Inner Join 0 to 1 @@ -73,17 +77,12 @@ 0 [Column[key]] 1 [Column[key]] outputColumnNames: _col0 - Position of Big Table: 1 + Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 Map Join Operator condition map: Inner Join 0 to 1 @@ -102,20 +101,13 @@ type: string expr: _col5 type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Local Work: Map Reduce Local Work @@ -124,93 +116,91 @@ limit: -1 -PREHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +PREHOOK: query: SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +ORDER BY subq.key1, z.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +POSTHOOK: query: SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +ORDER BY subq.key1, z.value POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### -238 val_238 -238 val_238 -311 val_311 -311 val_311 -311 val_311 -255 val_255 -255 val_255 -278 val_278 -278 val_278 -98 val_98 -98 val_98 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -150 val_150 -273 val_273 -273 val_273 -273 val_273 -224 val_224 -224 val_224 -369 val_369 -369 val_369 -369 val_369 -66 val_66 128 val_128 
128 val_128 128 val_128 -213 val_213 -213 val_213 -146 val_146 -146 val_146 -406 val_406 -406 val_406 -406 val_406 -406 val_406 128 val_128 128 val_128 128 val_128 -311 val_311 -311 val_311 -311 val_311 +128 val_128 +128 val_128 +128 val_128 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +150 val_150 213 val_213 213 val_213 +213 val_213 +213 val_213 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 278 val_278 278 val_278 +278 val_278 +278 val_278 311 val_311 311 val_311 311 val_311 -98 val_98 -98 val_98 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 369 val_369 369 val_369 369 val_369 -238 val_238 -238 val_238 -273 val_273 -273 val_273 -273 val_273 -224 val_224 -224 val_224 369 val_369 369 val_369 369 val_369 +369 val_369 +369 val_369 +369 val_369 401 val_401 401 val_401 401 val_401 @@ -221,9 +211,21 @@ 401 val_401 401 val_401 401 val_401 -128 val_128 -128 val_128 -128 val_128 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 406 val_406 406 val_406 406 val_406 @@ -232,66 +234,58 @@ 406 val_406 406 val_406 406 val_406 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -255 val_255 -255 val_255 406 val_406 406 val_406 406 val_406 406 val_406 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -146 val_146 -146 val_146 -273 val_273 -273 val_273 -273 val_273 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +66 val_66 +98 val_98 +98 val_98 +98 val_98 +98 val_98 PREHOOK: query: EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1 + order by subq.key1, z.value PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1 + order by subq.key1, z.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. 
(TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq) key1))))) + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL z) value))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-1 + Stage-8 is a root stage + Stage-7 depends on stages: Stage-8 + Stage-3 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - subq:x + subq:y Fetch Operator limit: -1 z Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - subq:x + subq:y TableScan - alias: x + alias: y HashTable Sink Operator condition expressions: 0 {key} @@ -300,7 +294,7 @@ keys: 0 [Column[key]] 1 [Column[key]] - Position of Big Table: 1 + Position of Big Table: 0 z TableScan alias: z @@ -314,12 +308,12 @@ 1 [Column[key]] Position of Big Table: 0 - Stage: Stage-1 + Stage: Stage-7 Map Reduce Alias -> Map Operator Tree: - subq:y + subq:x TableScan - alias: y + alias: x Map Join Operator condition map: Inner Join 0 to 1 @@ -331,17 +325,12 @@ 0 [Column[key]] 1 [Column[key]] outputColumnNames: _col0 - Position of Big Table: 1 + Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 Map Join Operator condition map: Inner Join 0 to 1 @@ -354,44 +343,39 @@ 1 [Column[key]] outputColumnNames: _col0, _col5 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: + Reduce Output Operator + key expressions: expr: 
_col0 type: string - expr: _col5 + expr: _col1 type: string - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string Reduce Operator Tree: Extract File Output Operator @@ -406,24 +390,24 @@ limit: -1 -PREHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +PREHOOK: query: SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1 + order by subq.key1, z.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +POSTHOOK: query: SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1 + order by subq.key1, z.value POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 Index: ql/src/test/results/clientpositive/bucket_map_join_1.q.out =================================================================== --- ql/src/test/results/clientpositive/bucket_map_join_1.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucket_map_join_1.q.out (working copy) @@ -50,13 +50,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table1) a) (TOK_TABREF (TOK_TABNAME table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. 
(TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -95,21 +94,20 @@ 0 [Column[key], Column[value]] 1 [Column[key], Column[value]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -162,47 +160,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.table1 name: default.table1 - Truncated Path -> Alias: - /table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -233,7 +190,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /table1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out (working copy) @@ -134,13 +134,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -179,21 +178,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -294,48 +292,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -366,7 +322,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out (working copy) @@ -67,7 +67,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -93,35 +92,24 @@ 1 [Column[key], Column[value]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator 
Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: + Select Operator + expressions: expr: _col0 - type: bigint + type: string + outputColumnNames: _col0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/union22.q.out =================================================================== --- ql/src/test/results/clientpositive/union22.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/union22.q.out (working copy) @@ -42,26 +42,30 @@ POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain extended +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +explain extended insert overwrite table dst_union22 partition (ds='2') select * from ( select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 +select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on a.k1 = b.k1 and a.ds='1' where a.k1 > 20 ) subq PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +explain extended insert overwrite table dst_union22 partition (ds='2') select * from ( select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 +select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on a.k1 = b.k1 and a.ds='1' where a.k1 > 20 @@ -79,18 +83,22 @@ POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dst_union22_delta))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k1) k1) (TOK_SELEXPR (TOK_TABLE_OR_COL k2) k2) (TOK_SELEXPR (TOK_TABLE_OR_COL k3) k3) (TOK_SELEXPR (TOK_TABLE_OR_COL k4) k4)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (<= (TOK_TABLE_OR_COL k0) 50))))) (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME dst_union22) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dst_union22_delta))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) 
(TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (> (TOK_TABLE_OR_COL k0) 50))))) b) (and (= (. (TOK_TABLE_OR_COL a) k1) (. (TOK_TABLE_OR_COL b) k1)) (= (. (TOK_TABLE_OR_COL a) ds) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k1) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k2) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3) k3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k4) k4)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL a) k1) 20))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dst_union22) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dst_union22_delta))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k1) k1) (TOK_SELEXPR (TOK_TABLE_OR_COL k2) k2) (TOK_SELEXPR (TOK_TABLE_OR_COL k3) k3) (TOK_SELEXPR (TOK_TABLE_OR_COL k4) k4)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (<= (TOK_TABLE_OR_COL k0) 50))))) (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME dst_union22) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dst_union22_delta))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (> (TOK_TABLE_OR_COL k0) 50))))) b) (and (= (. (TOK_TABLE_OR_COL a) k1) (. (TOK_TABLE_OR_COL b) k1)) (= (. (TOK_TABLE_OR_COL a) ds) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k1) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k2) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3) k3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k4) k4)) (TOK_WHERE (> (. 
(TOK_TABLE_OR_COL a) k1) 20))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dst_union22) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-1 depends on stages: Stage-7 - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-7 is a root stage , consists of Stage-8, Stage-1 + Stage-8 has a backup stage: Stage-1 + Stage-6 depends on stages: Stage-8 + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 STAGE PLANS: Stage: Stage-7 + Conditional Operator + + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: null-subquery2:subq-subquery2:b:dst_union22_delta @@ -130,7 +138,7 @@ 1 [Column[_col1]] Position of Big Table: 0 - Stage: Stage-1 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: null-subquery2:subq-subquery2:a @@ -159,24 +167,35 @@ 1 [Column[_col1]] outputColumnNames: _col0, _col1, _col10, _col11 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col10 + type: string + expr: _col11 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col10,_col11 - columns.types string,string,string,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string,string,string,string + escape.delim \ + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work - Needs Tagging: false + Needs Tagging: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -225,74 +244,55 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dst_union22 name: default.dst_union22 - Truncated Path -> Alias: - /dst_union22/ds=1 [null-subquery2:subq-subquery2:a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: #### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col10 - type: string - expr: _col11 - type: string - outputColumnNames: _col0, _col1, _col10, _col11 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col10 - type: string - expr: _col11 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - GlobalTableId: 0 + Partition + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + bucket_count -1 + columns k0,k1,k2,k3,k4,k5 + columns.types string:string:string:string:string:string #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string,string,string,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: + name default.dst_union22_delta + numFiles 1 + numRows 500 + partition_columns ds + rawDataSize 16936 + serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 17436 #### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col10,_col11 - columns.types string,string,string,string - escape.delim \ + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - columns _col0,_col1,_col10,_col11 - columns.types string,string,string,string - escape.delim \ + bucket_count -1 + columns k0,k1,k2,k3,k4,k5 + columns.types string:string:string:string:string:string +#### A masked pattern was here #### + name default.dst_union22_delta + numFiles 1 + numPartitions 1 + numRows 500 + partition_columns ds + rawDataSize 16936 + serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 17436 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dst_union22_delta + name: default.dst_union22_delta Truncated Path -> Alias: -#### A masked pattern was here #### + /dst_union22/ds=1 [null-subquery2:subq-subquery2:a] - Stage: Stage-3 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -410,7 +410,7 @@ Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -503,17 +503,215 @@ name: default.dst_union22 #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery2:subq-subquery2:a + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (k1 > 20.0) + type: boolean + Reduce Output Operator + key expressions: + expr: k1 + type: string + sort order: + + Map-reduce partition columns: + expr: k1 + type: string + tag: 0 + value expressions: + expr: k1 + type: string + expr: k2 + type: string + expr: ds + type: string + null-subquery2:subq-subquery2:b:dst_union22_delta + TableScan + alias: dst_union22_delta + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((k0 > 50.0) and (k1 > 20.0)) + type: boolean + Select Operator + expressions: + expr: k1 + type: string + expr: k3 
+ type: string + expr: k4 + type: string + outputColumnNames: _col1, _col3, _col4 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col1 + type: string + tag: 1 + value expressions: + expr: _col3 + type: string + expr: _col4 + type: string + Needs Tagging: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + bucket_count -1 + columns k1,k2,k3,k4 + columns.types string:string:string:string +#### A masked pattern was here #### + name default.dst_union22 + numFiles 1 + numRows 500 + partition_columns ds + rawDataSize 11124 + serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11624 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns k1,k2,k3,k4 + columns.types string:string:string:string +#### A masked pattern was here #### + name default.dst_union22 + numFiles 1 + numPartitions 1 + numRows 500 + partition_columns ds + rawDataSize 11124 + serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11624 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dst_union22 + name: default.dst_union22 +#### A masked pattern was here #### + Partition + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + bucket_count -1 + columns k0,k1,k2,k3,k4,k5 + columns.types string:string:string:string:string:string +#### A masked pattern was here #### + name default.dst_union22_delta + numFiles 1 + numRows 500 + partition_columns ds + rawDataSize 16936 + serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 17436 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns k0,k1,k2,k3,k4,k5 + columns.types string:string:string:string:string:string +#### A masked pattern was here #### + name default.dst_union22_delta + numFiles 1 + numPartitions 1 + numRows 500 + partition_columns ds + rawDataSize 16936 + serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 17436 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dst_union22_delta + name: default.dst_union22_delta + Reduce Operator Tree: 
+ Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col3} {VALUE._col4} + filter mappings: + 0 [1, 1] + filter predicates: + 0 {(VALUE._col4 = '1')} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col10, _col11 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col10 + type: string + expr: _col11 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string,string,string,string + escape.delim \ + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Truncated Path -> Alias: + /dst_union22/ds=1 [null-subquery2:subq-subquery2:a] + /dst_union22_delta/ds=1 [null-subquery2:subq-subquery2:b:dst_union22_delta] + PREHOOK: query: insert overwrite table dst_union22 partition (ds='2') select * from ( select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 +select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on a.k1 = b.k1 and a.ds='1' where a.k1 > 20 @@ -530,7 +728,7 @@ ( select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 +select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on a.k1 = b.k1 and a.ds='1' where a.k1 > 20 Index: ql/src/test/results/clientpositive/join32.q.out =================================================================== --- ql/src/test/results/clientpositive/join32.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/join32.q.out (working copy) @@ -3,41 +3,41 @@ POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value +SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value +SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. 
(TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value))))) + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value))))) STAGE DEPENDENCIES: - Stage-12 is a root stage - Stage-8 depends on stages: Stage-12 - Stage-11 depends on stages: Stage-8 - Stage-1 depends on stages: Stage-11 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-12 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: x Fetch Operator limit: -1 + z + Fetch Operator + limit: -1 Alias -> Map Local Operator Tree: x TableScan @@ -52,8 +52,21 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 + z + TableScan + alias: z + GatherStats: false + HashTable Sink Operator + condition expressions: + 0 {_col5} {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col1]] + 1 [Column[value]] + Position of Big Table: 0 - Stage: Stage-8 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: y @@ -72,24 +85,54 @@ 1 [Column[key]] outputColumnNames: _col0, _col1, _col5 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col5} {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col1]] + 1 [Column[value]] + outputColumnNames: _col1, _col4, _col9 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col4 + type: string + expr: _col9 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col5 - columns.types string,string,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A 
masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work - Needs Tagging: false + Needs Tagging: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -135,202 +178,25 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src name: default.src - Truncated Path -> Alias: - /src [y] - - Stage: Stage-11 - Map Reduce Local Work - Alias -> Map Local Tables: - z - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - z - TableScan - alias: z - GatherStats: false - HashTable Sink Operator - condition expressions: - 0 {_col5} {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col1]] - 1 [Column[value]] - Position of Big Table: 0 - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: #### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col5} {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col1]] - 1 [Column[value]] - outputColumnNames: _col1, _col4, _col9 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col9 - type: string - outputColumnNames: _col1, _col4, _col9 - Select Operator - expressions: - expr: _col4 - type: string - expr: _col9 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### Partition - base file name: -mr-10003 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col5 - columns.types string,string,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col5 - columns.types string,string,string - escape.delim \ - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 -#### A masked pattern was here #### - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10002 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns key,value,val2 - columns.types string:string:string + columns key,value + columns.types string:string #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} + name default.src1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -338,68 +204,31 @@ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns key,value,val2 - columns.types string:string:string + columns key,value + columns.types string:string #### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} + name default.src1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - name: default.dest_j1 + name: default.src1 + name: default.src1 Truncated Path -> Alias: -#### A masked pattern was here #### + /src [y] - Stage: Stage-5 - Map Reduce - Alias -> Map Operator Tree: + Stage: Stage-0 + Move Operator + tables: + replace: true #### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 -#### A 
masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - + table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -414,19 +243,15 @@ #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - name: default.dest_j1 - Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 - Move Operator - files: - hdfs directory: true + Stage: Stage-2 + Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value +SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) PREHOOK: type: QUERY @@ -436,7 +261,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Output: default@dest_j1 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value +SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY Index: ql/src/test/results/clientpositive/bucketcontext_6.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_6.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketcontext_6.q.out (working copy) @@ -68,13 +68,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -120,21 +119,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -237,48 +235,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -309,7 +265,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -340,7 +297,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -362,21 +318,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: 
false Path -> Alias: #### A masked pattern was here #### @@ -477,48 +432,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -549,7 +462,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketcontext_1.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_1.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketcontext_1.q.out (working copy) @@ -81,13 +81,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -133,21 +132,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -250,48 +248,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -322,7 +278,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -355,7 +312,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -377,21 +333,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: 
false Path -> Alias: #### A masked pattern was here #### @@ -492,48 +447,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -564,7 +477,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin10.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin10.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketmapjoin10.q.out (working copy) @@ -118,13 +118,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -163,21 +162,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -278,48 +276,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -350,7 +306,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/smb_mapjoin_15.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_15.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/smb_mapjoin_15.q.out (working copy) @@ -51,7 +51,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -74,21 +73,43 @@ 1 [Column[key]] outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: 
- columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col4, _col5 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -139,70 +160,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table1 name: default.test_table1 - Truncated Path -> Alias: - /test_table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col4, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ Reduce Operator Tree: Extract Limit @@ -224,7 +181,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /test_table1 [a] Stage: Stage-0 Fetch Operator @@ -346,7 +303,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -369,21 +325,55 @@ 1 [Column[key], Column[key2]] outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col5,_col6,_col7 - columns.types int,int,string,int,int,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col5 + type: int + expr: _col6 + type: int + expr: _col7 + type: string + outputColumnNames: _col0, _col1, 
_col2, _col5, _col6, _col7 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col5 + type: int + expr: _col6 + type: int + expr: _col7 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col3 + type: int + expr: _col4 + type: int + expr: _col5 + type: string Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -434,82 +424,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table1 name: default.test_table1 - Truncated Path -> Alias: - /test_table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: int - expr: _col7 - type: string - outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: int - expr: _col7 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col3 - type: int - expr: _col4 - type: int - expr: _col5 - type: string - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col5,_col6,_col7 - columns.types int,int,string,int,int,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col5,_col6,_col7 - columns.types int,int,string,int,int,string - escape.delim \ Reduce Operator Tree: Extract Limit @@ -531,12 +445,13 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /test_table1 [a] Stage: Stage-0 Fetch Operator limit: 10 + PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -592,7 +507,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -615,21 +529,55 @@ 1 [Column[key2], Column[key]] outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col5,_col6,_col7 - columns.types int,int,string,int,int,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: 
false + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col5 + type: int + expr: _col6 + type: int + expr: _col7 + type: string + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col5 + type: int + expr: _col6 + type: int + expr: _col7 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col3 + type: int + expr: _col4 + type: int + expr: _col5 + type: string Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -680,82 +628,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table1 name: default.test_table1 - Truncated Path -> Alias: - /test_table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: int - expr: _col7 - type: string - outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: int - expr: _col7 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col3 - type: int - expr: _col4 - type: int - expr: _col5 - type: string - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col5,_col6,_col7 - columns.types int,int,string,int,int,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col5,_col6,_col7 - columns.types int,int,string,int,int,string - escape.delim \ Reduce Operator Tree: Extract Limit @@ -777,12 +649,13 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /test_table1 [a] Stage: Stage-0 Fetch Operator limit: 10 + PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -837,13 +710,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -890,21 +762,55 @@ 1 [Column[key], Column[value]] outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col5,_col6,_col7 - columns.types int,int,string,int,int,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col5 + type: int + expr: _col6 + type: int + expr: _col7 + type: string + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col5 + type: int + expr: _col6 + type: int + expr: _col7 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col2 + type: string + expr: _col3 + type: int + expr: _col4 + type: int + expr: _col5 + type: string Local Work: Map Reduce Local Work Needs Tagging: false @@ -957,82 +863,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table1 name: default.test_table1 - Truncated Path -> Alias: - /test_table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: int - expr: _col7 - type: string - outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col5 - type: int - expr: _col6 - type: int - expr: _col7 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col2 - type: string - expr: _col3 - type: int - expr: _col4 - type: int - expr: _col5 - type: string - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col5,_col6,_col7 - columns.types int,int,string,int,int,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col5,_col6,_col7 - columns.types int,int,string,int,int,string - 
escape.delim \ Reduce Operator Tree: Extract Limit @@ -1054,7 +884,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /test_table1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/mapjoin_distinct.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_distinct.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/mapjoin_distinct.q.out (working copy) @@ -14,14 +14,13 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) value))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: d @@ -59,45 +58,34 @@ 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Group By Operator - bucketGroup: false - keys: + Select Operator + expressions: expr: _col1 type: string - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 type: string - sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 + outputColumnNames: _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator bucketGroup: false @@ -113,7 +101,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-3 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -195,13 +183,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL c) value))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: d @@ -239,45 +226,34 @@ 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Group By Operator - bucketGroup: false - keys: + Select Operator + expressions: expr: _col1 type: string - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + outputColumnNames: _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator bucketGroup: false @@ -347,14 +323,13 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL c) value))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: d @@ -392,38 +367,27 @@ 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Reduce Output Operator - key expressions: - expr: _col1 - type: string - sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 Reduce Operator Tree: Group By Operator bucketGroup: false @@ -439,7 +403,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-3 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -521,13 +485,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL c) value))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: d @@ -565,38 +528,27 @@ 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col1 + type: string + tag: -1 Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Reduce Output Operator - key expressions: - expr: _col1 - type: string - sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 Reduce Operator Tree: Group By Operator bucketGroup: false Index: ql/src/test/results/clientpositive/semijoin.q.out =================================================================== --- ql/src/test/results/clientpositive/semijoin.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/semijoin.q.out (working copy) @@ -1137,13 +1137,12 @@ (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL a) key))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -1193,38 +1192,27 @@ 1 [Column[_col0]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int Reduce Operator Tree: Extract File Output Operator @@ -1712,13 +1700,12 @@ (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t2) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b c))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL a) key))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: b @@ -1802,38 +1789,27 @@ 2 [Column[_col0]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int Reduce Operator Tree: Extract File Output Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out (working copy) @@ -70,13 +70,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -122,21 +121,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -191,47 +189,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -262,7 +219,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out (working copy) @@ -59,7 +59,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -85,35 +84,24 @@ 1 [Column[key]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - 
type: string - outputColumnNames: _col0 - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: + Select Operator + expressions: expr: _col0 - type: bigint + type: string + outputColumnNames: _col0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/join31.q.out =================================================================== --- ql/src/test/results/clientpositive/join31.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/join31.q.out (working copy) @@ -3,30 +3,38 @@ POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt +SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) group by subq1.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt +SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) group by subq1.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL subq1) key)))) + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. 
(TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL subq1) key)))) STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1, Stage-5 - Stage-2 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-1, Stage-5 , consists of Stage-9, Stage-10, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-10 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-10 + Stage-2 Stage-5 is a root stage STAGE PLANS: @@ -84,14 +92,17 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-7 + Stage: Stage-8 + Conditional Operator + + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: -#### A masked pattern was here #### + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: -#### A masked pattern was here #### + $INTNAME HashTable Sink Operator condition expressions: 0 {_col0} @@ -100,12 +111,12 @@ keys: 0 [Column[_col0]] 1 [Column[_col0]] - Position of Big Table: 1 + Position of Big Table: 0 - Stage: Stage-2 + Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: -#### A masked pattern was here #### + $INTNAME1 Map Join Operator condition map: Inner Join 0 to 1 @@ -117,13 +128,27 @@ 0 [Column[_col0]] 1 [Column[_col0]] outputColumnNames: _col0 - Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work @@ -131,37 +156,18 @@ Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: + Reduce Output Operator + key expressions: expr: _col0 type: string - outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -208,6 +214,119 @@ Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + HashTable Sink 
Operator + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 1 + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + outputColumnNames: _col0 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + $INTNAME1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: @@ -264,7 +383,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt +SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) group by subq1.key @@ -273,7 +392,7 @@ PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_j1 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt +SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) group by subq1.key Index: ql/src/test/results/clientpositive/bucketcontext_5.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_5.q.out (revision 1438474) +++ ql/src/test/results/clientpositive/bucketcontext_5.q.out (working copy) @@ -54,13 +54,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -106,21 +105,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -173,47 +171,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -244,7 +201,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big [b] Stage: Stage-0 Fetch Operator @@ -271,7 +228,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -293,21 +249,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -358,47 +313,6 @@ serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -429,7 +343,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientnegative/join29.q.out =================================================================== --- ql/src/test/results/clientnegative/join29.q.out (revision 0) +++ ql/src/test/results/clientnegative/join29.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10227]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/join35.q.out =================================================================== --- ql/src/test/results/clientnegative/join35.q.out (revision 0) +++ ql/src/test/results/clientnegative/join35.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10227]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/join28.q.out =================================================================== --- ql/src/test/results/clientnegative/join28.q.out (revision 0) +++ ql/src/test/results/clientnegative/join28.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10227]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. 
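
[Editor's note - illustrative sketch, not part of the patch.] The new clientnegative golden files above (join28, join29, join32, join35) all end in Error 10227, i.e. with this change a MAPJOIN hint is rejected when the hinted input is not a plain table scan. The exact negative .q files are not included in this hunk, so the query below is only an assumed reconstruction, taken from the hint that the clientpositive join29.q hunk later in this patch removes; the FAILED line is copied verbatim from the golden output above:

    -- Hypothetical repro of the new Error 10227 path: the hinted side (subq1)
    -- contains a GROUP BY, so the hint can no longer be honored.
    EXPLAIN
    INSERT OVERWRITE TABLE dest_j1
    SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt
    FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1
         JOIN
         (select y.key, count(1) as cnt from src y group by y.key) subq2
    ON (subq1.key = subq2.key);
    -- FAILED: SemanticException [Error 10227]: All operators are not allowed with mapjoin hint.
    -- Remove the mapjoin hint.

The clientpositive variants of these queries, shown in the query-file hunks that follow, drop the hint and instead enable hive.auto.convert.join, hive.auto.convert.join.aggressivemapjoin, and hive.auto.convert.join.aggressivemapjoin.size so the small inputs are converted to a mapjoin automatically.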
Index: ql/src/test/results/clientnegative/union22.q.out =================================================================== --- ql/src/test/results/clientnegative/union22.q.out (revision 0) +++ ql/src/test/results/clientnegative/union22.q.out (working copy) @@ -0,0 +1,45 @@ +PREHOOK: query: create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dst_union22 +PREHOOK: query: create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dst_union22_delta +PREHOOK: query: insert overwrite table dst_union22 partition (ds='1') +select key, value, key , value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dst_union22@ds=1 +POSTHOOK: query: insert overwrite table dst_union22 partition (ds='1') +select key, value, key , value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dst_union22@ds=1 +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table dst_union22_delta partition (ds='1') +select key, key, value, key, value, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dst_union22_delta@ds=1 +POSTHOOK: query: insert overwrite table dst_union22_delta partition (ds='1') +select key, key, value, key, value, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dst_union22_delta@ds=1 +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k0 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta 
PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +FAILED: SemanticException [Error 10227]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/join32.q.out =================================================================== --- ql/src/test/results/clientnegative/join32.q.out (revision 0) +++ ql/src/test/results/clientnegative/join32.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10227]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/queries/clientpositive/join32.q =================================================================== --- ql/src/test/queries/clientpositive/join32.q (revision 1438474) +++ ql/src/test/queries/clientpositive/join32.q (working copy) @@ -1,13 +1,19 @@ CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; +set hive.auto.convert.join=true; +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; + +-- Since the inputs are small, it should be automatically converted to mapjoin + EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value +SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value +SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); Index: ql/src/test/queries/clientpositive/mapjoin_subquery2.q =================================================================== --- ql/src/test/queries/clientpositive/mapjoin_subquery2.q (revision 1438474) +++ ql/src/test/queries/clientpositive/mapjoin_subquery2.q (working copy) @@ -15,25 +15,25 @@ load data local inpath '../data/files/y.txt' INTO TABLE y; load data local inpath '../data/files/z.txt' INTO TABLE z; +set hive.auto.convert.join=true; +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; + +-- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name FROM (SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 FROM y JOIN x ON (x.id = y.id)) subq JOIN z ON (subq.key1 = z.id); -EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name +SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 +(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 FROM y JOIN x ON (x.id = y.id)) subq JOIN z ON (subq.key1 = z.id); -SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id); - drop table x; drop table y; drop table z; Index: ql/src/test/queries/clientpositive/join29.q 
=================================================================== --- ql/src/test/queries/clientpositive/join29.q (revision 1438474) +++ ql/src/test/queries/clientpositive/join29.q (working copy) @@ -1,13 +1,19 @@ CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT); -EXPLAIN +set hive.auto.convert.join=true; +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; + +-- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt +SELECT subq1.key, subq1.cnt, subq2.cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key); INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt +SELECT subq1.key, subq1.cnt, subq2.cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key); Index: ql/src/test/queries/clientpositive/mapjoin_subquery.q =================================================================== --- ql/src/test/queries/clientpositive/mapjoin_subquery.q (revision 1438474) +++ ql/src/test/queries/clientpositive/mapjoin_subquery.q (working copy) @@ -1,28 +1,34 @@ +set hive.auto.convert.join=true; +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; + +-- Since the inputs are small, it should be automatically converted to mapjoin + EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); - -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value + +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +ORDER BY subq.key1, z.value; EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1; + order by subq.key1, z.value; - -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1; + order by subq.key1, z.value; Index: ql/src/test/queries/clientpositive/union22.q =================================================================== --- ql/src/test/queries/clientpositive/union22.q (revision 1438474) +++ 
ql/src/test/queries/clientpositive/union22.q (working copy) @@ -1,7 +1,4 @@ - create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string); - - create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string); insert overwrite table dst_union22 partition (ds='1') @@ -12,13 +9,19 @@ set hive.merge.mapfiles=false; +set hive.auto.convert.join=true; +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; + +-- Since the inputs are small, it should be automatically converted to mapjoin + explain extended insert overwrite table dst_union22 partition (ds='2') select * from ( select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 +select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on a.k1 = b.k1 and a.ds='1' where a.k1 > 20 @@ -30,12 +33,11 @@ ( select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 +select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on a.k1 = b.k1 and a.ds='1' where a.k1 > 20 ) subq; - select * from dst_union22 where ds = '2' order by k1, k2, k3, k4; Index: ql/src/test/queries/clientpositive/join33.q =================================================================== --- ql/src/test/queries/clientpositive/join33.q (revision 1438474) +++ ql/src/test/queries/clientpositive/join33.q (working copy) @@ -1,13 +1,19 @@ CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; +set hive.auto.convert.join=true; +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; + +-- Since the inputs are small, it should be automatically converted to mapjoin + EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value +SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value +SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); Index: ql/src/test/queries/clientpositive/smb_mapjoin_14.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_14.q (revision 1438474) +++ ql/src/test/queries/clientpositive/smb_mapjoin_14.q (working copy) @@ -62,43 +62,6 @@ group by key ) subq2; --- A join is being performed across different sub-queries, where a mapjoin is being performed in each of them. --- Each sub-query should be converted to a sort-merge join. 
-explain -select src1.key, src1.cnt1, src2.cnt1 from -( - select key, count(*) as cnt1 from - ( - select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq1 group by key -) src1 -join -( - select key, count(*) as cnt1 from - ( - select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq2 group by key -) src2 -on src1.key = src2.key -order by src1.key, src1.cnt1, src2.cnt1; - -select src1.key, src1.cnt1, src2.cnt1 from -( - select key, count(*) as cnt1 from - ( - select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq1 group by key -) src1 -join -( - select key, count(*) as cnt1 from - ( - select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq2 group by key -) src2 -on src1.key = src2.key -order by src1.key, src1.cnt1, src2.cnt1; - -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join. explain Index: ql/src/test/queries/clientpositive/join34.q =================================================================== --- ql/src/test/queries/clientpositive/join34.q (revision 1438474) +++ ql/src/test/queries/clientpositive/join34.q (working copy) @@ -1,10 +1,14 @@ +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; +set hive.auto.convert.join=true; +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; -CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; +-- Since the inputs are small, it should be automatically converted to mapjoin EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value +SELECT x.key, x.value, subq1.value FROM ( SELECT x.key as key, x.value as value from src x where x.key < 20 UNION ALL @@ -13,7 +17,7 @@ JOIN src1 x ON (x.key = subq1.key); INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value +SELECT x.key, x.value, subq1.value FROM ( SELECT x.key as key, x.value as value from src x where x.key < 20 UNION ALL Index: ql/src/test/queries/clientpositive/multiMapJoin1.q =================================================================== --- ql/src/test/queries/clientpositive/multiMapJoin1.q (revision 0) +++ ql/src/test/queries/clientpositive/multiMapJoin1.q (working copy) @@ -0,0 +1,149 @@ +-- Join of a big table with 2 small tables on different keys should be performed as a single MR job +create table smallTbl1(key string, value string); +insert overwrite table smallTbl1 select * from src where key < 10; + +create table smallTbl2(key string, value string); +insert overwrite table smallTbl2 select * from src where key < 10; + +create table bigTbl(key string, value string); +insert overwrite table bigTbl +select * from +( + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src + union all + select * from src +) subq; + +set hive.auto.convert.join=true; + +explain +select count(*) FROM +(select bigTbl.key as key, bigTbl.value as value1, + bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 + on (bigTbl.key = smallTbl1.key) +) firstjoin +JOIN 
+smallTbl2 on (firstjoin.value1 = smallTbl2.value); + +select count(*) FROM +(select bigTbl.key as key, bigTbl.value as value1, + bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 + on (bigTbl.key = smallTbl1.key) +) firstjoin +JOIN +smallTbl2 on (firstjoin.value1 = smallTbl2.value); + +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; + +explain +select count(*) FROM +(select bigTbl.key as key, bigTbl.value as value1, + bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 + on (bigTbl.key = smallTbl1.key) +) firstjoin +JOIN +smallTbl2 on (firstjoin.value1 = smallTbl2.value); + +select count(*) FROM +(select bigTbl.key as key, bigTbl.value as value1, + bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 + on (bigTbl.key = smallTbl1.key) +) firstjoin +JOIN +smallTbl2 on (firstjoin.value1 = smallTbl2.value); + +create table smallTbl3(key string, value string); +insert overwrite table smallTbl3 select * from src where key < 10; + +drop table bigTbl; + +create table bigTbl(key1 string, key2 string, value string); +insert overwrite table bigTbl +select * from +( + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src + union all + select key as key1, key as key2, value from src +) subq; + +set hive.auto.convert.join.aggressivemapjoin=false; + +select count(*) FROM + ( + SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, + firstjoin.value1 as value1, firstjoin.value2 as value2 FROM + (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, + bigTbl.value as value1, bigTbl.value as value2 + FROM bigTbl JOIN smallTbl1 + on (bigTbl.key1 = smallTbl1.key) + ) firstjoin + JOIN + smallTbl2 on (firstjoin.value1 = smallTbl2.value) + ) secondjoin + JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key); + +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; + +-- join with 4 tables on different keys is also executed as a single MR job +explain +select count(*) FROM + ( + SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, + firstjoin.value1 as value1, firstjoin.value2 as value2 FROM + (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, + bigTbl.value as value1, bigTbl.value as value2 + FROM bigTbl JOIN smallTbl1 + on (bigTbl.key1 = smallTbl1.key) + ) firstjoin + JOIN + smallTbl2 on (firstjoin.value1 = smallTbl2.value) + ) secondjoin + JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key); + +select count(*) FROM + ( + SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, + firstjoin.value1 as value1, firstjoin.value2 as value2 FROM + (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, + bigTbl.value as value1, bigTbl.value as value2 + FROM bigTbl JOIN smallTbl1 + on (bigTbl.key1 = smallTbl1.key) + ) firstjoin + JOIN + smallTbl2 on (firstjoin.value1 = smallTbl2.value) + ) secondjoin + JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key); Index: ql/src/test/queries/clientpositive/mapjoin_mapjoin.q 
=================================================================== --- ql/src/test/queries/clientpositive/mapjoin_mapjoin.q (revision 1438474) +++ ql/src/test/queries/clientpositive/mapjoin_mapjoin.q (working copy) @@ -1,5 +1,11 @@ -explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key); +set hive.auto.convert.join=true; +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; -explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds; +-- Since the inputs are small, it should be automatically converted to mapjoin -select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds; +explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key); + +explain select count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds; + +select count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds; Index: ql/src/test/queries/clientpositive/join31.q =================================================================== --- ql/src/test/queries/clientpositive/join31.q (revision 1438474) +++ ql/src/test/queries/clientpositive/join31.q (working copy) @@ -1,14 +1,20 @@ CREATE TABLE dest_j1(key STRING, cnt INT); +set hive.auto.convert.join=true; +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; + +-- Since the inputs are small, it should be automatically converted to mapjoin + EXPLAIN INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt +SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) group by subq1.key; INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt +SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) group by subq1.key; Index: ql/src/test/queries/clientpositive/join35.q =================================================================== --- ql/src/test/queries/clientpositive/join35.q (revision 1438474) +++ ql/src/test/queries/clientpositive/join35.q (working copy) @@ -1,10 +1,14 @@ +CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE; +set hive.auto.convert.join=true; +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; -CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE; +-- Since the inputs are small, it should be automatically converted to mapjoin EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt +SELECT x.key, x.value, subq1.cnt FROM ( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key UNION ALL @@ -13,7 +17,7 @@ JOIN src1 x ON (x.key = subq1.key); INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt +SELECT x.key, x.value, subq1.cnt FROM ( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 
group by x.key UNION ALL Index: ql/src/test/queries/clientpositive/join28.q =================================================================== --- ql/src/test/queries/clientpositive/join28.q (revision 1438474) +++ ql/src/test/queries/clientpositive/join28.q (working copy) @@ -1,19 +1,23 @@ +CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE; +set hive.auto.convert.join=true; +set hive.auto.convert.join.aggressivemapjoin=true; +set hive.auto.convert.join.aggressivemapjoin.size=10000; -CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE; +-- Since the inputs are small, it should be automatically converted to mapjoin EXPLAIN INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +SELECT subq.key1, z.value FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); Index: ql/src/test/queries/clientpositive/smb_mapjoin_16.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_16.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_16.q (working copy) @@ -0,0 +1,21 @@ +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.reducers.max = 1; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; + +FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT *; + +-- Mapjoin followed by an aggregation should be performed in a single MR job +EXPLAIN +SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key; +SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key; Index: ql/src/test/queries/clientnegative/union22.q =================================================================== --- ql/src/test/queries/clientnegative/union22.q (revision 0) +++ ql/src/test/queries/clientnegative/union22.q (working copy) @@ -0,0 +1,26 @@ +create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string); +create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string); + +insert overwrite table dst_union22 partition (ds='1') +select key, value, key , value from src; + +insert overwrite table dst_union22_delta partition (ds='1') +select key, key, value, key, value, value from src; + +set hive.merge.mapfiles=false; + +-- Union followed by Mapjoin is not supported. 
+-- The same query would work without the hint + +explain extended +insert overwrite table dst_union22 partition (ds='2') +select * from +( +select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 +union all +select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 +from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on +a.k1 = b.k1 and a.ds='1' +where a.k1 > 20 +) +subq; Index: ql/src/test/queries/clientnegative/join32.q =================================================================== --- ql/src/test/queries/clientnegative/join32.q (revision 0) +++ ql/src/test/queries/clientnegative/join32.q (working copy) @@ -0,0 +1,13 @@ +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; + +-- Mapjoin followed by Mapjoin is not supported. +-- The same query would work without the hint +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + + + + Index: ql/src/test/queries/clientnegative/join35.q =================================================================== --- ql/src/test/queries/clientnegative/join35.q (revision 0) +++ ql/src/test/queries/clientnegative/join35.q (working copy) @@ -0,0 +1,17 @@ +CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE; + +-- Mapjoin followed by union is not supported. +-- The same query would work without the hint +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key); + + + + Index: ql/src/test/queries/clientnegative/join28.q =================================================================== --- ql/src/test/queries/clientnegative/join28.q (revision 0) +++ ql/src/test/queries/clientnegative/join28.q (working copy) @@ -0,0 +1,14 @@ +CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE; + +-- Mapjoin followed by mapjoin is not supported. +-- The same query would work fine without the hint. +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); + + + Index: ql/src/test/queries/clientnegative/join29.q =================================================================== --- ql/src/test/queries/clientnegative/join29.q (revision 0) +++ ql/src/test/queries/clientnegative/join29.q (working copy) @@ -0,0 +1,9 @@ +CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT); + +-- Mapjoin followed by group by is not supported. 
+-- The same query would work without the hint +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java (working copy) @@ -1,98 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.optimizer; - -import java.io.Serializable; -import java.util.HashMap; -import java.util.Map; -import java.util.Stack; - -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.MapredWork; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; - -/** - * Processor for the rule - map join followed by reduce sink. - */ -public class GenMRRedSink4 implements NodeProcessor { - - public GenMRRedSink4() { - } - - /** - * Reduce Scan encountered. - * - * @param nd - * the reduce sink operator encountered - * @param opProcCtx - * context - */ - public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, - Object... 
nodeOutputs) throws SemanticException { - ReduceSinkOperator op = (ReduceSinkOperator) nd; - GenMRProcContext ctx = (GenMRProcContext) opProcCtx; - - ctx.getParseCtx(); - - // map-join consisted on a bunch of map-only jobs, and it has been split - // after the mapjoin - Operator reducer = op.getChildOperators().get(0); - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); - Task currTask = mapredCtx.getCurrTask(); - MapredWork plan = (MapredWork) currTask.getWork(); - HashMap, Task> opTaskMap = ctx - .getOpTaskMap(); - Task opMapTask = opTaskMap.get(reducer); - - ctx.setCurrTask(currTask); - - // If the plan for this reducer does not exist, initialize the plan - if (opMapTask == null) { - // When the reducer is encountered for the first time - if (plan.getReducer() == null) { - GenMapRedUtils.initMapJoinPlan(op, ctx, true, null, true, -1); - // When mapjoin is followed by a multi-table insert - } else { - GenMapRedUtils.splitPlan(op, ctx); - } - } else { - // There is a join after mapjoin. One of the branches of mapjoin has already - // been initialized. - // Initialize the current branch, and join with the original plan. - assert plan.getReducer() != reducer; - GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, false, true, null); - } - - mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), - ctx.getCurrAliasId())); - - // the mapjoin operator has been processed - ctx.setCurrMapJoinOp(null); - return null; - } -} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java (working copy) @@ -39,16 +39,13 @@ private final transient boolean[] mapOnlySubq; private final transient boolean[] mapOnlySubqSet; private final transient boolean[] rootTask; - private final transient boolean[] mapJoinSubq; private transient int numInputs; - private transient boolean mapJoinQuery; public UnionParseContext(int numInputs) { this.numInputs = numInputs; mapOnlySubq = new boolean[numInputs]; rootTask = new boolean[numInputs]; - mapJoinSubq = new boolean[numInputs]; mapOnlySubqSet = new boolean[numInputs]; } @@ -61,21 +58,6 @@ this.mapOnlySubqSet[pos] = true; } - public boolean getMapJoinSubq(int pos) { - return mapJoinSubq[pos]; - } - - public void setMapJoinSubq(int pos, boolean mapJoinSubq) { - this.mapJoinSubq[pos] = mapJoinSubq; - if (mapJoinSubq) { - mapJoinQuery = true; - } - } - - public boolean getMapJoinQuery() { - return mapJoinQuery; - } - public boolean getRootTask(int pos) { return rootTask[pos]; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (working copy) @@ -107,30 +107,6 @@ } /** - * Map-join subquery followed by Union. - */ - public static class MapJoinUnion implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - UnionOperator union = (UnionOperator) nd; - UnionProcContext ctx = (UnionProcContext) procCtx; - - // find the branch on which this processor was invoked - int pos = getPositionParent(union, stack); - UnionParseContext uCtx = ctx.getUnionParseContext(union); - if (uCtx == null) { - uCtx = new UnionParseContext(union.getConf().getNumInputs()); - } - - uCtx.setMapJoinSubq(pos, true); - ctx.setUnionParseContext(union, uCtx); - return null; - } - } - - /** * Union subquery followed by Union. */ public static class UnknownUnion implements NodeProcessor { @@ -330,10 +306,6 @@ return new MapUnion(); } - public static NodeProcessor getMapJoinUnion() { - return new MapJoinUnion(); - } - public static NodeProcessor getUnknownUnion() { return new UnknownUnion(); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (working copy) @@ -25,7 +25,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.UnionOperator; @@ -79,9 +78,6 @@ opRules.put(new RuleRegExp("R3", TableScanOperator.getOperatorName() + "%.*" + UnionOperator.getOperatorName() + "%"), UnionProcFactory.getMapUnion()); - opRules.put(new RuleRegExp("R4", - MapJoinOperator.getOperatorName() + "%.*" + UnionOperator.getOperatorName() + "%"), - UnionProcFactory.getMapJoinUnion()); // The dispatcher fires the processor for the matching rule and passes the // context along Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy) @@ -227,7 +227,7 @@ QBJoinTree newJoinTree = newWork.getJoinTree(); // generate the map join operator; already checked the map join MapJoinOperator newMapJoinOp = MapJoinProcessor.convertMapJoin(opParseCtxMap, op, - newJoinTree, mapJoinPos, true); + newJoinTree, mapJoinPos, true, false); // generate the local work and return the big table alias String bigTableAlias = MapJoinProcessor .genMapJoinLocalWork(newWork, newMapJoinOp, mapJoinPos); @@ -241,9 +241,43 @@ e.printStackTrace(); throw new SemanticException("Generate New MapJoin Opertor Exeception " + e.getMessage()); } + } + private static void checkParentOperatorType(Operator op) + throws SemanticException { + if (!op.opAllowedBeforeMapJoin()) { + throw new SemanticException(ErrorMsg.OPERATOR_NOT_ALLOWED_WITH_MAPJOIN.getMsg()); + } + if (op.getParentOperators() != null) { + for (Operator parentOp : op.getParentOperators()) { + checkParentOperatorType(parentOp); + } + } } + private static void checkChildOperatorType(Operator op) + throws SemanticException { + if (!op.opAllowedAfterMapJoin()) { + throw new SemanticException(ErrorMsg.OPERATOR_NOT_ALLOWED_WITH_MAPJOIN.getMsg()); + } + if (op.getChildOperators() != null) { + for (Operator childOp : op.getChildOperators()) { + 
checkChildOperatorType(childOp); + } + } + } + + private static void validateMapJoinTypes(Operator op) + throws SemanticException { + for (Operator parentOp : op.getParentOperators()) { + checkParentOperatorType(parentOp); + } + + for (Operator childOp : op.getChildOperators()) { + checkChildOperatorType(childOp); + } + } + /** * convert a regular join to a a map-side join. * @@ -259,8 +293,10 @@ */ public static MapJoinOperator convertMapJoin( LinkedHashMap, OpParseContext> opParseCtxMap, - JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin) + JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin, + boolean validateMapJoinTree) throws SemanticException { + // outer join cannot be performed on a table which is being cached JoinDesc desc = op.getConf(); JoinCondDesc[] condns = desc.getConds(); @@ -477,6 +513,11 @@ op.setChildOperators(null); op.setParentOperators(null); + // make sure only map-joins can be performed. + if (validateMapJoinTree) { + validateMapJoinTypes(mapJoinOp); + } + return mapJoinOp; } @@ -487,11 +528,10 @@ HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN) && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN); - LinkedHashMap, OpParseContext> opParseCtxMap = pctx .getOpParseCtx(); MapJoinOperator mapJoinOp = convertMapJoin(opParseCtxMap, op, joinTree, mapJoinPos, - noCheckOuterJoin); + noCheckOuterJoin, true); // create a dummy select to select all columns genSelectPlan(pctx, mapJoinOp); return mapJoinOp; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (working copy) @@ -27,7 +27,6 @@ import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; @@ -40,7 +39,6 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; @@ -155,90 +153,10 @@ } } - /** - * GenMRMapJoinCtx. 
- * - */ - public static class GenMRMapJoinCtx { - String taskTmpDir; - TableDesc tt_desc; - Operator rootMapJoinOp; - AbstractMapJoinOperator oldMapJoin; - - public GenMRMapJoinCtx() { - taskTmpDir = null; - tt_desc = null; - rootMapJoinOp = null; - oldMapJoin = null; - } - - /** - * @param taskTmpDir - * @param tt_desc - * @param rootMapJoinOp - * @param oldMapJoin - */ - public GenMRMapJoinCtx(String taskTmpDir, TableDesc tt_desc, - Operator rootMapJoinOp, - AbstractMapJoinOperator oldMapJoin) { - this.taskTmpDir = taskTmpDir; - this.tt_desc = tt_desc; - this.rootMapJoinOp = rootMapJoinOp; - this.oldMapJoin = oldMapJoin; - } - - public void setTaskTmpDir(String taskTmpDir) { - this.taskTmpDir = taskTmpDir; - } - - public String getTaskTmpDir() { - return taskTmpDir; - } - - public void setTTDesc(TableDesc tt_desc) { - this.tt_desc = tt_desc; - } - - public TableDesc getTTDesc() { - return tt_desc; - } - - /** - * @return the childSelect - */ - public Operator getRootMapJoinOp() { - return rootMapJoinOp; - } - - /** - * @param rootMapJoinOp - * the rootMapJoinOp to set - */ - public void setRootMapJoinOp(Operator rootMapJoinOp) { - this.rootMapJoinOp = rootMapJoinOp; - } - - /** - * @return the oldMapJoin - */ - public AbstractMapJoinOperator getOldMapJoin() { - return oldMapJoin; - } - - /** - * @param oldMapJoin - * the oldMapJoin to set - */ - public void setOldMapJoin(AbstractMapJoinOperator oldMapJoin) { - this.oldMapJoin = oldMapJoin; - } - } - private HiveConf conf; private HashMap, Task> opTaskMap; private HashMap unionTaskMap; - private HashMap, GenMRMapJoinCtx> mapJoinTaskMap; private List> seenOps; private List seenFileSinkOps; @@ -250,7 +168,6 @@ private Task currTask; private Operator currTopOp; private UnionOperator currUnionOp; - private AbstractMapJoinOperator currMapJoinOp; private String currAliasId; private List> rootOps; private DependencyCollectionTask dependencyTaskForMultiInsert; @@ -313,12 +230,10 @@ currTask = null; currTopOp = null; currUnionOp = null; - currMapJoinOp = null; currAliasId = null; rootOps = new ArrayList>(); rootOps.addAll(parseCtx.getTopOps().values()); unionTaskMap = new HashMap(); - mapJoinTaskMap = new HashMap, GenMRMapJoinCtx>(); dependencyTaskForMultiInsert = null; linkedFileDescTasks = null; } @@ -488,19 +403,7 @@ this.currUnionOp = currUnionOp; } - public AbstractMapJoinOperator getCurrMapJoinOp() { - return currMapJoinOp; - } - /** - * @param currMapJoinOp - * current map join operator - */ - public void setCurrMapJoinOp(AbstractMapJoinOperator currMapJoinOp) { - this.currMapJoinOp = currMapJoinOp; - } - - /** * @return current top alias */ public String getCurrAliasId() { @@ -523,14 +426,6 @@ unionTaskMap.put(op, uTask); } - public GenMRMapJoinCtx getMapJoinCtx(AbstractMapJoinOperator op) { - return mapJoinTaskMap.get(op); - } - - public void setMapJoinCtx(AbstractMapJoinOperator op, GenMRMapJoinCtx mjCtx) { - mapJoinTaskMap.put(op, mjCtx); - } - /** * Get the input set. 
*/ Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (working copy) @@ -26,7 +26,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.Task; @@ -35,7 +34,6 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; @@ -44,10 +42,8 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; @@ -84,16 +80,10 @@ } UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union); - if ((uPrsCtx != null) && (uPrsCtx.getMapJoinQuery())) { - GenMapRedUtils.mergeMapJoinUnion(union, ctx, - UnionProcFactory.getPositionParent(union, stack)); - } - else { - ctx.getMapCurrCtx().put( - (Operator) union, - new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), - ctx.getCurrAliasId())); - } + ctx.getMapCurrCtx().put( + (Operator) union, + new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), + ctx.getCurrAliasId())); // if the union is the first time seen, set current task to GenMRUnionCtx uCtxTask = ctx.getUnionTask(union); @@ -103,7 +93,7 @@ ctx.setUnionTask(union, uCtxTask); } - Task uTask=ctx.getCurrTask(); + Task uTask = ctx.getCurrTask(); if (uTask.getParentTasks() == null || uTask.getParentTasks().isEmpty()) { if (!ctx.getRootTasks().contains(uTask)) { @@ -134,8 +124,9 @@ GenMRUnionCtx uCtxTask) { ParseContext parseCtx = ctx.getParseCtx(); - TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema( - parent.getSchema(), "temporarycol")); + TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils + .getFieldSchemasFromRowSchema( + parent.getSchema(), "temporarycol")); // generate the temporary file Context baseCtx = parseCtx.getContext(); @@ -150,7 +141,7 @@ parent.getChildOperators().set(0, fs_op); List> parentOpList = - new ArrayList>(); + new ArrayList>(); parentOpList.add(parent); fs_op.setParentOperators(parentOpList); @@ -158,7 +149,7 @@ Operator ts_op = OperatorFactory.get( new TableScanDesc(), parent.getSchema()); List> childOpList = - new ArrayList>(); + new ArrayList>(); childOpList.add(child); ts_op.setChildOperators(childOpList); child.replaceParent(parent, ts_op); @@ -212,27 +203,9 @@ } } - private void processSubQueryUnionMapJoin(GenMRProcContext ctx) { - AbstractMapJoinOperator mjOp = ctx.getCurrMapJoinOp(); - 
assert mjOp != null; - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mjOp); - assert mjCtx != null; - MapredWork plan = (MapredWork) ctx.getCurrTask().getWork(); - - String taskTmpDir = mjCtx.getTaskTmpDir(); - TableDesc tt_desc = mjCtx.getTTDesc(); - assert plan.getPathToAliases().get(taskTmpDir) == null; - plan.getPathToAliases().put(taskTmpDir, new ArrayList()); - plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - plan.getPathToPartitionInfo().put(taskTmpDir, - new PartitionDesc(tt_desc, null)); - plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); - } - /** * Union Operator encountered . Currently, the algorithm is pretty simple: If - * all the sub-queries are map-only, don't do anything. However, if there is a - * mapjoin followed by the union, merge at the union Otherwise, insert a + * all the sub-queries are map-only, don't do anything. Otherwise, insert a * FileSink on top of all the sub-queries. * * This can be optimized later on. @@ -284,8 +257,7 @@ } // Copy into the current union task plan if - if (uPrsCtx.getMapOnlySubq(pos) - && !uPrsCtx.getMapJoinSubq(pos) && uPrsCtx.getRootTask(pos)) { + if (uPrsCtx.getMapOnlySubq(pos) && uPrsCtx.getRootTask(pos)) { processSubQueryUnionMerge(ctx, uCtxTask, union, stack); } // If it a map-reduce job, create a temporary file @@ -295,13 +267,10 @@ && (!ctx.getRootTasks().contains(currTask))) { ctx.getRootTasks().add(currTask); } - // If there is a mapjoin at position 'pos' - if (uPrsCtx.getMapJoinSubq(pos)) { - processSubQueryUnionMapJoin(ctx); - } - processSubQueryUnionCreateIntermediate(union.getParentOperators().get(pos), union, uTask, ctx, uCtxTask); - //the currAliasId and CurrTopOp is not valid any more + processSubQueryUnionCreateIntermediate(union.getParentOperators().get(pos), union, uTask, + ctx, uCtxTask); + // the currAliasId and CurrTopOp is not valid any more ctx.setCurrAliasId(null); ctx.setCurrTopOp(null); ctx.getOpTaskMap().put(null, uTask); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (working copy) @@ -81,7 +81,7 @@ } else { // This will happen in case of joins. 
The current plan can be thrown away // after being merged with the original plan - GenMapRedUtils.joinPlan(op, null, opMapTask, ctx, -1, false, false, null); + GenMapRedUtils.joinPlan(op, null, opMapTask, ctx, -1, false); currTask = opMapTask; ctx.setCurrTask(currTask); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -20,7 +20,6 @@ import java.io.Serializable; import java.util.ArrayList; -import java.util.ConcurrentModificationException; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; @@ -33,12 +32,10 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -47,19 +44,15 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext; import org.apache.hadoop.hive.ql.parse.OpParseContext; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; @@ -97,12 +90,12 @@ throws SemanticException { Operator reducer = op.getChildOperators().get(0); Map, GenMapRedCtx> mapCurrCtx = - opProcCtx.getMapCurrCtx(); + opProcCtx.getMapCurrCtx(); GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); Task currTask = mapredCtx.getCurrTask(); MapredWork plan = (MapredWork) currTask.getWork(); HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); + opProcCtx.getOpTaskMap(); Operator currTopOp = opProcCtx.getCurrTopOp(); opTaskMap.put(reducer, currTask); @@ -114,7 +107,7 @@ List> rootTasks = opProcCtx.getRootTasks(); if (!rootTasks.contains(currTask)) { - rootTasks.add(currTask); + rootTasks.add(currTask); } if (reducer.getClass() == JoinOperator.class) { plan.setNeedsTagging(true); @@ -137,167 +130,8 @@ opProcCtx.setCurrAliasId(currAliasId); } - public static void initMapJoinPlan( - Operator op, GenMRProcContext ctx, - boolean 
readInputMapJoin, UnionOperator currUnionOp, boolean setReducer, int pos) - throws SemanticException { - initMapJoinPlan(op, ctx, readInputMapJoin, currUnionOp, setReducer, pos, false); - } /** - * Initialize the current plan by adding it to root tasks. - * - * @param op - * the map join operator encountered - * @param opProcCtx - * processing context - * @param pos - * position of the parent - */ - public static void initMapJoinPlan(Operator op, - GenMRProcContext opProcCtx, boolean readInputMapJoin, - UnionOperator currUnionOp, boolean setReducer, int pos, boolean createLocalPlan) - throws SemanticException { - Map, GenMapRedCtx> mapCurrCtx = - opProcCtx.getMapCurrCtx(); - assert (((pos == -1) && (readInputMapJoin)) || (pos != -1)); - int parentPos = (pos == -1) ? 0 : pos; - GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get( - parentPos)); - Task currTask = mapredCtx.getCurrTask(); - MapredWork plan = (MapredWork) currTask.getWork(); - HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); - Operator currTopOp = opProcCtx.getCurrTopOp(); - - // The mapjoin has already been encountered. Some context must be stored - // about that - if (readInputMapJoin) { - AbstractMapJoinOperator currMapJoinOp = opProcCtx.getCurrMapJoinOp(); - assert currMapJoinOp != null; - boolean local = ((pos == -1) || (pos == (currMapJoinOp.getConf()).getPosBigTable())) ? - false : true; - - if (setReducer) { - Operator reducer = op.getChildOperators().get(0); - plan.setReducer(reducer); - opTaskMap.put(reducer, currTask); - if (reducer.getClass() == JoinOperator.class) { - plan.setNeedsTagging(true); - } - ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf(); - plan.setNumReduceTasks(desc.getNumReducers()); - } else { - opTaskMap.put(op, currTask); - } - - if (currUnionOp == null) { - GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(currMapJoinOp); - String taskTmpDir; - TableDesc tt_desc; - Operator rootOp; - - if (mjCtx.getOldMapJoin() == null || setReducer) { - taskTmpDir = mjCtx.getTaskTmpDir(); - tt_desc = mjCtx.getTTDesc(); - rootOp = mjCtx.getRootMapJoinOp(); - } else { - GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(mjCtx - .getOldMapJoin()); - taskTmpDir = oldMjCtx.getTaskTmpDir(); - tt_desc = oldMjCtx.getTTDesc(); - rootOp = oldMjCtx.getRootMapJoinOp(); - } - - setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); - setupBucketMapJoinInfo(plan, currMapJoinOp, createLocalPlan); - } else { - initUnionPlan(opProcCtx, currUnionOp, currTask, false); - } - - opProcCtx.setCurrMapJoinOp(null); - } else { - MapJoinDesc desc = (MapJoinDesc) op.getConf(); - - // The map is overloaded to keep track of mapjoins also - opTaskMap.put(op, currTask); - - List> rootTasks = opProcCtx.getRootTasks(); - if (!rootTasks.contains(currTask)) { - rootTasks.add(currTask); - } - - assert currTopOp != null; - List> seenOps = opProcCtx.getSeenOps(); - String currAliasId = opProcCtx.getCurrAliasId(); - - seenOps.add(currTopOp); - boolean local = (pos == desc.getPosBigTable()) ? 
false : true; - setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); - setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator)op, createLocalPlan); - } - - opProcCtx.setCurrTask(currTask); - opProcCtx.setCurrTopOp(null); - opProcCtx.setCurrAliasId(null); - } - - private static void setupBucketMapJoinInfo(MapredWork plan, - AbstractMapJoinOperator currMapJoinOp, boolean createLocalPlan) { - if (currMapJoinOp != null) { - Map>> aliasBucketFileNameMapping = - currMapJoinOp.getConf().getAliasBucketFileNameMapping(); - if(aliasBucketFileNameMapping!= null) { - MapredLocalWork localPlan = plan.getMapLocalWork(); - if(localPlan == null) { - if(currMapJoinOp instanceof SMBMapJoinOperator) { - localPlan = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork(); - } - if (localPlan == null && createLocalPlan) { - localPlan = new MapredLocalWork( - new LinkedHashMap>(), - new LinkedHashMap()); - } - } else { - //local plan is not null, we want to merge it into SMBMapJoinOperator's local work - if(currMapJoinOp instanceof SMBMapJoinOperator) { - MapredLocalWork smbLocalWork = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork(); - if(smbLocalWork != null) { - localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork()); - localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork()); - } - } - } - - if(localPlan == null) { - return; - } - - if(currMapJoinOp instanceof SMBMapJoinOperator) { - plan.setMapLocalWork(null); - ((SMBMapJoinOperator)currMapJoinOp).getConf().setLocalWork(localPlan); - } else { - plan.setMapLocalWork(localPlan); - } - BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext(); - localPlan.setBucketMapjoinContext(bucketMJCxt); - bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping); - bucketMJCxt.setBucketFileNameMapping(currMapJoinOp.getConf().getBigTableBucketNumMapping()); - localPlan.setInputFileChangeSensitive(true); - bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); - bucketMJCxt.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class); - bucketMJCxt.setBigTablePartSpecToFileMapping( - currMapJoinOp.getConf().getBigTablePartSpecToFileMapping()); - // BucketizedHiveInputFormat should be used for either sort merge join or bucket map join - if ((currMapJoinOp instanceof SMBMapJoinOperator) - || (currMapJoinOp.getConf().isBucketMapJoin())) { - plan.setUseBucketizedHiveInputFormat(true); - } - } - } - } - - /** * Initialize the current union plan. * * @param op @@ -312,7 +146,7 @@ MapredWork plan = (MapredWork) unionTask.getWork(); HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); + opProcCtx.getOpTaskMap(); opTaskMap.put(reducer, unionTask); plan.setReducer(reducer); @@ -377,6 +211,7 @@ Task currTask, boolean local) throws SemanticException { MapredWork plan = (MapredWork) currTask.getWork(); + // In case of lateral views followed by a join, the same tree // can be traversed more than one if (currUnionOp != null) { @@ -433,13 +268,6 @@ opProcCtx.setCurrTask(existingTask); } - public static void joinPlan(Operator op, - Task oldTask, Task task, - GenMRProcContext opProcCtx, int pos, boolean split, - boolean readMapJoinData, UnionOperator currUnionOp) throws SemanticException { - joinPlan(op, oldTask, task, opProcCtx, pos, split, readMapJoinData, currUnionOp, false); - } - /** * Merge the current task with the task for the current reducer. 
* @@ -456,8 +284,7 @@ */ public static void joinPlan(Operator op, Task oldTask, Task task, - GenMRProcContext opProcCtx, int pos, boolean split, - boolean readMapJoinData, UnionOperator currUnionOp, boolean createLocalWork) + GenMRProcContext opProcCtx, int pos, boolean split) throws SemanticException { Task currTask = task; MapredWork plan = (MapredWork) currTask.getWork(); @@ -493,53 +320,15 @@ : true; } setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); - if(op instanceof AbstractMapJoinOperator) { - setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator)op, createLocalWork); - } } currTopOp = null; opProcCtx.setCurrTopOp(currTopOp); - } else if (opProcCtx.getCurrMapJoinOp() != null) { - AbstractMapJoinOperator mjOp = opProcCtx.getCurrMapJoinOp(); - if (currUnionOp != null) { - initUnionPlan(opProcCtx, currUnionOp, currTask, false); - } else { - GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp); - - // In case of map-join followed by map-join, the file needs to be - // obtained from the old map join - AbstractMapJoinOperator oldMapJoin = mjCtx.getOldMapJoin(); - String taskTmpDir = null; - TableDesc tt_desc = null; - Operator rootOp = null; - - boolean local = ((pos == -1) || (pos == (mjOp.getConf()) - .getPosBigTable())) ? false : true; - if (oldMapJoin == null) { - if (opProcCtx.getParseCtx().getListMapJoinOpsNoReducer().contains(mjOp) - || local || (oldTask != null) && (parTasks != null)) { - taskTmpDir = mjCtx.getTaskTmpDir(); - tt_desc = mjCtx.getTTDesc(); - rootOp = mjCtx.getRootMapJoinOp(); - } - } else { - GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(oldMapJoin); - assert oldMjCtx != null; - taskTmpDir = oldMjCtx.getTaskTmpDir(); - tt_desc = oldMjCtx.getTTDesc(); - rootOp = oldMjCtx.getRootMapJoinOp(); - } - - setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); - setupBucketMapJoinInfo(plan, oldMapJoin, createLocalWork); - } - opProcCtx.setCurrMapJoinOp(null); } if ((oldTask != null) && (parTasks != null)) { for (Task parTask : parTasks) { parTask.addDependentTask(currTask); - if(opProcCtx.getRootTasks().contains(currTask)) { + if (opProcCtx.getRootTasks().contains(currTask)) { opProcCtx.getRootTasks().remove(currTask); } } @@ -557,7 +346,7 @@ * processing context */ public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) - throws SemanticException { + throws SemanticException { // Generate a new task ParseContext parseCtx = opProcCtx.getParseCtx(); MapredWork cplan = getMapRedWork(parseCtx); @@ -572,7 +361,7 @@ cplan.setNumReduceTasks(new Integer(desc.getNumReducers())); HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); + opProcCtx.getOpTaskMap(); opTaskMap.put(reducer, redTask); Task currTask = opProcCtx.getCurrTask(); @@ -622,7 +411,6 @@ return currentInput; } - /** * set the current task in the mapredWork. 
* @@ -657,12 +445,12 @@ if (partsList == null) { try { - partsList = parseCtx.getOpToPartList().get((TableScanOperator)topOp); + partsList = parseCtx.getOpToPartList().get((TableScanOperator) topOp); if (partsList == null) { partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp), - parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(), - alias_id, parseCtx.getPrunedPartitions()); - parseCtx.getOpToPartList().put((TableScanOperator)topOp, partsList); + parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(), + alias_id, parseCtx.getPrunedPartitions()); + parseCtx.getOpToPartList().put((TableScanOperator) topOp, partsList); } } catch (SemanticException e) { throw e; @@ -701,7 +489,8 @@ long sizeNeeded = Integer.MAX_VALUE; int fileLimit = -1; if (parseCtx.getGlobalLimitCtx().isEnable()) { - long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITMAXROWSIZE); + long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(), + HiveConf.ConfVars.HIVELIMITMAXROWSIZE); sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow; // for the optimization that reduce number of input file, we limit number // of files allowed. If more than specific number of files have to be @@ -709,7 +498,7 @@ // inputs can cause unpredictable latency. It's not necessarily to be // cheaper. fileLimit = - HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE); + HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE); if (sizePerRow <= 0 || fileLimit <= 0) { LOG.info("Skip optimization to reduce input size of 'limit'"); @@ -735,6 +524,7 @@ // partitioned table and whether any partition is selected or not PlanUtils.addInput(inputs, new ReadEntity(parseCtx.getTopToTable().get(topOp), parentViewInfo)); + for (Partition part : parts) { if (part.getTable().isPartitioned()) { PlanUtils.addInput(inputs, new ReadEntity(part, parentViewInfo)); @@ -907,7 +697,7 @@ Operator topOp, MapredWork plan, boolean local, TableDesc tt_desc) throws SemanticException { - if(path == null || alias == null) { + if (path == null || alias == null) { return; } @@ -989,8 +779,8 @@ MapredWork work = new MapredWork(); boolean mapperCannotSpanPartns = - conf.getBoolVar( - HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS); + conf.getBoolVar( + HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS); work.setMapperCannotSpanPartns(mapperCannotSpanPartns); work.setPathToAliases(new LinkedHashMap>()); work.setPathToPartitionInfo(new LinkedHashMap()); @@ -1071,7 +861,7 @@ // replace the reduce child with this operator List> childOpList = parent - .getChildOperators(); + .getChildOperators(); for (int pos = 0; pos < childOpList.size(); pos++) { if (childOpList.get(pos) == op) { childOpList.set(pos, fs_op); @@ -1080,7 +870,7 @@ } List> parentOpList = - new ArrayList>(); + new ArrayList>(); parentOpList.add(parent); fs_op.setParentOperators(parentOpList); @@ -1096,7 +886,7 @@ op.getParentOperators().set(posn, ts_op); Map, GenMapRedCtx> mapCurrCtx = - opProcCtx.getMapCurrCtx(); + opProcCtx.getMapCurrCtx(); mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null, null)); String streamDesc = taskTmpDir; @@ -1124,101 +914,12 @@ // Add the path to alias mapping setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan, local, tt_desc); - - // This can be cleaned up as a function table in future - if (op instanceof AbstractMapJoinOperator) { - AbstractMapJoinOperator mjOp = (AbstractMapJoinOperator) op; - opProcCtx.setCurrMapJoinOp(mjOp); - 
GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp); - if (mjCtx == null) { - mjCtx = new GenMRMapJoinCtx(taskTmpDir, tt_desc, ts_op, null); - } else { - mjCtx.setTaskTmpDir(taskTmpDir); - mjCtx.setTTDesc(tt_desc); - mjCtx.setRootMapJoinOp(ts_op); - } - opProcCtx.setMapJoinCtx(mjOp, mjCtx); - opProcCtx.getMapCurrCtx().put(parent, - new GenMapRedCtx(childTask, null, null)); - setupBucketMapJoinInfo(cplan, mjOp, false); - } - - currTopOp = null; - String currAliasId = null; - - opProcCtx.setCurrTopOp(currTopOp); - opProcCtx.setCurrAliasId(currAliasId); + opProcCtx.setCurrTopOp(null); + opProcCtx.setCurrAliasId(null); opProcCtx.setCurrTask(childTask); } - public static void mergeMapJoinUnion(UnionOperator union, - GenMRProcContext ctx, int pos) throws SemanticException { - ParseContext parseCtx = ctx.getParseCtx(); - UnionProcContext uCtx = parseCtx.getUCtx(); - - UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union); - assert uPrsCtx != null; - - Task currTask = ctx.getCurrTask(); - - GenMRUnionCtx uCtxTask = ctx.getUnionTask(union); - Task uTask = null; - - union.getParentOperators().get(pos); - MapredWork uPlan = null; - - // union is encountered for the first time - if (uCtxTask == null) { - uCtxTask = new GenMRUnionCtx(); - uPlan = GenMapRedUtils.getMapRedWork(parseCtx); - uTask = TaskFactory.get(uPlan, parseCtx.getConf()); - uCtxTask.setUTask(uTask); - ctx.setUnionTask(union, uCtxTask); - } else { - uTask = uCtxTask.getUTask(); - uPlan = (MapredWork) uTask.getWork(); - } - - // If there is a mapjoin at position 'pos' - if (uPrsCtx.getMapJoinSubq(pos)) { - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(ctx.getCurrMapJoinOp()); - String taskTmpDir = mjCtx.getTaskTmpDir(); - if (uPlan.getPathToAliases().get(taskTmpDir) == null) { - uPlan.getPathToAliases().put(taskTmpDir, new ArrayList()); - uPlan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - uPlan.getPathToPartitionInfo().put(taskTmpDir, - new PartitionDesc(mjCtx.getTTDesc(), null)); - uPlan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); - } - - for (Task t : currTask.getParentTasks()) { - t.addDependentTask(uTask); - } - try { - boolean notDone = true; - while (notDone) { - for (Task t : currTask.getParentTasks()) { - t.removeDependentTask(currTask); - } - notDone = false; - } - } catch (ConcurrentModificationException e) { - } - } else { - setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), uPlan, false, ctx); - } - - ctx.setCurrTask(uTask); - ctx.setCurrAliasId(null); - ctx.setCurrTopOp(null); - ctx.setCurrMapJoinOp(null); - - ctx.getMapCurrCtx().put(union, - new GenMapRedCtx(ctx.getCurrTask(), null, null)); - } - private GenMapRedUtils() { // prevent instantiation } - } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (working copy) @@ -18,49 +18,38 @@ package org.apache.hadoop.hive.ql.optimizer; import java.io.Serializable; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Stack; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.OperatorFactory; -import 
org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.Utils; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; -import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.FileSinkDesc; +import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.plan.TableDesc; /** * Operator factory for MapJoin processing. */ public final class MapJoinFactory { - public static int getPositionParent(AbstractMapJoinOperator op, Stack stack) { + public static int getPositionParent(AbstractMapJoinOperator op, + Stack stack) { int pos = 0; int size = stack.size(); assert size >= 2 && stack.get(size - 1) == op; Operator parent = - (Operator) stack.get(size - 2); + (Operator) stack.get(size - 2); List> parOp = op.getParentOperators(); pos = parOp.indexOf(parent); assert pos < parOp.size(); @@ -72,217 +61,148 @@ */ public static class TableScanMapJoin implements NodeProcessor { - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd; - GenMRProcContext ctx = (GenMRProcContext) procCtx; + public static void setupBucketMapJoinInfo(MapredWork plan, + AbstractMapJoinOperator currMapJoinOp) { + if (currMapJoinOp != null) { + Map>> aliasBucketFileNameMapping = + currMapJoinOp.getConf().getAliasBucketFileNameMapping(); + if (aliasBucketFileNameMapping != null) { + MapredLocalWork localPlan = plan.getMapLocalWork(); + if (localPlan == null) { + if (currMapJoinOp instanceof SMBMapJoinOperator) { + localPlan = ((SMBMapJoinOperator) currMapJoinOp).getConf().getLocalWork(); + } + } else { + // local plan is not null, we want to merge it into SMBMapJoinOperator's local work + if (currMapJoinOp instanceof SMBMapJoinOperator) { + MapredLocalWork smbLocalWork = ((SMBMapJoinOperator) currMapJoinOp).getConf() + .getLocalWork(); + if (smbLocalWork != null) { + localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork()); + localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork()); + } + } + } - // find the branch on which this processor was invoked - int pos = getPositionParent(mapJoin, stack); + if (localPlan == null) { + return; + } - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get( - pos)); - Task currTask = mapredCtx.getCurrTask(); - MapredWork currPlan = (MapredWork) currTask.getWork(); - Operator currTopOp = mapredCtx.getCurrTopOp(); - String currAliasId = mapredCtx.getCurrAliasId(); - Operator reducer = mapJoin; - HashMap, Task> opTaskMap = - ctx.getOpTaskMap(); - Task opMapTask = opTaskMap.get(reducer); - - ctx.setCurrTopOp(currTopOp); - ctx.setCurrAliasId(currAliasId); - ctx.setCurrTask(currTask); - - // If the plan for this reducer does not exist, initialize the plan - if (opMapTask == null) { - assert currPlan.getReducer() == null; - GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, false, null, false, pos); - } else { - // The current plan can be thrown away after being merged with the - // original plan - GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false, - false, null); - currTask = opMapTask; - ctx.setCurrTask(currTask); + if (currMapJoinOp instanceof SMBMapJoinOperator) { + plan.setMapLocalWork(null); + ((SMBMapJoinOperator) currMapJoinOp).getConf().setLocalWork(localPlan); + } else { + plan.setMapLocalWork(localPlan); + } + BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext(); + localPlan.setBucketMapjoinContext(bucketMJCxt); + bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping); + bucketMJCxt.setBucketFileNameMapping( + currMapJoinOp.getConf().getBigTableBucketNumMapping()); + localPlan.setInputFileChangeSensitive(true); + bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); + bucketMJCxt + .setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class); + bucketMJCxt.setBigTablePartSpecToFileMapping( + currMapJoinOp.getConf().getBigTablePartSpecToFileMapping()); + // BucketizedHiveInputFormat should be used for either sort merge join or bucket map join + if ((currMapJoinOp instanceof SMBMapJoinOperator) + || (currMapJoinOp.getConf().isBucketMapJoin())) { + plan.setUseBucketizedHiveInputFormat(true); + } + } } - - mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx - .getCurrTopOp(), ctx.getCurrAliasId())); - return null; } - } - /** - * ReduceSink followed by MapJoin. 
- */ - public static class ReduceSinkMapJoin implements NodeProcessor { + /** + * Initialize the current plan by adding it to root tasks. + * + * @param op + * the map join operator encountered + * @param opProcCtx + * processing context + * @param pos + * position of the parent + */ + private static void initMapJoinPlan(AbstractMapJoinOperator op, + GenMRProcContext opProcCtx, int pos) + throws SemanticException { + Map, GenMapRedCtx> mapCurrCtx = + opProcCtx.getMapCurrCtx(); + int parentPos = (pos == -1) ? 0 : pos; + GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get( + parentPos)); + Task currTask = mapredCtx.getCurrTask(); + MapredWork plan = (MapredWork) currTask.getWork(); + HashMap, Task> opTaskMap = + opProcCtx.getOpTaskMap(); + Operator currTopOp = opProcCtx.getCurrTopOp(); - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd; - GenMRProcContext opProcCtx = (GenMRProcContext) procCtx; + MapJoinDesc desc = (MapJoinDesc) op.getConf(); - ParseContext parseCtx = opProcCtx.getParseCtx(); - MapredWork cplan = GenMapRedUtils.getMapRedWork(parseCtx); - Task redTask = TaskFactory.get(cplan, parseCtx - .getConf()); - Task currTask = opProcCtx.getCurrTask(); + // The map is overloaded to keep track of mapjoins also + opTaskMap.put(op, currTask); - // find the branch on which this processor was invoked - int pos = getPositionParent(mapJoin, stack); - boolean local = (pos == ((mapJoin.getConf())).getPosBigTable()) ? false - : true; + List> rootTasks = opProcCtx.getRootTasks(); + assert (!rootTasks.contains(currTask)); + rootTasks.add(currTask); - GenMapRedUtils.splitTasks(mapJoin, currTask, redTask, opProcCtx, false, - local, pos); + assert currTopOp != null; + opProcCtx.getSeenOps().add(currTopOp); - currTask = opProcCtx.getCurrTask(); - HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); - Task opMapTask = opTaskMap.get(mapJoin); - - // If the plan for this reducer does not exist, initialize the plan - if (opMapTask == null) { - assert cplan.getReducer() == null; - opTaskMap.put(mapJoin, currTask); - opProcCtx.setCurrMapJoinOp(null); - } else { - // The current plan can be thrown away after being merged with the - // original plan - GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, opProcCtx, pos, - false, false, null); - currTask = opMapTask; - opProcCtx.setCurrTask(currTask); - } - - return null; + String currAliasId = opProcCtx.getCurrAliasId(); + boolean local = (pos == desc.getPosBigTable()) ? false : true; + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); + setupBucketMapJoinInfo(plan, op); } - } - /** - * MapJoin followed by Select. - */ - public static class MapJoin implements NodeProcessor { - /** - * Create a task by splitting the plan below the join. The reason, we have - * to do so in the processing of Select and not MapJoin is due to the - * walker. While processing a node, it is not safe to alter its children - * because that will decide the course of the walk. It is perfectly fine to - * muck around with its parents though, since those nodes have already been - * visited. + * Merge the current task with the task for the current reducer. 
+ * + * @param op + * operator being processed + * @param oldTask + * the old task for the current reducer + * @param task + * the current task for the current reducer + * @param opProcCtx + * processing context + * @param pos + * position of the parent in the stack */ - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { + public static void joinMapJoinPlan(AbstractMapJoinOperator op, + Task task, + GenMRProcContext opProcCtx, int pos) + throws SemanticException { + Task currTask = task; + MapredWork plan = (MapredWork) currTask.getWork(); + Operator currTopOp = opProcCtx.getCurrTopOp(); + List> parTasks = null; - SelectOperator sel = (SelectOperator) nd; - AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) sel.getParentOperators().get( - 0); - assert sel.getParentOperators().size() == 1; + List> seenOps = opProcCtx.getSeenOps(); + String currAliasId = opProcCtx.getCurrAliasId(); - GenMRProcContext ctx = (GenMRProcContext) procCtx; - ParseContext parseCtx = ctx.getParseCtx(); - - // is the mapjoin followed by a reducer - List> listMapJoinOps = parseCtx - .getListMapJoinOpsNoReducer(); - - if (listMapJoinOps.contains(mapJoin)) { - ctx.setCurrAliasId(null); - ctx.setCurrTopOp(null); - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - mapCurrCtx.put((Operator) nd, new GenMapRedCtx( - ctx.getCurrTask(), null, null)); - return null; + if (!seenOps.contains(currTopOp)) { + seenOps.add(currTopOp); + boolean local = false; + if (pos != -1) { + local = (pos == ((MapJoinDesc) op.getConf()).getPosBigTable()) ? false + : true; + } + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); + setupBucketMapJoinInfo(plan, op); } - - ctx.setCurrMapJoinOp(mapJoin); - - Task currTask = ctx.getCurrTask(); - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin); - if (mjCtx == null) { - mjCtx = new GenMRMapJoinCtx(); - ctx.setMapJoinCtx(mapJoin, mjCtx); - } - - MapredWork mjPlan = GenMapRedUtils.getMapRedWork(parseCtx); - Task mjTask = TaskFactory.get(mjPlan, parseCtx - .getConf()); - - TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils - .getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol")); - - // generate the temporary file - Context baseCtx = parseCtx.getContext(); - String taskTmpDir = baseCtx.getMRTmpFileURI(); - - // Add the path to alias mapping - mjCtx.setTaskTmpDir(taskTmpDir); - mjCtx.setTTDesc(tt_desc); - mjCtx.setRootMapJoinOp(sel); - - sel.setParentOperators(null); - - // Create a file sink operator for this file name - Operator fs_op = OperatorFactory.get( - new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar( - HiveConf.ConfVars.COMPRESSINTERMEDIATE)), mapJoin.getSchema()); - - assert mapJoin.getChildOperators().size() == 1; - mapJoin.getChildOperators().set(0, fs_op); - - List> parentOpList = - new ArrayList>(); - parentOpList.add(mapJoin); - fs_op.setParentOperators(parentOpList); - - currTask.addDependentTask(mjTask); - - ctx.setCurrTask(mjTask); - ctx.setCurrAliasId(null); - ctx.setCurrTopOp(null); - - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - mapCurrCtx.put((Operator) nd, new GenMapRedCtx( - ctx.getCurrTask(), null, null)); - - return null; + currTopOp = null; + opProcCtx.setCurrTopOp(currTopOp); + opProcCtx.setCurrTask(currTask); } - } - /** - * MapJoin followed by MapJoin. 
- */ - public static class MapJoinMapJoin implements NodeProcessor { - @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - AbstractMapJoinOperator mapJoin = - (AbstractMapJoinOperator) nd; + Object... nodeOutputs) throws SemanticException { + AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd; GenMRProcContext ctx = (GenMRProcContext) procCtx; - ctx.getParseCtx(); - AbstractMapJoinOperator oldMapJoin = ctx.getCurrMapJoinOp(); - - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin); - if (mjCtx != null) { - mjCtx.setOldMapJoin(oldMapJoin); - } else { - ctx.setMapJoinCtx(mapJoin, new GenMRMapJoinCtx(null, null, null, - oldMapJoin)); - } - ctx.setCurrMapJoinOp(mapJoin); - // find the branch on which this processor was invoked int pos = getPositionParent(mapJoin, stack); @@ -292,97 +212,29 @@ pos)); Task currTask = mapredCtx.getCurrTask(); MapredWork currPlan = (MapredWork) currTask.getWork(); - mapredCtx.getCurrAliasId(); + Operator currTopOp = mapredCtx.getCurrTopOp(); + String currAliasId = mapredCtx.getCurrAliasId(); Operator reducer = mapJoin; HashMap, Task> opTaskMap = - ctx.getOpTaskMap(); + ctx.getOpTaskMap(); Task opMapTask = opTaskMap.get(reducer); + ctx.setCurrTopOp(currTopOp); + ctx.setCurrAliasId(currAliasId); ctx.setCurrTask(currTask); // If the plan for this reducer does not exist, initialize the plan if (opMapTask == null) { assert currPlan.getReducer() == null; - GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, true, null, false, pos); + initMapJoinPlan(mapJoin, ctx, pos); } else { // The current plan can be thrown away after being merged with the // original plan - GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, ctx, pos, false, - true, null); + joinMapJoinPlan(mapJoin, opMapTask, ctx, pos); currTask = opMapTask; ctx.setCurrTask(currTask); } - mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), null, null)); - return null; - } - } - - /** - * Union followed by MapJoin. - */ - public static class UnionMapJoin implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - GenMRProcContext ctx = (GenMRProcContext) procCtx; - - ParseContext parseCtx = ctx.getParseCtx(); - UnionProcContext uCtx = parseCtx.getUCtx(); - - // union was map only - no special processing needed - if (uCtx.isMapOnlySubq()) { - return (new TableScanMapJoin()) - .process(nd, stack, procCtx, nodeOutputs); - } - - UnionOperator currUnion = Utils.findNode(stack, UnionOperator.class); - assert currUnion != null; - ctx.getUnionTask(currUnion); - AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd; - - // find the branch on which this processor was invoked - int pos = getPositionParent(mapJoin, stack); - - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get( - pos)); - Task currTask = mapredCtx.getCurrTask(); - MapredWork currPlan = (MapredWork) currTask.getWork(); - Operator reducer = mapJoin; - HashMap, Task> opTaskMap = - ctx.getOpTaskMap(); - Task opMapTask = opTaskMap.get(reducer); - - // union result cannot be a map table - boolean local = (pos != mapJoin.getConf().getPosBigTable()); - if (local) { - throw new SemanticException(ErrorMsg.INVALID_MAPJOIN_TABLE.getMsg()); - } - - // If the plan for this reducer does not exist, initialize the plan - if (opMapTask == null) { - assert currPlan.getReducer() == null; - ctx.setCurrMapJoinOp(mapJoin); - GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, true, currUnion, false, pos); - ctx.setCurrUnionOp(null); - } else { - // The current plan can be thrown away after being merged with the - // original plan - Task uTask = ctx.getUnionTask(currUnion).getUTask(); - if (uTask.getId().equals(opMapTask.getId())) { - GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false, - false, currUnion); - } else { - GenMapRedUtils.joinPlan(mapJoin, uTask, opMapTask, ctx, pos, false, - false, currUnion); - } - currTask = opMapTask; - ctx.setCurrTask(currTask); - } - mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx .getCurrTopOp(), ctx.getCurrAliasId())); return null; @@ -393,22 +245,6 @@ return new TableScanMapJoin(); } - public static NodeProcessor getUnionMapJoin() { - return new UnionMapJoin(); - } - - public static NodeProcessor getReduceSinkMapJoin() { - return new ReduceSinkMapJoin(); - } - - public static NodeProcessor getMapJoin() { - return new MapJoin(); - } - - public static NodeProcessor getMapJoinMapJoin() { - return new MapJoinMapJoin(); - } - private MapJoinFactory() { // prevent instantiation } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java (working copy) @@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream; import java.io.InputStream; import java.io.Serializable; +import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -27,10 +28,13 @@ import java.util.Map; import java.util.Stack; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.ConditionalTask; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import 
org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.MapRedTask; import org.apache.hadoop.hive.ql.exec.Operator; @@ -50,10 +54,11 @@ org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx; import org.apache.hadoop.hive.ql.plan.ConditionalWork; import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; - public class CommonJoinResolver implements PhysicalPlanResolver { @Override public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { @@ -71,7 +76,6 @@ return pctx; } - /** * Iterator each tasks. If this task has a local work,create a new task for this local work, named * MapredLocalTask. then make this new generated task depends on current task's parent task, and * @@ -86,7 +90,130 @@ physicalContext = context; } - private ConditionalTask processCurrentTask(MapRedTask currTask, + // Get the position of the big table for this join operator and the given alias + private int getPosition(MapredWork work, Operator joinOp, + String alias) { + Operator parentOp = work.getAliasToWork().get(alias); + + // reduceSinkOperator's child is null, but joinOperator's parents are reduceSinks + while ((parentOp.getChildOperators() != null) && + (!parentOp.getChildOperators().isEmpty())) { + parentOp = parentOp.getChildOperators().get(0); + } + + return joinOp.getParentOperators().indexOf(parentOp); + } + + /* + * A task and its child task have been converted from join to mapjoin. + * See if the two tasks can be merged. + */ + private void mergeMapJoinTaskWithChildMapJoinTask(MapRedTask task) { + MapRedTask childTask = (MapRedTask)task.getChildTasks().get(0); + MapredWork work = task.getWork(); + MapredLocalWork localWork = work.getMapLocalWork(); + MapredWork childWork = childTask.getWork(); + MapredLocalWork childLocalWork = childWork.getMapLocalWork(); + + // Can this be merged? + Map> aliasToWork = work.getAliasToWork(); + if (aliasToWork.size() > 1) { + return; + } + + Operator op = aliasToWork.values().iterator().next(); + while (op.getChildOperators() != null) { + // Don't perform this optimization for multi-table inserts + if (op.getChildOperators().size() > 1) { + return; + } + op = op.getChildOperators().get(0); + } + + if (!(op instanceof FileSinkOperator)) { + return; + } + + FileSinkOperator fop = (FileSinkOperator)op; + String workDir = fop.getConf().getDirName(); + + Map> childPathToAliases = childWork.getPathToAliases(); + if (childPathToAliases.size() > 1) { + return; + } + + // The filesink writes to a different directory + if (!childPathToAliases.keySet().iterator().next().equals(workDir)) { + return; + } + + // Neither of them should be bucketed + if ((localWork.getBucketMapjoinContext() != null) || + (childLocalWork.getBucketMapjoinContext() != null)) { + return; + } + + // Merge the trees + if (childWork.getAliasToWork().size() > 1) { + return; + } + + Operator childAliasOp = + childWork.getAliasToWork().values().iterator().next(); + if (fop.getParentOperators().size() > 1) { + return; + } + + // Merge the 2 trees - remove the FileSinkOperator from the first tree and pass it to the + // top of the second + Operator parentFOp = fop.getParentOperators().get(0); + parentFOp.getChildOperators().remove(fop); + parentFOp.getChildOperators().add(childAliasOp); + List> parentOps = + new ArrayList>(); +
parentOps.add(parentFOp); + childAliasOp.setParentOperators(parentOps); + + work.getAliasToPartnInfo().putAll(childWork.getAliasToPartnInfo()); + for (Map.Entry childWorkEntry : + childWork.getPathToPartitionInfo().entrySet()) { + if (childWork.getAliasToPartnInfo().containsValue(childWorkEntry.getKey())) { + work.getPathToPartitionInfo().put(childWorkEntry.getKey(), childWorkEntry.getValue()); + } + } + + localWork.getAliasToFetchWork().putAll(childLocalWork.getAliasToFetchWork()); + localWork.getAliasToWork().putAll(childLocalWork.getAliasToWork()); + + // remove the child task + List> oldChildTasks = childTask.getChildTasks(); + task.setChildTasks(oldChildTasks); + if (oldChildTasks != null) { + for (Task oldChildTask : oldChildTasks) { + oldChildTask.getParentTasks().remove(childTask); + oldChildTask.getParentTasks().add(task); + } + } + } + + // create map join task and set big table as bigTablePosition + private ObjectPair convertTaskToMapJoinTask(String xml, + int bigTablePosition) throws UnsupportedEncodingException, SemanticException { + // deep copy a new mapred work from xml + InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8")); + MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf()); + // create a mapred task for this work + MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext + .getParseContext().getConf()); + JoinOperator newJoinOp = getJoinOp(newTask); + + // optimize this newWork and assume the big table is at bigTablePosition + String bigTableAlias = + MapJoinProcessor.genMapJoinOpAndLocalWork(newWork, newJoinOp, bigTablePosition); + return new ObjectPair(newTask, bigTableAlias); + } + + private Task processCurrentTask(MapRedTask currTask, ConditionalTask conditionalTask, Context context) throws SemanticException { @@ -98,13 +225,15 @@ currTask.setTaskTag(Task.COMMON_JOIN); MapredWork currWork = currTask.getWork(); + // create conditional work list and task list List listWorks = new ArrayList(); List> listTasks = new ArrayList>(); // create alias to task mapping and alias to input file mapping for resolver HashMap> aliasToTask = new HashMap>(); - HashMap> pathToAliases = currTask.getWork().getPathToAliases(); + HashMap> pathToAliases = currWork.getPathToAliases(); + Map> aliasToWork = currWork.getAliasToWork(); // get parseCtx for this Join Operator ParseContext parseCtx = physicalContext.getParseContext(); @@ -134,7 +263,7 @@ for (String alias : aliasList) { aliasTotalKnownInputSize += size; Long es = aliasToSize.get(alias); - if(es == null) { + if (es == null) { es = new Long(0); } es += size; @@ -149,13 +278,68 @@ if (bigTableCandidates == null) { return null; } + + Configuration conf = context.getConf(); + + // If all tables (but 1) are smaller than the size, convert the join into map-join and + // don't create a conditional task + boolean convertJoinMapJoin = HiveConf.getBoolVar(conf, + HiveConf.ConfVars.HIVECONVERTJOINAGGMAPJOIN); + int bigTablePosition = -1; + if (convertJoinMapJoin) { + // This is the threshold that the user has specified for a table to fit in a mapjoin + long mapJoinSize = HiveConf.getLongVar(conf, + HiveConf.ConfVars.HIVECONVERTJOINAGGMAPJOINSIZE); + + boolean bigTableFound = false; + for (String alias : aliasToWork.keySet()) { + Long size = aliasToSize.get(alias); + // If more than one table is bigger than the threshold, we create + // a conditional task. The size is not available at compile time if the + // input is a sub-query.
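The check that follows applies this rule alias by alias. As a minimal standalone sketch of the overall decision (plain Java with illustrative names, not the patch's code; the threshold plays the role of hive.auto.convert.join.aggressivemapjoin.size):

import java.util.Arrays;
import java.util.List;

// Standalone sketch of the "aggressive mapjoin" decision: if at most one input
// is of unknown size or larger than the threshold, the join is converted
// directly to a mapjoin with that input as the big table; otherwise the
// conditional-task path is kept.
public class AggressiveMapJoinDecisionSketch {

  /** Returns the big table position, or -1 if a conditional task is still needed. */
  static int chooseBigTable(List<Long> aliasSizes, long threshold) {
    int bigTablePosition = -1;
    for (int i = 0; i < aliasSizes.size(); i++) {
      Long size = aliasSizes.get(i);
      if (size == null || size > threshold) {
        if (bigTablePosition >= 0) {
          return -1;              // a second large/unknown input: keep the conditional task
        }
        bigTablePosition = i;
      }
    }
    // all inputs fit under the threshold, so any of them may serve as the big table
    return bigTablePosition >= 0 ? bigTablePosition : 0;
  }

  public static void main(String[] args) {
    long threshold = 10000L;      // default of hive.auto.convert.join.aggressivemapjoin.size
    System.out.println(chooseBigTable(Arrays.asList(5812L, 250000L), threshold));        // 1
    System.out.println(chooseBigTable(Arrays.<Long>asList(null, 250000L), threshold));   // -1
  }
}

In other words, the conditional task is kept only when two or more inputs are unknown or above the threshold; with a single large input the join is converted directly and that input becomes the big table.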
+ if ((size == null) || (size > mapJoinSize)) { + if (bigTableFound) { + convertJoinMapJoin = false; + break; + } + bigTableFound = true; + bigTablePosition = getPosition(currWork, joinOp, alias); + } + } + } + + String bigTableAlias = null; currWork.setOpParseCtxMap(parseCtx.getOpParseCtx()); currWork.setJoinTree(joinTree); - String xml = currWork.toXML(); - String bigTableAlias = null; - long ThresholdOfSmallTblSizeSum = HiveConf.getLongVar(context.getConf(), + if (convertJoinMapJoin) { + // If all the tables are small enough to fit in the mapjoin threshold, choose + // a table randomly as the big table + if (bigTablePosition < 0) { + bigTablePosition = getPosition(currWork, joinOp, + aliasToWork.keySet().iterator().next()); + } + + // create map join task and set big table as bigTablePosition + MapRedTask newTask = convertTaskToMapJoinTask(xml, bigTablePosition).getFirst(); + + newTask.setTaskTag(Task.MAPJOIN_ONLY_NOBACKUP); + replaceTask(currTask, newTask, physicalContext); + + // Can this task be merged with the child task? This can happen if a big table is being + // joined with multiple small tables on different keys. + // Further optimizations are possible here: a join which has been converted to a mapjoin + // followed by a mapjoin can be performed in a single MR job. + if ((newTask.getChildTasks() != null) && (newTask.getChildTasks().size() == 1) + && (newTask.getChildTasks().get(0).getTaskTag() == Task.MAPJOIN_ONLY_NOBACKUP)) { + mergeMapJoinTaskWithChildMapJoinTask(newTask); + } + + return newTask; + } + + long ThresholdOfSmallTblSizeSum = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVESMALLTABLESFILESIZE); for (int i = 0; i < numAliases; i++) { // this table cannot be big table @@ -164,17 +348,10 @@ } // create map join task and set big table as i - // deep copy a new mapred work from xml - InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8")); - MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf()); - // create a mapred task for this work - MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext - .getParseContext().getConf()); - JoinOperator newJoinOp = getJoinOp(newTask); + ObjectPair newTaskAlias = convertTaskToMapJoinTask(xml, i); + MapRedTask newTask = newTaskAlias.getFirst(); + bigTableAlias = newTaskAlias.getSecond(); - // optimize this newWork and assume big table position is i - bigTableAlias = MapJoinProcessor.genMapJoinOpAndLocalWork(newWork, newJoinOp, i); - Long aliasKnownSize = aliasToSize.get(bigTableAlias); if (aliasKnownSize != null && aliasKnownSize.longValue() > 0) { long smallTblTotalKnownSize = aliasTotalKnownInputSize @@ -186,7 +363,7 @@ } // add into conditional task - listWorks.add(newWork); + listWorks.add(newTask.getWork()); listTasks.add(newTask); newTask.setTaskTag(Task.CONVERTED_MAPJOIN); @@ -263,6 +440,41 @@ } } + // Replace the task with the new task. Copy the children and parents of the old + // task to the new task. + private void replaceTask( + Task currTask, Task newTask, + PhysicalContext physicalContext) { + // add this task into task tree + // set all parent tasks + List> parentTasks = currTask.getParentTasks(); + currTask.setParentTasks(null); + if (parentTasks != null) { + for (Task tsk : parentTasks) { + // make the newly generated task depend on all the parent tasks of the current task.
tsk.addDependentTask(newTask); + // remove the current task from its original parent task's dependent task list + tsk.removeDependentTask(currTask); + } + } else { + // the current task was a root task: remove it and add the new task to the root tasks + physicalContext.removeFromRootTask(currTask); + physicalContext.addToRootTask(newTask); + } + + // set all child tasks + List> oldChildTasks = currTask.getChildTasks(); + currTask.setChildTasks(null); + if (oldChildTasks != null) { + for (Task tsk : oldChildTasks) { + // make each child task of the current task depend on the newly generated task + newTask.addDependentTask(tsk); + // remove the current task from each child task's parent task list + tsk.getParentTasks().remove(currTask); + } + } + } + @Override public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) throws SemanticException { @@ -280,14 +492,15 @@ List> taskList = ((ConditionalTask) currTask).getListTasks(); for (Task tsk : taskList) { if (tsk.isMapRedTask()) { - ConditionalTask cndTask = this.processCurrentTask((MapRedTask) tsk, + Task newTask = this.processCurrentTask((MapRedTask) tsk, ((ConditionalTask) currTask), physicalContext.getContext()); - walkerCtx.addToDispatchList(cndTask); + walkerCtx.addToDispatchList(newTask); } } } else { - ConditionalTask cndTask = this.processCurrentTask((MapRedTask) currTask, null, physicalContext.getContext()); - walkerCtx.addToDispatchList(cndTask); + Task newTask = + this.processCurrentTask((MapRedTask) currTask, null, physicalContext.getContext()); + walkerCtx.addToDispatchList(newTask); } } return null; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (working copy) @@ -71,7 +71,7 @@ if (opMapTask == null) { GenMapRedUtils.splitPlan(op, ctx); } else { - GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true, false, null); + GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true); currTask = opMapTask; ctx.setCurrTask(currTask); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy) @@ -32,12 +32,10 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.MapRedTask; import org.apache.hadoop.hive.ql.exec.MoveTask; import org.apache.hadoop.hive.ql.exec.Operator; @@ -52,7 +50,6 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.RowResolver; import
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; @@ -67,7 +64,6 @@ import org.apache.hadoop.hive.ql.plan.ExtractDesc; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -774,13 +770,7 @@ private String processFS(Node nd, Stack stack, NodeProcessorCtx opProcCtx, boolean chDir) throws SemanticException { - // Is it the dummy file sink after the mapjoin FileSinkOperator fsOp = (FileSinkOperator) nd; - if ((fsOp.getParentOperators().size() == 1) - && (fsOp.getParentOperators().get(0) instanceof MapJoinOperator)) { - return null; - } - GenMRProcContext ctx = (GenMRProcContext) opProcCtx; List seenFSOps = ctx.getSeenFileSinkOps(); if (seenFSOps == null) { @@ -884,24 +874,6 @@ return dest; } - AbstractMapJoinOperator currMapJoinOp = ctx.getCurrMapJoinOp(); - - if (currMapJoinOp != null) { - opTaskMap.put(null, currTask); - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(currMapJoinOp); - MapredWork plan = (MapredWork) currTask.getWork(); - - String taskTmpDir = mjCtx.getTaskTmpDir(); - TableDesc tt_desc = mjCtx.getTTDesc(); - assert plan.getPathToAliases().get(taskTmpDir) == null; - plan.getPathToAliases().put(taskTmpDir, new ArrayList()); - plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - plan.getPathToPartitionInfo().put(taskTmpDir, - new PartitionDesc(tt_desc, null)); - plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); - return dest; - } - return dest; } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (working copy) @@ -75,9 +75,9 @@ public static final int CONVERTED_MAPJOIN = 2; public static final int CONVERTED_LOCAL_MAPJOIN = 3; public static final int BACKUP_COMMON_JOIN = 4; - public static final int LOCAL_MAPJOIN=5; + public static final int LOCAL_MAPJOIN = 5; + public static final int MAPJOIN_ONLY_NOBACKUP = 6; - // Descendants tasks who subscribe feeds from this task protected transient List> feedSubscribers; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java (working copy) @@ -148,4 +148,14 @@ public OperatorType getType() { return OperatorType.UNION; } + + @Override + public boolean opAllowedBeforeMapJoin() { + return false; + } + + @Override + public boolean opAllowedAfterMapJoin() { + return false; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (working copy) @@ -1429,4 +1429,22 @@ public boolean supportUnionRemoveOptimization() { return false; } + + /* + * This operator is allowed before mapjoin. Eventually, mapjoin hint should be done away with. + * But, since bucketized mapjoin and sortmerge join depend on it completely. it is needed. + * Check the operators which are allowed before mapjoin. 
+ */ + public boolean opAllowedBeforeMapJoin() { + return true; + } + + /* + * This operator is allowed after mapjoin. Eventually, mapjoin hint should be done away with. + * But, since bucketized mapjoin and sortmerge join depend on it completely. it is needed. + * Check the operators which are allowed after mapjoin. + */ + public boolean opAllowedAfterMapJoin() { + return true; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (working copy) @@ -321,4 +321,9 @@ public OperatorType getType() { return OperatorType.REDUCESINK; } + + @Override + public boolean opAllowedBeforeMapJoin() { + return false; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (working copy) @@ -934,4 +934,13 @@ this.posToAliasMap = posToAliasMap; } + @Override + public boolean opAllowedBeforeMapJoin() { + return false; + } + + @Override + public boolean opAllowedAfterMapJoin() { + return false; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java (working copy) @@ -19,15 +19,11 @@ import java.io.Serializable; import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Map; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; @@ -51,7 +47,7 @@ HashMap> pathToAliases; HashMap aliasToKnownSize; private Task commonJoinTask; - + private String localTmpDir; private String hdfsTmpDir; @@ -155,7 +151,7 @@ return (int)(size - o.size); } } - + private String resolveMapJoinTask( HashMap> pathToAliases, HashMap> aliasToTask, @@ -164,14 +160,14 @@ String bigTableFileAlias = null; long smallTablesFileSizeSum = 0; - + Map aliasToFileSizeMap = new HashMap(); for (Map.Entry entry : aliasToKnownSize.entrySet()) { String alias = entry.getKey(); AliasFileSizePair pair = new AliasFileSizePair(alias, entry.getValue()); aliasToFileSizeMap.put(alias, pair); } - + try { // need to compute the input size at runtime, and select the biggest as // the big table. @@ -199,7 +195,7 @@ } // generate file size to alias mapping; but not set file size as key, // because different file may have the same file size. 
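A minimal standalone sketch of the runtime selection described above (illustrative names, not Hive's classes): the resolver keeps (alias, size) pairs in a list ordered by size instead of keying a map on the size, because two inputs can have exactly the same size; the largest input becomes the big table and the rest are loaded as small tables.

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

// Standalone sketch: pick the big table at runtime from (alias, size) pairs.
public class BigTableSelectionSketch {

  static class AliasFileSizePair {
    final String alias;
    final long size;
    AliasFileSizePair(String alias, long size) {
      this.alias = alias;
      this.size = size;
    }
  }

  // Sort with the biggest input first; a Map keyed on the size would silently
  // collapse two inputs of identical size, so a list of pairs is used instead.
  static String selectBigTable(List<AliasFileSizePair> pairs) {
    Collections.sort(pairs, new Comparator<AliasFileSizePair>() {
      @Override
      public int compare(AliasFileSizePair a, AliasFileSizePair b) {
        return Long.compare(b.size, a.size);
      }
    });
    return pairs.isEmpty() ? null : pairs.get(0).alias;
  }

  public static void main(String[] args) {
    List<AliasFileSizePair> pairs = new ArrayList<AliasFileSizePair>();
    pairs.add(new AliasFileSizePair("subq1", 5812L));
    pairs.add(new AliasFileSizePair("subq2", 5812L));   // equal sizes are preserved
    pairs.add(new AliasFileSizePair("y", 120000L));
    System.out.println(selectBigTable(pairs));          // prints "y"
  }
}

The aliasFileSizeList built just below in the resolver serves the same purpose.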
- + List aliasFileSizeList = new ArrayList( aliasToFileSizeMap.values()); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -72,7 +72,6 @@ import org.apache.hadoop.hive.ql.exec.RecordWriter; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; -import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; @@ -107,7 +106,6 @@ import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3; -import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink4; import org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1; import org.apache.hadoop.hive.ql.optimizer.GenMRUnion1; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; @@ -2441,7 +2439,7 @@ boolean subQuery = qb.getParseInfo().getIsSubQ(); if (expr.getType() == HiveParser.TOK_ALLCOLREF) { pos = genColListRegex(".*", expr.getChildCount() == 0 ? null - : getUnescapedName((ASTNode)expr.getChild(0)).toLowerCase(), + : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list, inputRR, pos, out_rwsch, qb.getAliases(), subQuery); selectStar = true; } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause @@ -2455,7 +2453,7 @@ } else if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0) - .getChild(0).getText().toLowerCase())) && !hasAsClause + .getChild(0).getText().toLowerCase())) && !hasAsClause && !inputRR.getIsExprResolver() && isRegex(unescapeIdentifier(expr.getChild(1).getText()))) { // In case the expression is TABLE.COL (col can be regex). @@ -2463,7 +2461,7 @@ // We don't allow this for ExprResolver - the Group By case pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()), unescapeIdentifier(expr.getChild(0).getChild(0).getText() - .toLowerCase()), expr, col_list, inputRR, pos, out_rwsch, + .toLowerCase()), expr, col_list, inputRR, pos, out_rwsch, qb.getAliases(), subQuery); } else { // Case when this is an expression @@ -5113,7 +5111,7 @@ // set the stats publishing/aggregating key prefix // the same as directory name. The directory name - // can be changed in the optimizer but the key should not be changed + // can be changed in the optimizer but the key should not be changed // it should be the same as the MoveWork's sourceDir. 
fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName()); @@ -8087,24 +8085,9 @@ opRules.put(new RuleRegExp(new String("R6"), UnionOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink3()); - opRules.put(new RuleRegExp(new String("R6"), - MapJoinOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), - new GenMRRedSink4()); opRules.put(new RuleRegExp(new String("R7"), - TableScanOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"), + MapJoinOperator.getOperatorName() + "%"), MapJoinFactory.getTableScanMapJoin()); - opRules.put(new RuleRegExp(new String("R8"), - ReduceSinkOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"), - MapJoinFactory.getReduceSinkMapJoin()); - opRules.put(new RuleRegExp(new String("R9"), - UnionOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"), - MapJoinFactory.getUnionMapJoin()); - opRules.put(new RuleRegExp(new String("R10"), - MapJoinOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"), - MapJoinFactory.getMapJoinMapJoin()); - opRules.put(new RuleRegExp(new String("R11"), - MapJoinOperator.getOperatorName() + "%" + SelectOperator.getOperatorName() + "%"), - MapJoinFactory.getMapJoin()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along Index: ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (revision 1438474) +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (working copy) @@ -324,6 +324,9 @@ "(higher than the number of rows per input row due to grouping sets in the query), or " + "rewrite the query to not use distincts."), + OPERATOR_NOT_ALLOWED_WITH_MAPJOIN(10227, + "All operators are not allowed with mapjoin hint. Remove the mapjoin hint."), + SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."), SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. " + "It may have crashed with an error."),
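The opAllowedBeforeMapJoin()/opAllowedAfterMapJoin() overrides added above are presumably consulted when validating a mapjoin hint, raising the new 10227 error when an operator on either side of the hinted mapjoin opts out. A minimal standalone sketch of such a check (the Op class and the wiring are illustrative, not Hive's actual operator tree):

import java.util.ArrayList;
import java.util.List;

// Standalone sketch: reject a mapjoin hint if any operator feeding the mapjoin
// (or consuming its output) opts out via the opAllowed* flags.
public class MapJoinHintCheckSketch {

  static class Op {
    final String name;
    final List<Op> parents = new ArrayList<Op>();
    final List<Op> children = new ArrayList<Op>();
    Op(String name) { this.name = name; }
    boolean opAllowedBeforeMapJoin() { return true; }   // default, as in Operator
    boolean opAllowedAfterMapJoin() { return true; }
  }

  static class UnionOp extends Op {
    UnionOp(String name) { super(name); }
    @Override boolean opAllowedBeforeMapJoin() { return false; }  // as in UnionOperator
    @Override boolean opAllowedAfterMapJoin() { return false; }
  }

  static boolean hintAllowed(Op mapJoin) {
    for (Op parent : mapJoin.parents) {
      if (!parent.opAllowedBeforeMapJoin()) {
        return false;     // e.g. a union or reduce sink directly above the mapjoin
      }
    }
    for (Op child : mapJoin.children) {
      if (!child.opAllowedAfterMapJoin()) {
        return false;     // e.g. a union or another join below the mapjoin
      }
    }
    return true;
  }

  public static void main(String[] args) {
    Op union = new UnionOp("UNION");
    Op mapJoin = new Op("MAPJOIN");
    union.children.add(mapJoin);
    mapJoin.parents.add(union);
    System.out.println(hintAllowed(mapJoin));   // false: a union feeds the mapjoin
  }
}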