Index: build.properties
===================================================================
--- build.properties (revision 1436745)
+++ build.properties (working copy)
@@ -79,7 +79,7 @@
 # (measured in milliseconds). Ignored if fork is disabled. When running
 # multiple tests inside the same Java VM (see forkMode), timeout
 # applies to the time that all tests use together, not to an individual test.
-test.junit.timeout=43200000
+test.junit.timeout=432000000
 
 # Use this property to selectively disable tests from the command line:
 # ant test -Dtest.junit.exclude="**/TestCliDriver.class"
Index: ql/src/test/results/clientpositive/join29.q.out
===================================================================
--- ql/src/test/results/clientpositive/join29.q.out (revision 1436745)
+++ ql/src/test/results/clientpositive/join29.q.out (working copy)
@@ -1,318 +0,0 @@
-PREHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT)
-PREHOOK: type: CREATETABLE
-POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: default@dest_j1
-PREHOOK: query: EXPLAIN
-INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt
-FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
- (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
-INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt
-FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
- (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
-POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL subq2) cnt))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-11 depends on stages: Stage-1, Stage-9 - Stage-2 depends on stages: Stage-11 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-9 is a root stage - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - subq2:y - TableScan - alias: y - Select Operator - expressions: - expr: key - type: string - outputColumnNames: key - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-11 - Map Reduce Local Work - Alias -> Map Local Tables: -#### A masked pattern was here #### - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: -#### A masked pattern was here #### - HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col3 - type: bigint - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - expr: _col3 - type: bigint - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - expr: UDFToInteger(_col2) - type: int - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Local Work: - Map Reduce Local Work - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: 
Stage-3 - Stats-Aggr Operator - - Stage: Stage-4 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-6 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-9 - Map Reduce - Alias -> Map Operator Tree: - subq1:x - TableScan - alias: x - Select Operator - expressions: - expr: key - type: string - outputColumnNames: key - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - -PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Output: default@dest_j1 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.cnt1 EXPRESSION [(src1)x.null, ] -POSTHOOK: Lineage: dest_j1.cnt2 EXPRESSION [(src)y.null, ] -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: select * from dest_j1 x order by x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: query: select * from dest_j1 x order by x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: Lineage: dest_j1.cnt1 EXPRESSION [(src1)x.null, ] -POSTHOOK: Lineage: dest_j1.cnt2 EXPRESSION [(src)y.null, ] -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -128 1 3 -146 1 
2 -150 1 1 -213 1 2 -224 1 2 -238 1 2 -255 1 2 -273 1 3 -278 1 2 -311 1 3 -369 1 3 -401 1 5 -406 1 4 -66 1 1 -98 1 2 Index: ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out (working copy) @@ -71,7 +71,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -93,21 +92,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -162,47 +160,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -233,7 +190,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/join35.q.out =================================================================== --- ql/src/test/results/clientpositive/join35.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/join35.q.out (working copy) @@ -1,704 +0,0 @@ -PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt 
from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-11 depends on stages: Stage-1, Stage-9 - Stage-2 depends on stages: Stage-11 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-9 is a root stage - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq1-subquery1:x - TableScan - alias: x - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (key < 20.0) - type: boolean - Select Operator - expressions: - expr: key - type: string - outputColumnNames: key - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numPartitions 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numPartitions 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Truncated Path -> Alias: - /src [null-subquery1:subq1-subquery1:x] - - Stage: Stage-11 - Map Reduce Local Work - Alias -> Map Local Tables: - x - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - x - TableScan - alias: x - GatherStats: false - HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {key} {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - GatherStats: false - Union - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col1} - 1 {key} {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col1, _col2, _col3 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - outputColumnNames: _col1, _col2, _col3 - Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false -#### A masked pattern was here #### - TableScan - GatherStats: false - Union - Map Join Operator - condition map: - 
Inner Join 0 to 1 - condition expressions: - 0 {_col1} - 1 {key} {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col1, _col2, _col3 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - outputColumnNames: _col1, _col2, _col3 - Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ -#### A masked pattern was here #### - Partition - base file name: -mr-10004 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 -#### A masked pattern was here #### - - Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-4 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10003 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - name: default.dest_j1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10003 - input format: org.apache.hadoop.mapred.TextInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - name: default.dest_j1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-9 - Map Reduce - Alias -> Map Operator Tree: - null-subquery2:subq1-subquery2:x1 - TableScan - alias: x1 - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (key > 100.0) - type: boolean - Select Operator - expressions: - expr: key - type: string - outputColumnNames: key - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numPartitions 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numPartitions 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - mode: mergepartial - outputColumnNames: _col0, 
_col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Truncated Path -> Alias: - /src [null-subquery2:subq1-subquery2:x1] - - -PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Output: default@dest_j1 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.null, (src)x1.null, ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select * from dest_j1 x order by x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: query: select * from dest_j1 x order by x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.null, (src)x1.null, ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] -128 3 -146 val_146 2 -150 val_150 1 -213 val_213 2 -224 2 -238 val_238 2 -255 val_255 2 -273 val_273 3 -278 val_278 2 -311 val_311 3 -369 3 -401 val_401 5 -406 val_406 4 Index: ql/src/test/results/clientpositive/mapjoin_subquery2.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_subquery2.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/mapjoin_subquery2.q.out (working copy) @@ -1,273 +0,0 @@ -PREHOOK: query: drop table x -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table x -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table y -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table y -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table z -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table z -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE x (name STRING, id INT) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE x (name STRING, id INT) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@x -PREHOOK: query: CREATE TABLE y (id INT, name 
STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE y (id INT, name STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@y -PREHOOK: query: CREATE TABLE z (id INT, name STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE z (id INT, name STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@z -PREHOOK: query: load data local inpath '../data/files/x.txt' INTO TABLE x -PREHOOK: type: LOAD -PREHOOK: Output: default@x -POSTHOOK: query: load data local inpath '../data/files/x.txt' INTO TABLE x -POSTHOOK: type: LOAD -POSTHOOK: Output: default@x -PREHOOK: query: load data local inpath '../data/files/y.txt' INTO TABLE y -PREHOOK: type: LOAD -PREHOOK: Output: default@y -POSTHOOK: query: load data local inpath '../data/files/y.txt' INTO TABLE y -POSTHOOK: type: LOAD -POSTHOOK: Output: default@y -PREHOOK: query: load data local inpath '../data/files/z.txt' INTO TABLE z -PREHOOK: type: LOAD -PREHOOK: Output: default@z -POSTHOOK: query: load data local inpath '../data/files/z.txt' INTO TABLE z -POSTHOOK: type: LOAD -POSTHOOK: Output: default@z -PREHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id) -PREHOOK: type: QUERY -PREHOOK: Input: default@x -PREHOOK: Input: default@y -PREHOOK: Input: default@z -#### A masked pattern was here #### -POSTHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@x -POSTHOOK: Input: default@y -POSTHOOK: Input: default@z -#### A masked pattern was here #### -2 Joe 2 Tie 2 Tie -2 Hank 2 Tie 2 Tie -PREHOOK: query: EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id) -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME y)) (TOK_TABREF (TOK_TABNAME x)) (= (. (TOK_TABLE_OR_COL x) id) (. (TOK_TABLE_OR_COL y) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) id) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) name) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) id) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) name) value2)))) subq) (TOK_TABREF (TOK_TABNAME z)) (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL subq) value1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) id)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) name))))) - -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - subq:x - Fetch Operator - limit: -1 - z - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq:x - TableScan - alias: x - HashTable Sink Operator - condition expressions: - 0 {id} {name} - 1 {name} {id} - handleSkewJoin: false - keys: - 0 [Column[id]] - 1 [Column[id]] - Position of Big Table: 0 - z - TableScan - alias: z - HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} - 1 {id} {name} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[id]] - Position of Big Table: 0 - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - subq:y - TableScan - alias: y - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {id} {name} - 1 {name} {id} - handleSkewJoin: false - keys: - 0 [Column[id]] - 1 [Column[id]] - outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: int - outputColumnNames: _col0, _col1, _col4, _col5 - Select Operator - expressions: - expr: _col5 - type: int - expr: _col4 - type: string - expr: _col0 - type: int - expr: _col1 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} - 1 {id} {name} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[id]] - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id) -PREHOOK: type: QUERY -PREHOOK: Input: default@x -PREHOOK: Input: default@y -PREHOOK: Input: default@z -#### A masked pattern was here #### -POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@x -POSTHOOK: Input: default@y 
-POSTHOOK: Input: default@z -#### A masked pattern was here #### -2 Joe 2 Tie 2 Tie -2 Hank 2 Tie 2 Tie -PREHOOK: query: drop table x -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@x -PREHOOK: Output: default@x -POSTHOOK: query: drop table x -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@x -POSTHOOK: Output: default@x -PREHOOK: query: drop table y -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@y -PREHOOK: Output: default@y -POSTHOOK: query: drop table y -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@y -POSTHOOK: Output: default@y -PREHOOK: query: drop table z -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@z -PREHOOK: Output: default@z -POSTHOOK: query: drop table z -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@z -POSTHOOK: Output: default@z Index: ql/src/test/results/clientpositive/join30.q.out =================================================================== --- ql/src/test/results/clientpositive/join30.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/join30.q.out (working copy) @@ -15,14 +15,13 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: x @@ -60,50 +59,39 @@ 1 [Column[key]] outputColumnNames: _col0 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: + Select Operator + expressions: expr: _col0 type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: + outputColumnNames: _col0 + Select Operator + expressions: expr: _col0 type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: @@ -147,7 +135,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: 
default.dest_j1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator Index: ql/src/test/results/clientpositive/bucketcontext_4.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_4.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketcontext_4.q.out (working copy) @@ -81,13 +81,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -133,21 +132,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -204,47 +202,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -275,7 +232,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] Stage: Stage-0 Fetch Operator @@ -308,7 +265,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -330,21 +286,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -399,47 +354,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -470,7 +384,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin9.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin9.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketmapjoin9.q.out (working copy) @@ -70,13 +70,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -115,21 +114,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -184,47 +182,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -255,7 +212,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator @@ -336,13 +293,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -381,21 +337,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -450,47 +405,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -521,7 +435,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin13.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin13.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketmapjoin13.q.out (working copy) @@ -98,13 +98,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -143,21 +142,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -260,48 +258,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -332,7 +288,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator @@ -390,13 +347,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. 
(TOK_TABLE_OR_COL a) part) '2')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -442,21 +398,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -511,47 +466,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -582,7 +496,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator @@ -652,13 +566,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -704,21 +617,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -773,47 +685,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -844,7 +715,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator @@ -916,13 +787,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -968,21 +838,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -1037,47 +906,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -1108,7 +936,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/join28.q.out =================================================================== --- ql/src/test/results/clientpositive/join28.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/join28.q.out (working copy) @@ -1,334 +0,0 @@ -PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) -PREHOOK: type: QUERY 
-POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))))) - -STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-1 depends on stages: Stage-10 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - subq:x - Fetch Operator - limit: -1 - z - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq:x - TableScan - alias: x - HashTable Sink Operator - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 - z - TableScan - alias: z - HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - subq:y - TableScan - alias: y - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col0, _col5 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Local Work: - Map Reduce Local Work - - 
Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-5 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - -PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Output: default@dest_j1 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select * from dest_j1 x order by x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: query: select * from dest_j1 x order by x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -146 val_146 -146 val_146 -146 val_146 -146 val_146 -150 val_150 -213 val_213 -213 val_213 -213 val_213 -213 val_213 -224 val_224 -224 val_224 -224 val_224 -224 val_224 -238 val_238 -238 val_238 -238 val_238 -238 val_238 -255 val_255 -255 val_255 -255 val_255 -255 val_255 -273 val_273 -273 val_273 -273 
val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -278 val_278 -278 val_278 -278 val_278 -278 val_278 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -66 val_66 -98 val_98 -98 val_98 -98 val_98 -98 val_98 Index: ql/src/test/results/clientpositive/smb_mapjoin_13.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_13.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/smb_mapjoin_13.q.out (working copy) @@ -77,7 +77,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -100,21 +99,43 @@ 1 [Column[value]] outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col4, _col5 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -165,70 +186,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table1 name: default.test_table1 - Truncated Path -> Alias: - /test_table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col4, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - Needs Tagging: false - Path -> Alias: -#### A masked 
pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ Reduce Operator Tree: Extract Limit @@ -250,7 +207,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /test_table1 [a] Stage: Stage-0 Fetch Operator @@ -307,13 +264,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table3) a) (TOK_TABREF (TOK_TABNAME test_table4) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -353,21 +309,43 @@ 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[value]()] outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col4, _col5 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string Local Work: Map Reduce Local Work Needs Tagging: false @@ -420,70 +398,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 name: default.test_table3 - Truncated Path -> Alias: - /test_table3 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col4, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - Reduce Output Operator - key expressions: - expr: _col0 - 
type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ Reduce Operator Tree: Extract Limit @@ -505,7 +419,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /test_table3 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out (working copy) @@ -64,13 +64,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table_desc1) a) (TOK_TABREF (TOK_TABNAME table_desc2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (. 
(TOK_TABLE_OR_COL a) key) 10)))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -116,37 +115,26 @@ 1 [Column[key], Column[value]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/join34.q.out =================================================================== --- ql/src/test/results/clientpositive/join34.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/join34.q.out (working copy) @@ -1,503 +0,0 @@ -PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value -FROM -( SELECT x.key as key, x.value as value from src x where x.key < 20 - UNION ALL - SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 -) subq1 -JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value -FROM -( SELECT x.key as key, x.value as value from src x where x.key < 20 - UNION ALL - SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 -) subq1 -JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) value) value)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. 
(TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value))))) - -STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-1 depends on stages: Stage-10 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - x - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - x - TableScan - alias: x - GatherStats: false - HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {key} {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - null-subquery1:subq1-subquery1:x - TableScan - alias: x - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (key < 20.0) - type: boolean - Select Operator - expressions: - expr: key - type: string - expr: value - type: string - outputColumnNames: _col0, _col1 - Union - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col1} - 1 {key} {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col1, _col2, _col3 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - outputColumnNames: _col1, _col2, _col3 - Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - null-subquery2:subq1-subquery2:x1 - TableScan - alias: x1 - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (key > 100.0) - type: boolean - Select Operator - expressions: - expr: key - type: string - expr: value - type: string - outputColumnNames: _col0, _col1 - Union - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col1} - 1 {key} {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col1, _col2, _col3 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - outputColumnNames: _col1, _col2, _col3 - Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - 
type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numPartitions 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numPartitions 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [null-subquery1:subq1-subquery1:x, null-subquery2:subq1-subquery2:x1] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 -#### A masked pattern was here #### - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 
-#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - name: default.dest_j1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - name: default.dest_j1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - -PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value -FROM -( SELECT x.key as key, x.value as value from src x where x.key < 20 - UNION ALL - SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 -) subq1 -JOIN src1 x ON (x.key = subq1.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Output: default@dest_j1 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value -FROM -( SELECT x.key as key, x.value as value from src x where x.key < 20 - UNION ALL - SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 -) subq1 -JOIN src1 x ON (x.key = subq1.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.FieldSchema(name:value, type:string, comment:default), (src)x1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select * from dest_j1 x order by x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: query: select * from dest_j1 x order by x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.FieldSchema(name:value, type:string, comment:default), (src)x1.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] -128 val_128 -128 val_128 -128 val_128 -146 val_146 val_146 -146 val_146 val_146 -150 val_150 val_150 -213 val_213 val_213 -213 val_213 val_213 -224 val_224 -224 val_224 -238 val_238 val_238 -238 val_238 val_238 -255 val_255 val_255 -255 val_255 val_255 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -278 val_278 val_278 -278 val_278 val_278 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -369 val_369 -369 val_369 -369 val_369 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 Index: 
ql/src/test/results/clientpositive/skewjoin.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoin.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/skewjoin.q.out (working copy) @@ -1524,13 +1524,12 @@ (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) k) (TOK_TABREF (TOK_TABNAME T1) v) (= (+ (. (TOK_TABLE_OR_COL k) key) 1) (. (TOK_TABLE_OR_COL v) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST v))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL k) key)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL v) val))))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: v @@ -1568,48 +1567,37 @@ 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()] outputColumnNames: _col0, _col5 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Group By Operator + aggregations: + expr: sum(hash(_col0)) + expr: sum(hash(_col5)) + bucketGroup: false + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + expr: _col1 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col5)) - bucketGroup: false - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/bucketcontext_8.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_8.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketcontext_8.q.out (working copy) @@ -94,13 +94,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -146,21 +145,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -267,48 +265,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -339,7 +295,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -374,7 +331,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -396,21 +352,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: 
false Path -> Alias: #### A masked pattern was here #### @@ -515,48 +470,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -587,7 +500,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketcontext_3.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_3.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketcontext_3.q.out (working copy) @@ -69,13 +69,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -121,21 +120,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -192,47 +190,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -263,7 +220,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] Stage: Stage-0 Fetch Operator @@ -296,7 +253,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -318,21 +274,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ 
-387,47 +342,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -458,7 +372,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin8.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin8.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketmapjoin8.q.out (working copy) @@ -64,13 +64,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -116,21 +115,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -185,47 +183,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -256,7 +213,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator @@ -308,13 +265,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -360,21 +316,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -429,47 +384,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -500,7 +414,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin12.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin12.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketmapjoin12.q.out (working copy) @@ -92,13 +92,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -144,21 +143,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -213,47 +211,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -284,7 +241,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator @@ -328,13 +285,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_3) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -373,21 +329,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -442,47 +397,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -513,7 +427,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucket_map_join_2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucket_map_join_2.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucket_map_join_2.q.out (working copy) @@ -50,13 +50,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table1) a) (TOK_TABREF (TOK_TABNAME table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. 
(TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -95,21 +94,20 @@ 0 [Column[key], Column[value]] 1 [Column[key], Column[value]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -162,47 +160,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.table1 name: default.table1 - Truncated Path -> Alias: - /table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -233,7 +190,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /table1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out (working copy) @@ -67,7 +67,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -93,35 +92,24 @@ 1 [Column[key], Column[value]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - 
Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: + Select Operator + expressions: expr: _col0 - type: bigint + type: string + outputColumnNames: _col0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/join38.q.out =================================================================== --- ql/src/test/results/clientpositive/join38.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/join38.q.out (working copy) @@ -73,13 +73,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME tmp) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) col11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) col5)) (TOK_SELEXPR (TOK_FUNCTION count 1) count)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) col11) 111)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) col5)))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -125,62 +124,51 @@ 1 [Column[col11]] outputColumnNames: _col1, _col9, _col15 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - expr: _col9 - type: string - expr: _col15 - type: string - outputColumnNames: _col1, _col9, _col15 - Select Operator - expressions: - expr: _col1 - type: string - expr: _col9 - type: string - outputColumnNames: _col1, _col9 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col1 - type: string - expr: _col9 - type: string - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: - expr: _col0 - type: string + Select Operator + expressions: expr: _col1 type: string - sort order: ++ - Map-reduce partition columns: - expr: _col0 + expr: _col9 type: string - expr: _col1 + expr: _col15 type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + outputColumnNames: _col1, _col9, _col15 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col9 + type: string + outputColumnNames: _col1, _col9 + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: _col1 + type: string + expr: _col9 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: 
-1 + value expressions: + expr: _col2 + type: bigint + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out (working copy) @@ -56,13 +56,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1)) (TOK_TABREF (TOK_TABNAME src1) src2) (AND (AND (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)) (< (. (TOK_TABLE_OR_COL src1) key) 10)) (> (. (TOK_TABLE_OR_COL src2) key) 10))) (TOK_TABREF (TOK_TABNAME src) src3) (AND (= (. (TOK_TABLE_OR_COL src2) key) (. (TOK_TABLE_OR_COL src3) key)) (< (. (TOK_TABLE_OR_COL src3) key) 300)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src1 src2))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src3) key))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: src1 @@ -146,72 +145,61 @@ 2 [Column[key]] outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 Position of Big Table: 2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: string + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col2 + type: string + expr: _col4 + type: string + sort order: +++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 
- type: string - expr: _col4 - type: string - sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator Index: ql/src/test/results/clientpositive/join33.q.out =================================================================== --- ql/src/test/results/clientpositive/join33.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/join33.q.out (working copy) @@ -1,436 +0,0 @@ -PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL y) value))))) - -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-3 depends on stages: Stage-6 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - x - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - x - TableScan - alias: x - GatherStats: false - HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: - y - TableScan - alias: y - GatherStats: false - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col5 - columns.types string,string,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numPartitions 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numPartitions 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [y] - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Reduce Output Operator - key expressions: - expr: _col1 - type: string - sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: 0 - value expressions: - expr: _col5 - type: string - expr: _col0 - type: string - z - TableScan - alias: z - GatherStats: false - Reduce Output Operator - key expressions: - expr: value - type: string - sort order: + - Map-reduce partition columns: - 
expr: value - type: string - tag: 1 - value expressions: - expr: value - type: string - Needs Tagging: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col5 - columns.types string,string,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col5 - columns.types string,string,string - escape.delim \ -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - bucket_count -1 - columns key,value - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numPartitions 4 - numRows 0 - partition_columns ds/hr - rawDataSize 0 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 4 - numPartitions 4 - numRows 0 - partition_columns ds/hr - rawDataSize 0 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 23248 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col1} {VALUE._col4} - 1 {VALUE._col1} - handleSkewJoin: false - outputColumnNames: _col1, _col4, _col9 - Select Operator - expressions: - expr: _col4 - type: string - expr: _col9 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - 
table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 -#### A masked pattern was here #### - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - -PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Output: default@dest_j1 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select * from dest_j1 x order by x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: query: select * from dest_j1 x order by x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] -146 val_146 val_146 -146 val_146 val_146 -146 val_146 val_146 -146 val_146 val_146 -150 val_150 val_150 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -238 val_238 val_238 -238 val_238 val_238 -238 val_238 val_238 -238 val_238 val_238 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -278 val_278 val_278 -278 val_278 val_278 -278 val_278 val_278 -278 val_278 val_278 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 
val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -66 val_66 val_66 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 Index: ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (working copy) @@ -1,324 +0,0 @@ -PREHOOK: query: explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) -PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL srcpart) key))))) - -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-1 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-5 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - src - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - src - TableScan - alias: src - HashTable Sink Operator - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[value]] - 1 [Column[value]] - Position of Big Table: 0 - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - srcpart - TableScan - alias: srcpart - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[value]] - 1 [Column[value]] - outputColumnNames: _col0 - Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - src1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - src1 - TableScan - alias: src1 - HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds -PREHOOK: type: QUERY -POSTHOOK: query: explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. 
(TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)))) - -STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-1 depends on stages: Stage-7 - Stage-6 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-2 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - src - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - src - TableScan - alias: src - HashTable Sink Operator - condition expressions: - 0 {key} {ds} - 1 - handleSkewJoin: false - keys: - 0 [Column[value]] - 1 [Column[value]] - Position of Big Table: 0 - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - srcpart - TableScan - alias: srcpart - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {ds} - 1 - handleSkewJoin: false - keys: - 0 [Column[value]] - 1 [Column[value]] - outputColumnNames: _col0, _col2 - Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - src1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - src1 - TableScan - alias: src1 - HashTable Sink Operator - condition expressions: - 0 {_col2} - 1 - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col2 - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col2} - 1 - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col2 - Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col2 - type: string - outputColumnNames: _col2 - Select Operator - expressions: - expr: _col2 - type: string - outputColumnNames: _col2 - Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col2 - type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col1 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -5308 -5308 Index: ql/src/test/results/clientpositive/bucketcontext_7.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_7.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketcontext_7.q.out (working copy) @@ -94,13 +94,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -146,21 +145,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -267,48 +265,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here 
#### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -339,7 +295,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -374,7 +331,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -396,21 +352,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -515,48 +470,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -587,7 +500,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketcontext_2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_2.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketcontext_2.q.out (working copy) @@ -69,13 +69,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -121,21 +120,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -242,48 +240,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -314,7 +270,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -347,7 +304,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -369,21 +325,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: 
false Path -> Alias: #### A masked pattern was here #### @@ -488,48 +443,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -560,7 +473,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin11.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin11.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketmapjoin11.q.out (working copy) @@ -124,13 +124,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -176,21 +175,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -293,48 +291,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -365,7 +321,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator @@ -409,13 +366,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) (. (TOK_TABLE_OR_COL b) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -461,21 +417,20 @@ 0 [Column[key], Column[part]] 1 [Column[key], Column[part]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -578,48 +533,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -650,7 +563,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/smb_mapjoin_16.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_16.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_mapjoin_16.q.out (working copy) @@ -0,0 +1,120 @@ +PREHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) 
SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Mapjoin followed by an aggregation should be performed in a single MR job +EXPLAIN +SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Mapjoin followed by an aggregation should be performed in a single MR job +EXPLAIN +SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (.
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +1028 Index: ql/src/test/results/clientpositive/mapjoin_subquery.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_subquery.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/mapjoin_subquery.q.out (working copy) @@ -1,539 +0,0 @@ -PREHOOK: query: EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. 
(TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))))) - -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - subq:x - Fetch Operator - limit: -1 - z - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq:x - TableScan - alias: x - HashTable Sink Operator - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 - z - TableScan - alias: z - HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - subq:y - TableScan - alias: y - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col0, _col5 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) -POSTHOOK: type: QUERY -POSTHOOK: 
Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -238 val_238 -238 val_238 -311 val_311 -311 val_311 -311 val_311 -255 val_255 -255 val_255 -278 val_278 -278 val_278 -98 val_98 -98 val_98 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -150 val_150 -273 val_273 -273 val_273 -273 val_273 -224 val_224 -224 val_224 -369 val_369 -369 val_369 -369 val_369 -66 val_66 -128 val_128 -128 val_128 -128 val_128 -213 val_213 -213 val_213 -146 val_146 -146 val_146 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -128 val_128 -128 val_128 -128 val_128 -311 val_311 -311 val_311 -311 val_311 -213 val_213 -213 val_213 -278 val_278 -278 val_278 -311 val_311 -311 val_311 -311 val_311 -98 val_98 -98 val_98 -369 val_369 -369 val_369 -369 val_369 -238 val_238 -238 val_238 -273 val_273 -273 val_273 -273 val_273 -224 val_224 -224 val_224 -369 val_369 -369 val_369 -369 val_369 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -128 val_128 -128 val_128 -128 val_128 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -255 val_255 -255 val_255 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -146 val_146 -146 val_146 -273 val_273 -273 val_273 -273 val_273 -PREHOOK: query: EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1 -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL subq) key1))))) - -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-1 - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - subq:x - Fetch Operator - limit: -1 - z - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq:x - TableScan - alias: x - HashTable Sink Operator - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 - z - TableScan - alias: z - HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - Position of Big Table: 0 - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - subq:y - TableScan - alias: y - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col0, _col5 - Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - Reduce Operator Tree: - Extract - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -#### 
A masked pattern was here #### -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -128 val_128 -146 val_146 -146 val_146 -146 val_146 -146 val_146 -150 val_150 -213 val_213 -213 val_213 -213 val_213 -213 val_213 -224 val_224 -224 val_224 -224 val_224 -224 val_224 -238 val_238 -238 val_238 -238 val_238 -238 val_238 -255 val_255 -255 val_255 -255 val_255 -255 val_255 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -273 val_273 -278 val_278 -278 val_278 -278 val_278 -278 val_278 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -311 val_311 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -369 val_369 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -401 val_401 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -406 val_406 -66 val_66 -98 val_98 -98 val_98 -98 val_98 -98 val_98 Index: ql/src/test/results/clientpositive/bucket_map_join_1.q.out =================================================================== --- ql/src/test/results/clientpositive/bucket_map_join_1.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucket_map_join_1.q.out (working copy) @@ -50,13 +50,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table1) a) (TOK_TABREF (TOK_TABNAME table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. 
(TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -95,21 +94,20 @@ 0 [Column[key], Column[value]] 1 [Column[key], Column[value]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -162,47 +160,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.table1 name: default.table1 - Truncated Path -> Alias: - /table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -233,7 +190,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /table1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out (working copy) @@ -134,13 +134,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -179,21 +178,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -296,48 +294,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -368,7 +324,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out (working copy) @@ -67,7 +67,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -93,35 +92,24 @@ 1 [Column[key], Column[value]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator 
Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: + Select Operator + expressions: expr: _col0 - type: bigint + type: string + outputColumnNames: _col0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/union22.q.out =================================================================== --- ql/src/test/results/clientpositive/union22.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/union22.q.out (working copy) @@ -1,1600 +0,0 @@ -PREHOOK: query: create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string) -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@dst_union22 -PREHOOK: query: create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string) -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@dst_union22_delta -PREHOOK: query: insert overwrite table dst_union22 partition (ds='1') -select key, value, key , value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dst_union22@ds=1 -POSTHOOK: query: insert overwrite table dst_union22 partition (ds='1') -select key, value, key , value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dst_union22@ds=1 -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table dst_union22_delta partition (ds='1') -select key, key, value, key, value, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dst_union22_delta@ds=1 -POSTHOOK: query: insert overwrite table dst_union22_delta partition (ds='1') -select key, key, value, key, value, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dst_union22_delta@ds=1 -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k0 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain extended -insert overwrite table dst_union22 partition (ds='2') -select * from -( -select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 -union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 -from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on -a.k1 = b.k1 and a.ds='1' -where a.k1 > 20 -) -subq -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -insert overwrite table dst_union22 partition (ds='2') -select * from -( -select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 -union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 -from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on -a.k1 = b.k1 and a.ds='1' -where a.k1 > 20 -) -subq -POSTHOOK: type: QUERY -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k0 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dst_union22_delta))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k1) k1) (TOK_SELEXPR (TOK_TABLE_OR_COL k2) k2) (TOK_SELEXPR (TOK_TABLE_OR_COL k3) k3) (TOK_SELEXPR (TOK_TABLE_OR_COL k4) k4)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (<= (TOK_TABLE_OR_COL k0) 50))))) (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME dst_union22) a) 
(TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dst_union22_delta))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (> (TOK_TABLE_OR_COL k0) 50))))) b) (and (= (. (TOK_TABLE_OR_COL a) k1) (. (TOK_TABLE_OR_COL b) k1)) (= (. (TOK_TABLE_OR_COL a) ds) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k1) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k2) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3) k3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k4) k4)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL a) k1) 20))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dst_union22) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) - -STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-1 depends on stages: Stage-7 - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - null-subquery2:subq-subquery2:b:dst_union22_delta - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - null-subquery2:subq-subquery2:b:dst_union22_delta - TableScan - alias: dst_union22_delta - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: ((k0 > 50.0) and (k1 > 20.0)) - type: boolean - Select Operator - expressions: - expr: k1 - type: string - expr: k3 - type: string - expr: k4 - type: string - outputColumnNames: _col1, _col3, _col4 - HashTable Sink Operator - condition expressions: - 0 {k1} {k2} - 1 {_col3} {_col4} - filter mappings: - 0 [1, 1] - filter predicates: - 0 {(ds = '1')} - 1 - handleSkewJoin: false - keys: - 0 [Column[k1]] - 1 [Column[_col1]] - Position of Big Table: 0 - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - null-subquery2:subq-subquery2:a - TableScan - alias: a - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (k1 > 20.0) - type: boolean - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {k1} {k2} - 1 {_col3} {_col4} - filter mappings: - 0 [1, 1] - filter predicates: - 0 {(ds = '1')} - 1 - handleSkewJoin: false - keys: - 0 [Column[k1]] - 1 [Column[_col1]] - outputColumnNames: _col0, _col1, _col10, _col11 - Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col10,_col11 - columns.types string,string,string,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 1 - properties: - bucket_count -1 - columns k1,k2,k3,k4 - columns.types string:string:string:string -#### A masked pattern was here #### - name default.dst_union22 - numFiles 1 - numPartitions 1 - numRows 500 - partition_columns ds - 
rawDataSize 11124 - serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11624 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns k1,k2,k3,k4 - columns.types string:string:string:string -#### A masked pattern was here #### - name default.dst_union22 - numFiles 1 - numPartitions 1 - numRows 500 - partition_columns ds - rawDataSize 11124 - serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11624 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dst_union22 - name: default.dst_union22 - Truncated Path -> Alias: - /dst_union22/ds=1 [null-subquery2:subq-subquery2:a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col10 - type: string - expr: _col11 - type: string - outputColumnNames: _col0, _col1, _col10, _col11 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col10 - type: string - expr: _col11 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string,string,string,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col10,_col11 - columns.types string,string,string,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col10,_col11 - columns.types string,string,string,string - escape.delim \ - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - TableScan - GatherStats: false - Union - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2/ -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns k1,k2,k3,k4 - 
columns.types string:string:string:string -#### A masked pattern was here #### - name default.dst_union22 - numFiles 1 - numPartitions 1 - numRows 500 - partition_columns ds - rawDataSize 11124 - serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11624 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dst_union22 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - null-subquery1:subq-subquery1:dst_union22_delta - TableScan - alias: dst_union22_delta - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (k0 <= 50.0) - type: boolean - Select Operator - expressions: - expr: k1 - type: string - expr: k2 - type: string - expr: k3 - type: string - expr: k4 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - Union - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2/ -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns k1,k2,k3,k4 - columns.types string:string:string:string -#### A masked pattern was here #### - name default.dst_union22 - numFiles 1 - numPartitions 1 - numRows 500 - partition_columns ds - rawDataSize 11124 - serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11624 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dst_union22 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10003 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string,string,string,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string,string,string,string - escape.delim \ -#### A masked pattern was here #### - Partition - base file name: ds=1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 1 - properties: - bucket_count -1 - columns k0,k1,k2,k3,k4,k5 - columns.types string:string:string:string:string:string -#### A masked pattern was here #### - name default.dst_union22_delta - numFiles 1 - numPartitions 1 - numRows 500 - partition_columns ds - rawDataSize 16936 - serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 
17436 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns k0,k1,k2,k3,k4,k5 - columns.types string:string:string:string:string:string -#### A masked pattern was here #### - name default.dst_union22_delta - numFiles 1 - numPartitions 1 - numRows 500 - partition_columns ds - rawDataSize 16936 - serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 17436 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dst_union22_delta - name: default.dst_union22_delta - Truncated Path -> Alias: - /dst_union22_delta/ds=1 [null-subquery1:subq-subquery1:dst_union22_delta] -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns k1,k2,k3,k4 - columns.types string:string:string:string -#### A masked pattern was here #### - name default.dst_union22 - numFiles 1 - numPartitions 1 - numRows 500 - partition_columns ds - rawDataSize 11124 - serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11624 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dst_union22 -#### A masked pattern was here #### - - Stage: Stage-4 - Stats-Aggr Operator -#### A masked pattern was here #### - - -PREHOOK: query: insert overwrite table dst_union22 partition (ds='2') -select * from -( -select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 -union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 -from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on -a.k1 = b.k1 and a.ds='1' -where a.k1 > 20 -) -subq -PREHOOK: type: QUERY -PREHOOK: Input: default@dst_union22 -PREHOOK: Input: default@dst_union22@ds=1 -PREHOOK: Input: default@dst_union22_delta -PREHOOK: Input: default@dst_union22_delta@ds=1 -PREHOOK: Output: default@dst_union22@ds=2 -POSTHOOK: query: insert overwrite table dst_union22 partition (ds='2') -select * from -( -select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 -union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 -from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on -a.k1 = b.k1 and a.ds='1' -where a.k1 > 20 -) -subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dst_union22 -POSTHOOK: Input: default@dst_union22@ds=1 -POSTHOOK: Input: default@dst_union22_delta -POSTHOOK: Input: default@dst_union22_delta@ds=1 -POSTHOOK: Output: default@dst_union22@ds=2 -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=2).k1 EXPRESSION [(dst_union22_delta)dst_union22_delta.FieldSchema(name:k1, type:string, comment:null), (dst_union22)a.FieldSchema(name:k1, type:string, comment:null), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=2).k2 EXPRESSION [(dst_union22_delta)dst_union22_delta.FieldSchema(name:k2, type:string, comment:null), (dst_union22)a.FieldSchema(name:k2, type:string, comment:null), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=2).k3 EXPRESSION [(dst_union22_delta)dst_union22_delta.FieldSchema(name:k3, type:string, comment:null), (dst_union22_delta)dst_union22_delta.FieldSchema(name:k3, type:string, comment:null), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=2).k4 EXPRESSION [(dst_union22_delta)dst_union22_delta.FieldSchema(name:k4, type:string, comment:null), (dst_union22_delta)dst_union22_delta.FieldSchema(name:k4, type:string, comment:null), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k0 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select * from dst_union22 where ds = '2' order by k1, k2, k3, k4 -PREHOOK: type: QUERY -PREHOOK: Input: default@dst_union22 -PREHOOK: Input: default@dst_union22@ds=2 -#### A masked pattern was here #### -POSTHOOK: query: select * from dst_union22 where ds = '2' order by k1, k2, k3, k4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dst_union22 -POSTHOOK: Input: default@dst_union22@ds=2 -#### A masked pattern was here #### -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=2).k1 EXPRESSION [(dst_union22_delta)dst_union22_delta.FieldSchema(name:k1, type:string, comment:null), (dst_union22)a.FieldSchema(name:k1, type:string, comment:null), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=2).k2 EXPRESSION [(dst_union22_delta)dst_union22_delta.FieldSchema(name:k2, type:string, comment:null), (dst_union22)a.FieldSchema(name:k2, type:string, comment:null), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=2).k3 EXPRESSION [(dst_union22_delta)dst_union22_delta.FieldSchema(name:k3, type:string, comment:null), (dst_union22_delta)dst_union22_delta.FieldSchema(name:k3, type:string, 
comment:null), ] -POSTHOOK: Lineage: dst_union22 PARTITION(ds=2).k4 EXPRESSION [(dst_union22_delta)dst_union22_delta.FieldSchema(name:k4, type:string, comment:null), (dst_union22_delta)dst_union22_delta.FieldSchema(name:k4, type:string, comment:null), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k0 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -0 val_0 0 val_0 2 -0 val_0 0 val_0 2 -0 val_0 0 val_0 2 -10 val_10 10 val_10 2 -100 val_100 100 val_100 2 -100 val_100 100 val_100 2 -100 val_100 100 val_100 2 -100 val_100 100 val_100 2 -103 val_103 103 val_103 2 -103 val_103 103 val_103 2 -103 val_103 103 val_103 2 -103 val_103 103 val_103 2 -104 val_104 104 val_104 2 -104 val_104 104 val_104 2 -104 val_104 104 val_104 2 -104 val_104 104 val_104 2 -105 val_105 105 val_105 2 -11 val_11 11 val_11 2 -111 val_111 111 val_111 2 -113 val_113 113 val_113 2 -113 val_113 113 val_113 2 -113 val_113 113 val_113 2 -113 val_113 113 val_113 2 -114 val_114 114 val_114 2 -116 val_116 116 val_116 2 -118 val_118 118 val_118 2 -118 val_118 118 val_118 2 -118 val_118 118 val_118 2 -118 val_118 118 val_118 2 -119 val_119 119 val_119 2 -119 val_119 119 val_119 2 -119 val_119 119 val_119 2 -119 val_119 119 val_119 2 -119 val_119 119 val_119 2 -119 val_119 119 val_119 2 -119 val_119 119 val_119 2 -119 val_119 119 val_119 2 -119 val_119 119 val_119 2 -12 val_12 12 val_12 2 -12 val_12 12 val_12 2 -120 val_120 120 val_120 2 -120 val_120 120 val_120 2 -120 val_120 120 val_120 2 -120 val_120 120 val_120 2 -125 val_125 125 val_125 2 -125 val_125 125 val_125 2 -125 val_125 125 val_125 2 -125 val_125 125 val_125 2 -126 val_126 126 val_126 2 -128 val_128 128 val_128 2 -128 val_128 128 val_128 2 -128 val_128 128 val_128 2 -128 val_128 128 val_128 2 -128 val_128 128 val_128 2 -128 val_128 128 val_128 2 -128 val_128 128 val_128 2 -128 val_128 128 val_128 2 -128 val_128 128 val_128 2 -129 val_129 129 val_129 2 -129 val_129 129 val_129 2 -129 val_129 129 val_129 2 -129 val_129 129 val_129 2 -131 val_131 131 val_131 2 -133 val_133 133 val_133 2 -134 val_134 134 val_134 2 -134 val_134 134 val_134 2 -134 val_134 134 val_134 2 -134 val_134 134 val_134 2 -136 val_136 136 val_136 2 -137 val_137 137 val_137 2 -137 val_137 137 val_137 2 -137 val_137 137 val_137 2 -137 val_137 137 val_137 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -138 val_138 138 val_138 2 -143 val_143 143 val_143 2 -145 val_145 145 val_145 2 -146 val_146 146 val_146 2 -146 val_146 146 val_146 2 -146 val_146 146 val_146 2 -146 val_146 146 
val_146 2 -149 val_149 149 val_149 2 -149 val_149 149 val_149 2 -149 val_149 149 val_149 2 -149 val_149 149 val_149 2 -15 val_15 15 val_15 2 -15 val_15 15 val_15 2 -150 val_150 150 val_150 2 -152 val_152 152 val_152 2 -152 val_152 152 val_152 2 -152 val_152 152 val_152 2 -152 val_152 152 val_152 2 -153 val_153 153 val_153 2 -155 val_155 155 val_155 2 -156 val_156 156 val_156 2 -157 val_157 157 val_157 2 -158 val_158 158 val_158 2 -160 val_160 160 val_160 2 -162 val_162 162 val_162 2 -163 val_163 163 val_163 2 -164 val_164 164 val_164 2 -164 val_164 164 val_164 2 -164 val_164 164 val_164 2 -164 val_164 164 val_164 2 -165 val_165 165 val_165 2 -165 val_165 165 val_165 2 -165 val_165 165 val_165 2 -165 val_165 165 val_165 2 -166 val_166 166 val_166 2 -167 val_167 167 val_167 2 -167 val_167 167 val_167 2 -167 val_167 167 val_167 2 -167 val_167 167 val_167 2 -167 val_167 167 val_167 2 -167 val_167 167 val_167 2 -167 val_167 167 val_167 2 -167 val_167 167 val_167 2 -167 val_167 167 val_167 2 -168 val_168 168 val_168 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -169 val_169 169 val_169 2 -17 val_17 17 val_17 2 -170 val_170 170 val_170 2 -172 val_172 172 val_172 2 -172 val_172 172 val_172 2 -172 val_172 172 val_172 2 -172 val_172 172 val_172 2 -174 val_174 174 val_174 2 -174 val_174 174 val_174 2 -174 val_174 174 val_174 2 -174 val_174 174 val_174 2 -175 val_175 175 val_175 2 -175 val_175 175 val_175 2 -175 val_175 175 val_175 2 -175 val_175 175 val_175 2 -176 val_176 176 val_176 2 -176 val_176 176 val_176 2 -176 val_176 176 val_176 2 -176 val_176 176 val_176 2 -177 val_177 177 val_177 2 -178 val_178 178 val_178 2 -179 val_179 179 val_179 2 -179 val_179 179 val_179 2 -179 val_179 179 val_179 2 -179 val_179 179 val_179 2 -18 val_18 18 val_18 2 -18 val_18 18 val_18 2 -180 val_180 180 val_180 2 -181 val_181 181 val_181 2 -183 val_183 183 val_183 2 -186 val_186 186 val_186 2 -187 val_187 187 val_187 2 -187 val_187 187 val_187 2 -187 val_187 187 val_187 2 -187 val_187 187 val_187 2 -187 val_187 187 val_187 2 -187 val_187 187 val_187 2 -187 val_187 187 val_187 2 -187 val_187 187 val_187 2 -187 val_187 187 val_187 2 -189 val_189 189 val_189 2 -19 val_19 19 val_19 2 -190 val_190 190 val_190 2 -191 val_191 191 val_191 2 -191 val_191 191 val_191 2 -191 val_191 191 val_191 2 -191 val_191 191 val_191 2 -192 val_192 192 val_192 2 -193 val_193 193 val_193 2 -193 val_193 193 val_193 2 -193 val_193 193 val_193 2 -193 val_193 193 val_193 2 -193 val_193 193 val_193 2 -193 val_193 193 val_193 2 -193 val_193 193 val_193 2 -193 val_193 193 val_193 2 -193 val_193 193 val_193 2 -194 val_194 194 val_194 2 -195 val_195 195 val_195 2 -195 val_195 195 val_195 2 -195 val_195 195 val_195 2 -195 val_195 195 val_195 2 -196 val_196 196 val_196 2 -197 val_197 197 val_197 2 -197 val_197 197 val_197 2 -197 val_197 197 val_197 2 -197 val_197 197 val_197 2 -199 val_199 199 val_199 2 -199 val_199 199 val_199 2 -199 val_199 199 val_199 2 -199 val_199 199 val_199 2 -199 val_199 199 val_199 2 -199 val_199 199 val_199 2 -199 val_199 199 val_199 2 -199 val_199 199 val_199 2 -199 val_199 199 val_199 2 -2 val_2 2 val_2 2 -20 val_20 20 val_20 2 -200 val_200 200 val_200 2 -200 val_200 200 
val_200 2 -200 val_200 200 val_200 2 -200 val_200 200 val_200 2 -201 val_201 201 val_201 2 -202 val_202 202 val_202 2 -203 val_203 203 val_203 2 -203 val_203 203 val_203 2 -203 val_203 203 val_203 2 -203 val_203 203 val_203 2 -205 val_205 205 val_205 2 -205 val_205 205 val_205 2 -205 val_205 205 val_205 2 -205 val_205 205 val_205 2 -207 val_207 207 val_207 2 -207 val_207 207 val_207 2 -207 val_207 207 val_207 2 -207 val_207 207 val_207 2 -208 val_208 208 val_208 2 -208 val_208 208 val_208 2 -208 val_208 208 val_208 2 -208 val_208 208 val_208 2 -208 val_208 208 val_208 2 -208 val_208 208 val_208 2 -208 val_208 208 val_208 2 -208 val_208 208 val_208 2 -208 val_208 208 val_208 2 -209 val_209 209 val_209 2 -209 val_209 209 val_209 2 -209 val_209 209 val_209 2 -209 val_209 209 val_209 2 -213 val_213 213 val_213 2 -213 val_213 213 val_213 2 -213 val_213 213 val_213 2 -213 val_213 213 val_213 2 -214 val_214 214 val_214 2 -216 val_216 216 val_216 2 -216 val_216 216 val_216 2 -216 val_216 216 val_216 2 -216 val_216 216 val_216 2 -217 val_217 217 val_217 2 -217 val_217 217 val_217 2 -217 val_217 217 val_217 2 -217 val_217 217 val_217 2 -218 val_218 218 val_218 2 -219 val_219 219 val_219 2 -219 val_219 219 val_219 2 -219 val_219 219 val_219 2 -219 val_219 219 val_219 2 -221 val_221 221 val_221 2 -221 val_221 221 val_221 2 -221 val_221 221 val_221 2 -221 val_221 221 val_221 2 -222 val_222 222 val_222 2 -223 val_223 223 val_223 2 -223 val_223 223 val_223 2 -223 val_223 223 val_223 2 -223 val_223 223 val_223 2 -224 val_224 224 val_224 2 -224 val_224 224 val_224 2 -224 val_224 224 val_224 2 -224 val_224 224 val_224 2 -226 val_226 226 val_226 2 -228 val_228 228 val_228 2 -229 val_229 229 val_229 2 -229 val_229 229 val_229 2 -229 val_229 229 val_229 2 -229 val_229 229 val_229 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -230 val_230 230 val_230 2 -233 val_233 233 val_233 2 -233 val_233 233 val_233 2 -233 val_233 233 val_233 2 -233 val_233 233 val_233 2 -235 val_235 235 val_235 2 -237 val_237 237 val_237 2 -237 val_237 237 val_237 2 -237 val_237 237 val_237 2 -237 val_237 237 val_237 2 -238 val_238 238 val_238 2 -238 val_238 238 val_238 2 -238 val_238 238 val_238 2 -238 val_238 238 val_238 2 -239 val_239 239 val_239 2 -239 val_239 239 val_239 2 -239 val_239 239 val_239 2 -239 val_239 239 val_239 2 -24 val_24 NULL NULL 2 -24 val_24 NULL NULL 2 -24 val_24 24 val_24 2 -24 val_24 24 val_24 2 -241 val_241 241 val_241 2 -242 val_242 242 val_242 2 -242 val_242 242 val_242 2 -242 val_242 242 val_242 2 -242 val_242 242 val_242 2 -244 val_244 244 val_244 2 -247 val_247 247 val_247 2 -248 val_248 248 val_248 2 -249 val_249 249 val_249 2 -252 val_252 252 val_252 2 -255 val_255 255 val_255 2 -255 val_255 255 val_255 2 -255 val_255 255 val_255 2 -255 val_255 255 val_255 2 -256 val_256 256 val_256 2 -256 val_256 256 val_256 2 -256 val_256 256 val_256 2 -256 val_256 256 val_256 2 -257 val_257 257 val_257 2 -258 val_258 258 
val_258 2 -26 val_26 NULL NULL 2 -26 val_26 NULL NULL 2 -26 val_26 26 val_26 2 -26 val_26 26 val_26 2 -260 val_260 260 val_260 2 -262 val_262 262 val_262 2 -263 val_263 263 val_263 2 -265 val_265 265 val_265 2 -265 val_265 265 val_265 2 -265 val_265 265 val_265 2 -265 val_265 265 val_265 2 -266 val_266 266 val_266 2 -27 val_27 NULL NULL 2 -27 val_27 27 val_27 2 -272 val_272 272 val_272 2 -272 val_272 272 val_272 2 -272 val_272 272 val_272 2 -272 val_272 272 val_272 2 -273 val_273 273 val_273 2 -273 val_273 273 val_273 2 -273 val_273 273 val_273 2 -273 val_273 273 val_273 2 -273 val_273 273 val_273 2 -273 val_273 273 val_273 2 -273 val_273 273 val_273 2 -273 val_273 273 val_273 2 -273 val_273 273 val_273 2 -274 val_274 274 val_274 2 -275 val_275 275 val_275 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -277 val_277 277 val_277 2 -278 val_278 278 val_278 2 -278 val_278 278 val_278 2 -278 val_278 278 val_278 2 -278 val_278 278 val_278 2 -28 val_28 NULL NULL 2 -28 val_28 28 val_28 2 -280 val_280 280 val_280 2 -280 val_280 280 val_280 2 -280 val_280 280 val_280 2 -280 val_280 280 val_280 2 -281 val_281 281 val_281 2 -281 val_281 281 val_281 2 -281 val_281 281 val_281 2 -281 val_281 281 val_281 2 -282 val_282 282 val_282 2 -282 val_282 282 val_282 2 -282 val_282 282 val_282 2 -282 val_282 282 val_282 2 -283 val_283 283 val_283 2 -284 val_284 284 val_284 2 -285 val_285 285 val_285 2 -286 val_286 286 val_286 2 -287 val_287 287 val_287 2 -288 val_288 288 val_288 2 -288 val_288 288 val_288 2 -288 val_288 288 val_288 2 -288 val_288 288 val_288 2 -289 val_289 289 val_289 2 -291 val_291 291 val_291 2 -292 val_292 292 val_292 2 -296 val_296 296 val_296 2 -298 val_298 298 val_298 2 -298 val_298 298 val_298 2 -298 val_298 298 val_298 2 -298 val_298 298 val_298 2 -298 val_298 298 val_298 2 -298 val_298 298 val_298 2 -298 val_298 298 val_298 2 -298 val_298 298 val_298 2 -298 val_298 298 val_298 2 -30 val_30 NULL NULL 2 -30 val_30 30 val_30 2 -302 val_302 302 val_302 2 -305 val_305 305 val_305 2 -306 val_306 306 val_306 2 -307 val_307 307 val_307 2 -307 val_307 307 val_307 2 -307 val_307 307 val_307 2 -307 val_307 307 val_307 2 -308 val_308 308 val_308 2 -309 val_309 309 val_309 2 -309 val_309 309 val_309 2 -309 val_309 309 val_309 2 -309 val_309 309 val_309 2 -310 val_310 310 val_310 2 -311 val_311 311 val_311 2 -311 val_311 311 val_311 2 -311 val_311 311 val_311 2 -311 val_311 311 val_311 2 -311 val_311 311 val_311 2 -311 val_311 311 val_311 2 -311 val_311 311 val_311 2 -311 val_311 311 val_311 2 -311 val_311 311 val_311 2 -315 val_315 315 val_315 2 -316 val_316 316 val_316 2 -316 val_316 316 val_316 2 -316 val_316 316 val_316 2 -316 val_316 316 val_316 2 -316 val_316 316 val_316 2 -316 val_316 316 val_316 2 -316 val_316 316 val_316 2 -316 val_316 316 val_316 2 -316 val_316 316 val_316 2 -317 val_317 317 val_317 2 -317 val_317 317 val_317 2 -317 val_317 317 val_317 2 -317 val_317 317 val_317 2 -318 val_318 318 val_318 2 -318 val_318 318 val_318 2 -318 val_318 318 val_318 2 -318 val_318 318 val_318 2 -318 val_318 318 val_318 2 -318 val_318 318 val_318 2 -318 val_318 318 val_318 2 -318 val_318 318 val_318 2 -318 val_318 318 val_318 2 -321 val_321 321 
val_321 2 -321 val_321 321 val_321 2 -321 val_321 321 val_321 2 -321 val_321 321 val_321 2 -322 val_322 322 val_322 2 -322 val_322 322 val_322 2 -322 val_322 322 val_322 2 -322 val_322 322 val_322 2 -323 val_323 323 val_323 2 -325 val_325 325 val_325 2 -325 val_325 325 val_325 2 -325 val_325 325 val_325 2 -325 val_325 325 val_325 2 -327 val_327 327 val_327 2 -327 val_327 327 val_327 2 -327 val_327 327 val_327 2 -327 val_327 327 val_327 2 -327 val_327 327 val_327 2 -327 val_327 327 val_327 2 -327 val_327 327 val_327 2 -327 val_327 327 val_327 2 -327 val_327 327 val_327 2 -33 val_33 NULL NULL 2 -33 val_33 33 val_33 2 -331 val_331 331 val_331 2 -331 val_331 331 val_331 2 -331 val_331 331 val_331 2 -331 val_331 331 val_331 2 -332 val_332 332 val_332 2 -333 val_333 333 val_333 2 -333 val_333 333 val_333 2 -333 val_333 333 val_333 2 -333 val_333 333 val_333 2 -335 val_335 335 val_335 2 -336 val_336 336 val_336 2 -338 val_338 338 val_338 2 -339 val_339 339 val_339 2 -34 val_34 NULL NULL 2 -34 val_34 34 val_34 2 -341 val_341 341 val_341 2 -342 val_342 342 val_342 2 -342 val_342 342 val_342 2 -342 val_342 342 val_342 2 -342 val_342 342 val_342 2 -344 val_344 344 val_344 2 -344 val_344 344 val_344 2 -344 val_344 344 val_344 2 -344 val_344 344 val_344 2 -345 val_345 345 val_345 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -348 val_348 348 val_348 2 -35 val_35 NULL NULL 2 -35 val_35 NULL NULL 2 -35 val_35 NULL NULL 2 -35 val_35 35 val_35 2 -35 val_35 35 val_35 2 -35 val_35 35 val_35 2 -351 val_351 351 val_351 2 -353 val_353 353 val_353 2 -353 val_353 353 val_353 2 -353 val_353 353 val_353 2 -353 val_353 353 val_353 2 -356 val_356 356 val_356 2 -360 val_360 360 val_360 2 -362 val_362 362 val_362 2 -364 val_364 364 val_364 2 -365 val_365 365 val_365 2 -366 val_366 366 val_366 2 -367 val_367 367 val_367 2 -367 val_367 367 val_367 2 -367 val_367 367 val_367 2 -367 val_367 367 val_367 2 -368 val_368 368 val_368 2 -369 val_369 369 val_369 2 -369 val_369 369 val_369 2 -369 val_369 369 val_369 2 -369 val_369 369 val_369 2 -369 val_369 369 val_369 2 -369 val_369 369 val_369 2 -369 val_369 369 val_369 2 -369 val_369 369 val_369 2 -369 val_369 369 val_369 2 -37 val_37 NULL NULL 2 -37 val_37 NULL NULL 2 -37 val_37 37 val_37 2 -37 val_37 37 val_37 2 -373 val_373 373 val_373 2 -374 val_374 374 val_374 2 -375 val_375 375 val_375 2 -377 val_377 377 val_377 2 -378 val_378 378 val_378 2 -379 val_379 379 val_379 2 -382 val_382 382 val_382 2 -382 val_382 382 val_382 2 -382 val_382 382 val_382 2 -382 val_382 382 val_382 2 -384 val_384 384 val_384 2 -384 val_384 384 val_384 2 -384 val_384 384 val_384 2 -384 val_384 384 val_384 2 -384 val_384 384 val_384 2 -384 val_384 384 val_384 2 -384 val_384 384 val_384 2 -384 val_384 384 val_384 2 -384 val_384 384 val_384 2 -386 val_386 386 val_386 2 -389 val_389 389 val_389 2 -392 val_392 392 val_392 2 -393 val_393 393 val_393 2 -394 val_394 394 val_394 2 -395 val_395 395 val_395 2 -395 
val_395 395 val_395 2 -395 val_395 395 val_395 2 -395 val_395 395 val_395 2 -396 val_396 396 val_396 2 -396 val_396 396 val_396 2 -396 val_396 396 val_396 2 -396 val_396 396 val_396 2 -396 val_396 396 val_396 2 -396 val_396 396 val_396 2 -396 val_396 396 val_396 2 -396 val_396 396 val_396 2 -396 val_396 396 val_396 2 -397 val_397 397 val_397 2 -397 val_397 397 val_397 2 -397 val_397 397 val_397 2 -397 val_397 397 val_397 2 -399 val_399 399 val_399 2 -399 val_399 399 val_399 2 -399 val_399 399 val_399 2 -399 val_399 399 val_399 2 -4 val_4 4 val_4 2 -400 val_400 400 val_400 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -401 val_401 401 val_401 2 -402 val_402 402 val_402 2 -403 val_403 403 val_403 2 -403 val_403 403 val_403 2 -403 val_403 403 val_403 2 -403 val_403 403 val_403 2 -403 val_403 403 val_403 2 -403 val_403 403 val_403 2 -403 val_403 403 val_403 2 -403 val_403 403 val_403 2 -403 val_403 403 val_403 2 -404 val_404 404 val_404 2 -404 val_404 404 val_404 2 -404 val_404 404 val_404 2 -404 val_404 404 val_404 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -406 val_406 406 val_406 2 -407 val_407 407 val_407 2 -409 val_409 409 val_409 2 -409 val_409 409 val_409 2 -409 val_409 409 val_409 2 -409 val_409 409 val_409 2 -409 val_409 409 val_409 2 -409 val_409 409 val_409 2 -409 val_409 409 val_409 2 -409 val_409 409 val_409 2 -409 val_409 409 val_409 2 -41 val_41 NULL NULL 2 -41 val_41 41 val_41 2 -411 val_411 411 val_411 2 -413 val_413 413 val_413 2 -413 val_413 413 val_413 2 -413 val_413 413 val_413 2 -413 val_413 413 val_413 2 -414 val_414 414 val_414 2 -414 val_414 414 val_414 2 -414 val_414 414 val_414 2 -414 val_414 414 val_414 2 -417 val_417 417 val_417 2 -417 val_417 417 val_417 2 -417 val_417 417 val_417 2 -417 val_417 417 val_417 2 -417 val_417 417 val_417 2 -417 val_417 417 val_417 2 -417 val_417 417 val_417 2 -417 val_417 417 val_417 2 -417 val_417 417 val_417 2 -418 val_418 418 val_418 2 -419 val_419 419 val_419 2 -42 val_42 NULL NULL 2 -42 val_42 NULL NULL 2 -42 val_42 42 val_42 2 -42 val_42 42 val_42 2 -421 val_421 421 val_421 2 -424 val_424 424 val_424 2 -424 val_424 424 val_424 2 -424 val_424 424 val_424 2 -424 val_424 424 val_424 2 -427 val_427 427 val_427 2 -429 val_429 429 val_429 2 -429 val_429 429 val_429 2 -429 val_429 429 val_429 2 -429 val_429 429 val_429 2 -43 val_43 NULL NULL 2 -43 val_43 43 val_43 2 -430 val_430 430 val_430 2 -430 val_430 430 val_430 2 -430 val_430 430 val_430 2 -430 val_430 430 val_430 2 -430 val_430 430 val_430 2 -430 val_430 430 val_430 2 -430 val_430 430 val_430 2 -430 val_430 430 val_430 2 -430 
val_430 430 val_430 2 -431 val_431 431 val_431 2 -431 val_431 431 val_431 2 -431 val_431 431 val_431 2 -431 val_431 431 val_431 2 -431 val_431 431 val_431 2 -431 val_431 431 val_431 2 -431 val_431 431 val_431 2 -431 val_431 431 val_431 2 -431 val_431 431 val_431 2 -432 val_432 432 val_432 2 -435 val_435 435 val_435 2 -436 val_436 436 val_436 2 -437 val_437 437 val_437 2 -438 val_438 438 val_438 2 -438 val_438 438 val_438 2 -438 val_438 438 val_438 2 -438 val_438 438 val_438 2 -438 val_438 438 val_438 2 -438 val_438 438 val_438 2 -438 val_438 438 val_438 2 -438 val_438 438 val_438 2 -438 val_438 438 val_438 2 -439 val_439 439 val_439 2 -439 val_439 439 val_439 2 -439 val_439 439 val_439 2 -439 val_439 439 val_439 2 -44 val_44 NULL NULL 2 -44 val_44 44 val_44 2 -443 val_443 443 val_443 2 -444 val_444 444 val_444 2 -446 val_446 446 val_446 2 -448 val_448 448 val_448 2 -449 val_449 449 val_449 2 -452 val_452 452 val_452 2 -453 val_453 453 val_453 2 -454 val_454 454 val_454 2 -454 val_454 454 val_454 2 -454 val_454 454 val_454 2 -454 val_454 454 val_454 2 -454 val_454 454 val_454 2 -454 val_454 454 val_454 2 -454 val_454 454 val_454 2 -454 val_454 454 val_454 2 -454 val_454 454 val_454 2 -455 val_455 455 val_455 2 -457 val_457 457 val_457 2 -458 val_458 458 val_458 2 -458 val_458 458 val_458 2 -458 val_458 458 val_458 2 -458 val_458 458 val_458 2 -459 val_459 459 val_459 2 -459 val_459 459 val_459 2 -459 val_459 459 val_459 2 -459 val_459 459 val_459 2 -460 val_460 460 val_460 2 -462 val_462 462 val_462 2 -462 val_462 462 val_462 2 -462 val_462 462 val_462 2 -462 val_462 462 val_462 2 -463 val_463 463 val_463 2 -463 val_463 463 val_463 2 -463 val_463 463 val_463 2 -463 val_463 463 val_463 2 -466 val_466 466 val_466 2 -466 val_466 466 val_466 2 -466 val_466 466 val_466 2 -466 val_466 466 val_466 2 -466 val_466 466 val_466 2 -466 val_466 466 val_466 2 -466 val_466 466 val_466 2 -466 val_466 466 val_466 2 -466 val_466 466 val_466 2 -467 val_467 467 val_467 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -468 val_468 468 val_468 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -469 val_469 469 val_469 2 -47 val_47 NULL NULL 2 -47 val_47 47 val_47 2 -470 val_470 470 val_470 2 -472 val_472 472 val_472 2 -475 val_475 475 val_475 2 -477 val_477 477 val_477 2 -478 val_478 478 val_478 2 -478 val_478 478 val_478 2 -478 val_478 478 val_478 2 -478 val_478 478 val_478 2 -479 val_479 479 val_479 2 -480 val_480 480 val_480 2 -480 val_480 480 val_480 2 -480 val_480 480 val_480 2 -480 val_480 480 val_480 2 -480 val_480 480 val_480 2 -480 val_480 480 val_480 2 -480 
val_480 480 val_480 2 -480 val_480 480 val_480 2 -480 val_480 480 val_480 2 -481 val_481 481 val_481 2 -482 val_482 482 val_482 2 -483 val_483 483 val_483 2 -484 val_484 484 val_484 2 -485 val_485 485 val_485 2 -487 val_487 487 val_487 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -489 val_489 489 val_489 2 -490 val_490 490 val_490 2 -491 val_491 491 val_491 2 -492 val_492 492 val_492 2 -492 val_492 492 val_492 2 -492 val_492 492 val_492 2 -492 val_492 492 val_492 2 -493 val_493 493 val_493 2 -494 val_494 494 val_494 2 -495 val_495 495 val_495 2 -496 val_496 496 val_496 2 -497 val_497 497 val_497 2 -498 val_498 498 val_498 2 -498 val_498 498 val_498 2 -498 val_498 498 val_498 2 -498 val_498 498 val_498 2 -498 val_498 498 val_498 2 -498 val_498 498 val_498 2 -498 val_498 498 val_498 2 -498 val_498 498 val_498 2 -498 val_498 498 val_498 2 -5 val_5 5 val_5 2 -5 val_5 5 val_5 2 -5 val_5 5 val_5 2 -51 val_51 51 val_51 2 -51 val_51 51 val_51 2 -51 val_51 51 val_51 2 -51 val_51 51 val_51 2 -53 val_53 53 val_53 2 -54 val_54 54 val_54 2 -57 val_57 57 val_57 2 -58 val_58 58 val_58 2 -58 val_58 58 val_58 2 -58 val_58 58 val_58 2 -58 val_58 58 val_58 2 -64 val_64 64 val_64 2 -65 val_65 65 val_65 2 -66 val_66 66 val_66 2 -67 val_67 67 val_67 2 -67 val_67 67 val_67 2 -67 val_67 67 val_67 2 -67 val_67 67 val_67 2 -69 val_69 69 val_69 2 -70 val_70 70 val_70 2 -70 val_70 70 val_70 2 -70 val_70 70 val_70 2 -70 val_70 70 val_70 2 -70 val_70 70 val_70 2 -70 val_70 70 val_70 2 -70 val_70 70 val_70 2 -70 val_70 70 val_70 2 -70 val_70 70 val_70 2 -72 val_72 72 val_72 2 -72 val_72 72 val_72 2 -72 val_72 72 val_72 2 -72 val_72 72 val_72 2 -74 val_74 74 val_74 2 -76 val_76 76 val_76 2 -76 val_76 76 val_76 2 -76 val_76 76 val_76 2 -76 val_76 76 val_76 2 -77 val_77 77 val_77 2 -78 val_78 78 val_78 2 -8 val_8 8 val_8 2 -80 val_80 80 val_80 2 -82 val_82 82 val_82 2 -83 val_83 83 val_83 2 -83 val_83 83 val_83 2 -83 val_83 83 val_83 2 -83 val_83 83 val_83 2 -84 val_84 84 val_84 2 -84 val_84 84 val_84 2 -84 val_84 84 val_84 2 -84 val_84 84 val_84 2 -85 val_85 85 val_85 2 -86 val_86 86 val_86 2 -87 val_87 87 val_87 2 -9 val_9 9 val_9 2 -90 val_90 90 val_90 2 -90 val_90 90 val_90 2 -90 val_90 90 val_90 2 -90 val_90 90 val_90 2 -90 val_90 90 val_90 2 -90 val_90 90 val_90 2 -90 val_90 90 val_90 2 -90 val_90 90 val_90 2 -90 val_90 90 val_90 2 -92 val_92 92 val_92 2 -95 val_95 95 val_95 2 -95 val_95 95 val_95 2 -95 val_95 95 val_95 2 -95 val_95 95 val_95 2 -96 val_96 96 val_96 2 -97 val_97 97 val_97 2 -97 val_97 97 val_97 2 -97 val_97 97 val_97 2 -97 val_97 97 val_97 2 -98 val_98 98 val_98 2 -98 val_98 98 val_98 2 -98 val_98 98 val_98 2 -98 val_98 98 val_98 2 Index: ql/src/test/results/clientpositive/join32.q.out =================================================================== --- ql/src/test/results/clientpositive/join32.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/join32.q.out (working copy) @@ -1,546 +0,0 @@ -PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE -POSTHOOK: type: 
CREATETABLE -POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value))))) - -STAGE DEPENDENCIES: - Stage-12 is a root stage - Stage-8 depends on stages: Stage-12 - Stage-11 depends on stages: Stage-8 - Stage-1 depends on stages: Stage-11 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-12 - Map Reduce Local Work - Alias -> Map Local Tables: - x - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - x - TableScan - alias: x - GatherStats: false - HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - Position of Big Table: 1 - - Stage: Stage-8 - Map Reduce - Alias -> Map Operator Tree: - y - TableScan - alias: y - GatherStats: false - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col5 - columns.types string,string,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numPartitions 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numPartitions 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [y] - - Stage: Stage-11 - Map Reduce Local Work - Alias -> Map Local Tables: - z - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - z - TableScan - alias: z - GatherStats: false - HashTable Sink Operator - condition expressions: - 0 {_col5} {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col1]] - 1 [Column[value]] - Position of Big Table: 0 - - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col5} {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col1]] - 1 [Column[value]] - outputColumnNames: _col1, _col4, _col9 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col9 - type: string - outputColumnNames: _col1, _col4, _col9 - Select Operator - expressions: - expr: _col4 - type: string - expr: _col9 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10003 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col5 - columns.types string,string,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col5 - columns.types string,string,string - escape.delim \ - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-7 - Conditional Operator - - Stage: 
Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 -#### A masked pattern was here #### - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - name: default.dest_j1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - 
bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - name: default.dest_j1 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - -PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Output: default@dest_j1 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select * from dest_j1 x order by x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: query: select * from dest_j1 x order by x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, 
comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] -146 val_146 val_146 -146 val_146 val_146 -146 val_146 val_146 -146 val_146 val_146 -150 val_150 val_150 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -238 val_238 val_238 -238 val_238 val_238 -238 val_238 val_238 -238 val_238 val_238 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -278 val_278 val_278 -278 val_278 val_278 -278 val_278 val_278 -278 val_278 val_278 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -66 val_66 val_66 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 Index: ql/src/test/results/clientpositive/bucketcontext_6.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_6.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketcontext_6.q.out (working copy) @@ -68,13 +68,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -120,21 +119,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -241,48 +239,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -313,7 +269,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -344,7 +301,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -366,21 +322,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: 
false Path -> Alias: #### A masked pattern was here #### @@ -485,48 +440,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -557,7 +470,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketcontext_1.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_1.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketcontext_1.q.out (working copy) @@ -81,13 +81,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -133,21 +132,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -254,48 +252,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -326,7 +282,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -359,7 +316,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -381,21 +337,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: 
false Path -> Alias: #### A masked pattern was here #### @@ -500,48 +455,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -572,7 +485,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin10.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin10.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketmapjoin10.q.out (working copy) @@ -118,13 +118,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -163,21 +162,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -280,48 +278,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -352,7 +308,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/mapjoin_distinct.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_distinct.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/mapjoin_distinct.q.out (working copy) @@ -14,14 +14,13 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL c) value))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: d @@ -59,45 +58,34 @@ 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Group By Operator - bucketGroup: false - keys: + Select Operator + expressions: expr: _col1 type: string - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 type: string - sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 + outputColumnNames: _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator bucketGroup: false @@ -113,7 +101,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-3 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -195,13 +183,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL c) value))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: d @@ -239,45 +226,34 @@ 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Group By Operator - bucketGroup: false - keys: + Select Operator + expressions: expr: _col1 type: string - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + outputColumnNames: _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator bucketGroup: false @@ -347,14 +323,13 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL c) value))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: d @@ -392,38 +367,27 @@ 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Reduce Output Operator - key expressions: - expr: _col1 - type: string - sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 Reduce Operator Tree: Group By Operator bucketGroup: false @@ -439,7 +403,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-3 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -521,13 +485,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL c) value))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: d @@ -565,38 +528,27 @@ 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col1 + type: string + tag: -1 Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Reduce Output Operator - key expressions: - expr: _col1 - type: string - sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 Reduce Operator Tree: Group By Operator bucketGroup: false Index: ql/src/test/results/clientpositive/semijoin.q.out =================================================================== --- ql/src/test/results/clientpositive/semijoin.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/semijoin.q.out (working copy) @@ -1137,13 +1137,12 @@ (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL a) key))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -1193,38 +1192,27 @@ 1 [Column[_col0]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int Reduce Operator Tree: Extract File Output Operator @@ -1712,13 +1700,12 @@ (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t2) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b c))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL a) key))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: b @@ -1802,38 +1789,27 @@ 2 [Column[_col0]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int Reduce Operator Tree: Extract File Output Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out (working copy) @@ -70,13 +70,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -122,21 +121,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -193,47 +191,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -264,7 +221,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out (working copy) @@ -59,7 +59,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -85,35 +84,24 @@ 1 [Column[key]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - 
type: string - outputColumnNames: _col0 - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: + Select Operator + expressions: expr: _col0 - type: bigint + type: string + outputColumnNames: _col0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/join31.q.out =================================================================== --- ql/src/test/results/clientpositive/join31.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/join31.q.out (working copy) @@ -1,310 +0,0 @@ -PREHOOK: query: CREATE TABLE dest_j1(key STRING, cnt INT) -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt INT) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) -group by subq1.key -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) -group by subq1.key -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. 
(TOK_TABLE_OR_COL subq1) key)))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1, Stage-5 - Stage-2 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-5 is a root stage - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - subq2:y - TableScan - alias: y - Select Operator - expressions: - expr: key - type: string - outputColumnNames: key - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: -#### A masked pattern was here #### - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: -#### A masked pattern was here #### - HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - Position of Big Table: 1 - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[_col0]] - outputColumnNames: _col0 - Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col0 - type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToInteger(_col1) - type: int - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-4 - Stats-Aggr Operator - - Stage: Stage-5 - Map Reduce - Alias -> Map Operator Tree: - subq1:x - TableScan - alias: x - Select Operator - expressions: - expr: key - type: string - outputColumnNames: key - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: key - type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - keys: - expr: KEY._col0 - type: string - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - -PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) -group by subq1.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Output: default@dest_j1 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) -group by subq1.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ] -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: select * from dest_j1 x order by x.key -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: query: select * from dest_j1 x order by x.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ] -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -128 1 -146 1 -150 1 -213 1 -224 1 -238 1 -255 1 -273 1 -278 1 -311 1 -369 1 -401 1 -406 1 -66 1 -98 1 Index: ql/src/test/results/clientpositive/bucketcontext_5.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_5.q.out (revision 1436745) +++ ql/src/test/results/clientpositive/bucketcontext_5.q.out (working copy) @@ -54,13 +54,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. 
(TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -106,21 +105,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -173,47 +171,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -244,7 +201,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big [b] Stage: Stage-0 Fetch Operator @@ -271,7 +228,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -293,21 +249,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ 
-358,47 +313,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -429,7 +343,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientnegative/join29.q.out =================================================================== --- ql/src/test/results/clientnegative/join29.q.out (revision 0) +++ ql/src/test/results/clientnegative/join29.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10211]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/join33.q.out =================================================================== --- ql/src/test/results/clientnegative/join33.q.out (revision 0) +++ ql/src/test/results/clientnegative/join33.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10211]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/mapjoin_mapjoin.q.out =================================================================== --- ql/src/test/results/clientnegative/mapjoin_mapjoin.q.out (revision 0) +++ ql/src/test/results/clientnegative/mapjoin_mapjoin.q.out (working copy) @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10211]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. 
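The golden-file churn above in bucketcontext_5.q.out, sort_merge_join_desc_1.q.out, sort_merge_join_desc_6.q.out, mapjoin_distinct.q.out, and semijoin.q.out all follows one pattern: the intermediate File Output Operator that used to spill map-join results to a temporary SequenceFile is gone, the follow-on Select / Group By / Reduce Output operators are pulled into the map-join stage itself, and the now-redundant trailing map-reduce stage is dropped (hence every remaining stage number shifting down by one). A minimal sketch of the query shape these plans cover, mirroring the smb_mapjoin_16.q test added later in this patch (table names as defined there); the stage comments paraphrase the before/after plans shown in the diffs, not output from a live cluster:

-- Before: local hash-table build -> map-join job spilling to a temp
-- SequenceFile -> a second MR job doing the hash-mode count().
-- After: local hash-table build -> one MR job in which the Map Join
-- Operator feeds Select / Group By / Reduce Output directly.
EXPLAIN
SELECT /*+ MAPJOIN(b) */ count(*)
FROM test_table1 a JOIN test_table2 b ON (a.key = b.key);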
Index: ql/src/test/results/clientnegative/join34.q.out =================================================================== --- ql/src/test/results/clientnegative/join34.q.out (revision 0) +++ ql/src/test/results/clientnegative/join34.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10211]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/join35.q.out =================================================================== --- ql/src/test/results/clientnegative/join35.q.out (revision 0) +++ ql/src/test/results/clientnegative/join35.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10211]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/mapjoin_subquery2.q.out =================================================================== --- ql/src/test/results/clientnegative/mapjoin_subquery2.q.out (revision 0) +++ ql/src/test/results/clientnegative/mapjoin_subquery2.q.out (working copy) @@ -0,0 +1,52 @@ +PREHOOK: query: drop table x +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table x +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table y +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table y +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table z +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table z +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE x (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE x (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@x +PREHOOK: query: CREATE TABLE y (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE y (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@y +PREHOOK: query: CREATE TABLE z (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE z (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@z +PREHOOK: query: load data local inpath '../data/files/x.txt' INTO TABLE x +PREHOOK: type: LOAD +PREHOOK: Output: default@x +POSTHOOK: query: load data local inpath '../data/files/x.txt' INTO TABLE x +POSTHOOK: type: LOAD +POSTHOOK: Output: default@x +PREHOOK: query: load data local inpath '../data/files/y.txt' INTO TABLE y +PREHOOK: type: LOAD +PREHOOK: Output: default@y +POSTHOOK: query: load data local inpath '../data/files/y.txt' INTO TABLE y +POSTHOOK: type: LOAD +POSTHOOK: Output: default@y +PREHOOK: query: load data local inpath '../data/files/z.txt' INTO TABLE z +PREHOOK: type: LOAD +PREHOOK: Output: default@z +POSTHOOK: query: load data local inpath '../data/files/z.txt' INTO 
TABLE z +POSTHOOK: type: LOAD +POSTHOOK: Output: default@z +FAILED: SemanticException [Error 10211]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/mapjoin_subquery.q.out =================================================================== --- ql/src/test/results/clientnegative/mapjoin_subquery.q.out (revision 0) +++ ql/src/test/results/clientnegative/mapjoin_subquery.q.out (working copy) @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10211]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/join31.q.out =================================================================== --- ql/src/test/results/clientnegative/join31.q.out (revision 0) +++ ql/src/test/results/clientnegative/join31.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, cnt INT) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10211]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/join28.q.out =================================================================== --- ql/src/test/results/clientnegative/join28.q.out (revision 0) +++ ql/src/test/results/clientnegative/join28.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10211]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. 
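Where the map-join path cannot absorb the downstream operators, the planner now rejects the hint outright instead of emitting a multi-stage plan, which is why join28 through join35, mapjoin_subquery, mapjoin_subquery2, mapjoin_mapjoin, and union22 move from clientpositive to clientnegative with the new Error 10211 output. A minimal sketch of a shape that now fails, taken from the join31 case in this patch, followed by a hint-free rewrite as one assumed workaround (hive.auto.convert.join lets the optimizer decide on the map join itself rather than forcing it via the hint):

-- Fails at compile time:
-- FAILED: SemanticException [Error 10211]: All operators are not
-- allowed with mapjoin hint. Remove the mapjoin hint.
EXPLAIN
INSERT OVERWRITE TABLE dest_j1
SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
     (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
group by subq1.key;

-- Assumed workaround: drop the hint and let the optimizer convert the
-- join automatically when the small side fits in memory.
set hive.auto.convert.join=true;
EXPLAIN
INSERT OVERWRITE TABLE dest_j1
SELECT subq1.key, count(1) as cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
     (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
group by subq1.key;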
Index: ql/src/test/results/clientnegative/union22.q.out =================================================================== --- ql/src/test/results/clientnegative/union22.q.out (revision 0) +++ ql/src/test/results/clientnegative/union22.q.out (working copy) @@ -0,0 +1,45 @@ +PREHOOK: query: create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dst_union22 +PREHOOK: query: create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dst_union22_delta +PREHOOK: query: insert overwrite table dst_union22 partition (ds='1') +select key, value, key , value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dst_union22@ds=1 +POSTHOOK: query: insert overwrite table dst_union22 partition (ds='1') +select key, value, key , value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dst_union22@ds=1 +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table dst_union22_delta partition (ds='1') +select key, key, value, key, value, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dst_union22_delta@ds=1 +POSTHOOK: query: insert overwrite table dst_union22_delta partition (ds='1') +select key, key, value, key, value, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dst_union22_delta@ds=1 +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k0 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta 
PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +FAILED: SemanticException [Error 10211]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/join32.q.out =================================================================== --- ql/src/test/results/clientnegative/join32.q.out (revision 0) +++ ql/src/test/results/clientnegative/join32.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10211]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/queries/clientpositive/join32.q =================================================================== --- ql/src/test/queries/clientpositive/join32.q (revision 1436745) +++ ql/src/test/queries/clientpositive/join32.q (working copy) @@ -1,17 +0,0 @@ -CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; - -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); - -select * from dest_j1 x order by x.key; - - - Index: ql/src/test/queries/clientpositive/mapjoin_subquery2.q =================================================================== --- ql/src/test/queries/clientpositive/mapjoin_subquery2.q (revision 1436745) +++ ql/src/test/queries/clientpositive/mapjoin_subquery2.q (working copy) @@ -1,39 +0,0 @@ -drop table x; -drop table y; -drop table z; - -CREATE TABLE x (name STRING, id INT) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; - -CREATE TABLE y (id INT, name STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; - -CREATE TABLE z (id INT, name STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; - -load data local inpath '../data/files/x.txt' INTO TABLE x; -load data local inpath '../data/files/y.txt' INTO TABLE y; -load data local inpath '../data/files/z.txt' INTO TABLE z; - -SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id); - -EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id); - -SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id); - -drop table x; -drop table y; -drop table z; Index: ql/src/test/queries/clientpositive/join29.q =================================================================== --- ql/src/test/queries/clientpositive/join29.q (revision 1436745) +++ ql/src/test/queries/clientpositive/join29.q (working copy) @@ -1,14 +0,0 @@ 
-CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT); - -EXPLAIN -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key); - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key); - -select * from dest_j1 x order by x.key; Index: ql/src/test/queries/clientpositive/mapjoin_subquery.q =================================================================== --- ql/src/test/queries/clientpositive/mapjoin_subquery.q (revision 1436745) +++ ql/src/test/queries/clientpositive/mapjoin_subquery.q (working copy) @@ -1,28 +0,0 @@ -EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); - -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); - -EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1; - - -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1; Index: ql/src/test/queries/clientpositive/union22.q =================================================================== --- ql/src/test/queries/clientpositive/union22.q (revision 1436745) +++ ql/src/test/queries/clientpositive/union22.q (working copy) @@ -1,41 +0,0 @@ - -create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string); - - -create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string); - -insert overwrite table dst_union22 partition (ds='1') -select key, value, key , value from src; - -insert overwrite table dst_union22_delta partition (ds='1') -select key, key, value, key, value, value from src; - -set hive.merge.mapfiles=false; - -explain extended -insert overwrite table dst_union22 partition (ds='2') -select * from -( -select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 -union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 -from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on -a.k1 = b.k1 and a.ds='1' -where a.k1 > 20 -) -subq; - -insert overwrite table dst_union22 partition (ds='2') -select * from -( -select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 -union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 -from dst_union22 a left outer join 
(select * from dst_union22_delta where ds = '1' and k0 > 50) b on -a.k1 = b.k1 and a.ds='1' -where a.k1 > 20 -) -subq; - - -select * from dst_union22 where ds = '2' order by k1, k2, k3, k4; Index: ql/src/test/queries/clientpositive/join33.q =================================================================== --- ql/src/test/queries/clientpositive/join33.q (revision 1436745) +++ ql/src/test/queries/clientpositive/join33.q (working copy) @@ -1,17 +0,0 @@ -CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; - -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); - -select * from dest_j1 x order by x.key; - - - Index: ql/src/test/queries/clientpositive/join34.q =================================================================== --- ql/src/test/queries/clientpositive/join34.q (revision 1436745) +++ ql/src/test/queries/clientpositive/join34.q (working copy) @@ -1,27 +0,0 @@ - - -CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; - -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value -FROM -( SELECT x.key as key, x.value as value from src x where x.key < 20 - UNION ALL - SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 -) subq1 -JOIN src1 x ON (x.key = subq1.key); - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value -FROM -( SELECT x.key as key, x.value as value from src x where x.key < 20 - UNION ALL - SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 -) subq1 -JOIN src1 x ON (x.key = subq1.key); - -select * from dest_j1 x order by x.key; - - - Index: ql/src/test/queries/clientpositive/mapjoin_mapjoin.q =================================================================== --- ql/src/test/queries/clientpositive/mapjoin_mapjoin.q (revision 1436745) +++ ql/src/test/queries/clientpositive/mapjoin_mapjoin.q (working copy) @@ -1,5 +0,0 @@ -explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key); - -explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds; - -select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds; Index: ql/src/test/queries/clientpositive/join31.q =================================================================== --- ql/src/test/queries/clientpositive/join31.q (revision 1436745) +++ ql/src/test/queries/clientpositive/join31.q (working copy) @@ -1,16 +0,0 @@ -CREATE TABLE dest_j1(key STRING, cnt INT); - -EXPLAIN -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) -group by subq1.key; - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from 
src y group by y.key) subq2 ON (subq1.key = subq2.key) -group by subq1.key; - -select * from dest_j1 x order by x.key; Index: ql/src/test/queries/clientpositive/join35.q =================================================================== --- ql/src/test/queries/clientpositive/join35.q (revision 1436745) +++ ql/src/test/queries/clientpositive/join35.q (working copy) @@ -1,27 +0,0 @@ - - -CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE; - -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key); - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key); - -select * from dest_j1 x order by x.key; - - - Index: ql/src/test/queries/clientpositive/join28.q =================================================================== --- ql/src/test/queries/clientpositive/join28.q (revision 1436745) +++ ql/src/test/queries/clientpositive/join28.q (working copy) @@ -1,23 +0,0 @@ - - -CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE; - -EXPLAIN -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); - -select * from dest_j1 x order by x.key; - - - Index: ql/src/test/queries/clientpositive/smb_mapjoin_16.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_16.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_16.q (working copy) @@ -0,0 +1,21 @@ +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.reducers.max = 1; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; + +FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT *; + +-- Mapjoin followed by an aggregation should be performed in a single MR job +EXPLAIN +SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key; +SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key; Index: ql/src/test/queries/clientnegative/join31.q =================================================================== ---
ql/src/test/queries/clientnegative/join31.q (revision 0) +++ ql/src/test/queries/clientnegative/join31.q (working copy) @@ -0,0 +1,9 @@ +CREATE TABLE dest_j1(key STRING, cnt INT); + +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) +group by subq1.key; + Index: ql/src/test/queries/clientnegative/union22.q =================================================================== --- ql/src/test/queries/clientnegative/union22.q (revision 0) +++ ql/src/test/queries/clientnegative/union22.q (working copy) @@ -0,0 +1,23 @@ +create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string); +create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string); + +insert overwrite table dst_union22 partition (ds='1') +select key, value, key , value from src; + +insert overwrite table dst_union22_delta partition (ds='1') +select key, key, value, key, value, value from src; + +set hive.merge.mapfiles=false; + +explain extended +insert overwrite table dst_union22 partition (ds='2') +select * from +( +select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 +union all +select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 +from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on +a.k1 = b.k1 and a.ds='1' +where a.k1 > 20 +) +subq; Index: ql/src/test/queries/clientnegative/join32.q =================================================================== --- ql/src/test/queries/clientnegative/join32.q (revision 0) +++ ql/src/test/queries/clientnegative/join32.q (working copy) @@ -0,0 +1,11 @@ +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + + + + Index: ql/src/test/queries/clientnegative/join33.q =================================================================== --- ql/src/test/queries/clientnegative/join33.q (revision 0) +++ ql/src/test/queries/clientnegative/join33.q (working copy) @@ -0,0 +1,7 @@ +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); Index: ql/src/test/queries/clientnegative/join34.q =================================================================== --- ql/src/test/queries/clientnegative/join34.q (revision 0) +++ ql/src/test/queries/clientnegative/join34.q (working copy) @@ -0,0 +1,14 @@ +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value +FROM +( SELECT x.key as key, x.value as value from src x where x.key < 20 + UNION ALL + SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 +) subq1 +JOIN src1 x ON (x.key = subq1.key); + + + Index: ql/src/test/queries/clientnegative/join35.q =================================================================== --- 
ql/src/test/queries/clientnegative/join35.q (revision 0) +++ ql/src/test/queries/clientnegative/join35.q (working copy) @@ -0,0 +1,15 @@ +CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE; + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key); + + + + Index: ql/src/test/queries/clientnegative/join28.q =================================================================== --- ql/src/test/queries/clientnegative/join28.q (revision 0) +++ ql/src/test/queries/clientnegative/join28.q (working copy) @@ -0,0 +1,12 @@ +CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE; + +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); + + + Index: ql/src/test/queries/clientnegative/mapjoin_subquery2.q =================================================================== --- ql/src/test/queries/clientnegative/mapjoin_subquery2.q (revision 0) +++ ql/src/test/queries/clientnegative/mapjoin_subquery2.q (working copy) @@ -0,0 +1,23 @@ +drop table x; +drop table y; +drop table z; + +CREATE TABLE x (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; + +CREATE TABLE y (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; + +CREATE TABLE z (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; + +load data local inpath '../data/files/x.txt' INTO TABLE x; +load data local inpath '../data/files/y.txt' INTO TABLE y; +load data local inpath '../data/files/z.txt' INTO TABLE z; + +EXPLAIN +SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name +FROM +(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 + FROM y JOIN x ON (x.id = y.id)) subq + JOIN z ON (subq.key1 = z.id); Index: ql/src/test/queries/clientnegative/join29.q =================================================================== --- ql/src/test/queries/clientnegative/join29.q (revision 0) +++ ql/src/test/queries/clientnegative/join29.q (working copy) @@ -0,0 +1,7 @@ +CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT); + +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key); Index: ql/src/test/queries/clientnegative/mapjoin_mapjoin.q =================================================================== --- ql/src/test/queries/clientnegative/mapjoin_mapjoin.q (revision 0) +++ ql/src/test/queries/clientnegative/mapjoin_mapjoin.q (working copy) @@ -0,0 +1 @@ +explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key); Index: ql/src/test/queries/clientnegative/mapjoin_subquery.q =================================================================== --- ql/src/test/queries/clientnegative/mapjoin_subquery.q (revision 0) +++ ql/src/test/queries/clientnegative/mapjoin_subquery.q (working copy) @@ -0,0 
+1,6 @@ +EXPLAIN +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java (revision 1436745) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java (working copy) @@ -1,98 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.optimizer; - -import java.io.Serializable; -import java.util.HashMap; -import java.util.Map; -import java.util.Stack; - -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.MapredWork; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; - -/** - * Processor for the rule - map join followed by reduce sink. - */ -public class GenMRRedSink4 implements NodeProcessor { - - public GenMRRedSink4() { - } - - /** - * Reduce Scan encountered. - * - * @param nd - * the reduce sink operator encountered - * @param opProcCtx - * context - */ - public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, - Object... 
nodeOutputs) throws SemanticException { - ReduceSinkOperator op = (ReduceSinkOperator) nd; - GenMRProcContext ctx = (GenMRProcContext) opProcCtx; - - ctx.getParseCtx(); - - // map-join consisted on a bunch of map-only jobs, and it has been split - // after the mapjoin - Operator reducer = op.getChildOperators().get(0); - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); - Task currTask = mapredCtx.getCurrTask(); - MapredWork plan = (MapredWork) currTask.getWork(); - HashMap, Task> opTaskMap = ctx - .getOpTaskMap(); - Task opMapTask = opTaskMap.get(reducer); - - ctx.setCurrTask(currTask); - - // If the plan for this reducer does not exist, initialize the plan - if (opMapTask == null) { - // When the reducer is encountered for the first time - if (plan.getReducer() == null) { - GenMapRedUtils.initMapJoinPlan(op, ctx, true, null, true, -1); - // When mapjoin is followed by a multi-table insert - } else { - GenMapRedUtils.splitPlan(op, ctx); - } - } else { - // There is a join after mapjoin. One of the branches of mapjoin has already - // been initialized. - // Initialize the current branch, and join with the original plan. - assert plan.getReducer() != reducer; - GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, false, true, null); - } - - mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), - ctx.getCurrAliasId())); - - // the mapjoin operator has been processed - ctx.setCurrMapJoinOp(null); - return null; - } -} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java (revision 1436745) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java (working copy) @@ -39,16 +39,13 @@ private final transient boolean[] mapOnlySubq; private final transient boolean[] mapOnlySubqSet; private final transient boolean[] rootTask; - private final transient boolean[] mapJoinSubq; private transient int numInputs; - private transient boolean mapJoinQuery; public UnionParseContext(int numInputs) { this.numInputs = numInputs; mapOnlySubq = new boolean[numInputs]; rootTask = new boolean[numInputs]; - mapJoinSubq = new boolean[numInputs]; mapOnlySubqSet = new boolean[numInputs]; } @@ -61,21 +58,6 @@ this.mapOnlySubqSet[pos] = true; } - public boolean getMapJoinSubq(int pos) { - return mapJoinSubq[pos]; - } - - public void setMapJoinSubq(int pos, boolean mapJoinSubq) { - this.mapJoinSubq[pos] = mapJoinSubq; - if (mapJoinSubq) { - mapJoinQuery = true; - } - } - - public boolean getMapJoinQuery() { - return mapJoinQuery; - } - public boolean getRootTask(int pos) { return rootTask[pos]; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (revision 1436745) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (working copy) @@ -107,30 +107,6 @@ } /** - * Map-join subquery followed by Union. - */ - public static class MapJoinUnion implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - UnionOperator union = (UnionOperator) nd; - UnionProcContext ctx = (UnionProcContext) procCtx; - - // find the branch on which this processor was invoked - int pos = getPositionParent(union, stack); - UnionParseContext uCtx = ctx.getUnionParseContext(union); - if (uCtx == null) { - uCtx = new UnionParseContext(union.getConf().getNumInputs()); - } - - uCtx.setMapJoinSubq(pos, true); - ctx.setUnionParseContext(union, uCtx); - return null; - } - } - - /** * Union subquery followed by Union. */ public static class UnknownUnion implements NodeProcessor { @@ -330,10 +306,6 @@ return new MapUnion(); } - public static NodeProcessor getMapJoinUnion() { - return new MapJoinUnion(); - } - public static NodeProcessor getUnknownUnion() { return new UnknownUnion(); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (revision 1436745) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (working copy) @@ -25,7 +25,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.UnionOperator; @@ -79,9 +78,6 @@ opRules.put(new RuleRegExp("R3", TableScanOperator.getOperatorName() + "%.*" + UnionOperator.getOperatorName() + "%"), UnionProcFactory.getMapUnion()); - opRules.put(new RuleRegExp("R4", - MapJoinOperator.getOperatorName() + "%.*" + UnionOperator.getOperatorName() + "%"), - UnionProcFactory.getMapJoinUnion()); // The dispatcher fires the processor for the matching rule and passes the // context along Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 1436745) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy) @@ -227,7 +227,7 @@ QBJoinTree newJoinTree = newWork.getJoinTree(); // generate the map join operator; already checked the map join MapJoinOperator newMapJoinOp = MapJoinProcessor.convertMapJoin(opParseCtxMap, op, - newJoinTree, mapJoinPos, true); + newJoinTree, mapJoinPos, true, false); // generate the local work and return the big table alias String bigTableAlias = MapJoinProcessor .genMapJoinLocalWork(newWork, newMapJoinOp, mapJoinPos); @@ -241,9 +241,43 @@ e.printStackTrace(); throw new SemanticException("Generate New MapJoin Opertor Exeception " + e.getMessage()); } + } + private static void checkParentOperatorType(Operator op) + throws SemanticException { + if (!op.opAllowedBeforeMapJoin()) { + throw new SemanticException(ErrorMsg.OPERATOR_NOT_ALLOWED_WITH_MAPJOIN.getMsg()); + } + if (op.getParentOperators() != null) { + for (Operator parentOp : op.getParentOperators()) { + checkParentOperatorType(parentOp); + } + } } + private static void checkChildOperatorType(Operator op) + throws SemanticException { + if (!op.opAllowedAfterMapJoin()) { + throw new SemanticException(ErrorMsg.OPERATOR_NOT_ALLOWED_WITH_MAPJOIN.getMsg()); + } + if (op.getChildOperators() != null) { + for (Operator childOp : op.getChildOperators()) { + 
checkChildOperatorType(childOp); + } + } + } + + private static void validateMapJoinTypes(Operator op) + throws SemanticException { + for (Operator parentOp : op.getParentOperators()) { + checkParentOperatorType(parentOp); + } + + for (Operator childOp : op.getChildOperators()) { + checkChildOperatorType(childOp); + } + } + /** * convert a regular join to a a map-side join. * @@ -259,8 +293,10 @@ */ public static MapJoinOperator convertMapJoin( LinkedHashMap, OpParseContext> opParseCtxMap, - JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin) + JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin, + boolean validateMapJoinTree) throws SemanticException { + // outer join cannot be performed on a table which is being cached JoinDesc desc = op.getConf(); JoinCondDesc[] condns = desc.getConds(); @@ -477,6 +513,11 @@ op.setChildOperators(null); op.setParentOperators(null); + // make sure only map-joins can be performed. + if (validateMapJoinTree) { + validateMapJoinTypes(mapJoinOp); + } + return mapJoinOp; } @@ -487,11 +528,10 @@ HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN) && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN); - LinkedHashMap, OpParseContext> opParseCtxMap = pctx .getOpParseCtx(); MapJoinOperator mapJoinOp = convertMapJoin(opParseCtxMap, op, joinTree, mapJoinPos, - noCheckOuterJoin); + noCheckOuterJoin, true); // create a dummy select to select all columns genSelectPlan(pctx, mapJoinOp); return mapJoinOp; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (revision 1436745) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (working copy) @@ -27,7 +27,6 @@ import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; @@ -40,7 +39,6 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; @@ -155,90 +153,10 @@ } } - /** - * GenMRMapJoinCtx. 
- * - */ - public static class GenMRMapJoinCtx { - String taskTmpDir; - TableDesc tt_desc; - Operator rootMapJoinOp; - AbstractMapJoinOperator oldMapJoin; - - public GenMRMapJoinCtx() { - taskTmpDir = null; - tt_desc = null; - rootMapJoinOp = null; - oldMapJoin = null; - } - - /** - * @param taskTmpDir - * @param tt_desc - * @param rootMapJoinOp - * @param oldMapJoin - */ - public GenMRMapJoinCtx(String taskTmpDir, TableDesc tt_desc, - Operator rootMapJoinOp, - AbstractMapJoinOperator oldMapJoin) { - this.taskTmpDir = taskTmpDir; - this.tt_desc = tt_desc; - this.rootMapJoinOp = rootMapJoinOp; - this.oldMapJoin = oldMapJoin; - } - - public void setTaskTmpDir(String taskTmpDir) { - this.taskTmpDir = taskTmpDir; - } - - public String getTaskTmpDir() { - return taskTmpDir; - } - - public void setTTDesc(TableDesc tt_desc) { - this.tt_desc = tt_desc; - } - - public TableDesc getTTDesc() { - return tt_desc; - } - - /** - * @return the childSelect - */ - public Operator getRootMapJoinOp() { - return rootMapJoinOp; - } - - /** - * @param rootMapJoinOp - * the rootMapJoinOp to set - */ - public void setRootMapJoinOp(Operator rootMapJoinOp) { - this.rootMapJoinOp = rootMapJoinOp; - } - - /** - * @return the oldMapJoin - */ - public AbstractMapJoinOperator getOldMapJoin() { - return oldMapJoin; - } - - /** - * @param oldMapJoin - * the oldMapJoin to set - */ - public void setOldMapJoin(AbstractMapJoinOperator oldMapJoin) { - this.oldMapJoin = oldMapJoin; - } - } - private HiveConf conf; private HashMap, Task> opTaskMap; private HashMap unionTaskMap; - private HashMap, GenMRMapJoinCtx> mapJoinTaskMap; private List> seenOps; private List seenFileSinkOps; @@ -250,7 +168,6 @@ private Task currTask; private Operator currTopOp; private UnionOperator currUnionOp; - private AbstractMapJoinOperator currMapJoinOp; private String currAliasId; private List> rootOps; private DependencyCollectionTask dependencyTaskForMultiInsert; @@ -313,12 +230,10 @@ currTask = null; currTopOp = null; currUnionOp = null; - currMapJoinOp = null; currAliasId = null; rootOps = new ArrayList>(); rootOps.addAll(parseCtx.getTopOps().values()); unionTaskMap = new HashMap(); - mapJoinTaskMap = new HashMap, GenMRMapJoinCtx>(); dependencyTaskForMultiInsert = null; linkedFileDescTasks = null; } @@ -488,19 +403,7 @@ this.currUnionOp = currUnionOp; } - public AbstractMapJoinOperator getCurrMapJoinOp() { - return currMapJoinOp; - } - /** - * @param currMapJoinOp - * current map join operator - */ - public void setCurrMapJoinOp(AbstractMapJoinOperator currMapJoinOp) { - this.currMapJoinOp = currMapJoinOp; - } - - /** * @return current top alias */ public String getCurrAliasId() { @@ -523,14 +426,6 @@ unionTaskMap.put(op, uTask); } - public GenMRMapJoinCtx getMapJoinCtx(AbstractMapJoinOperator op) { - return mapJoinTaskMap.get(op); - } - - public void setMapJoinCtx(AbstractMapJoinOperator op, GenMRMapJoinCtx mjCtx) { - mapJoinTaskMap.put(op, mjCtx); - } - /** * Get the input set. 
*/ Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (revision 1436745) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (working copy) @@ -26,7 +26,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.Task; @@ -35,7 +34,6 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; @@ -44,10 +42,8 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; @@ -84,16 +80,10 @@ } UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union); - if ((uPrsCtx != null) && (uPrsCtx.getMapJoinQuery())) { - GenMapRedUtils.mergeMapJoinUnion(union, ctx, - UnionProcFactory.getPositionParent(union, stack)); - } - else { - ctx.getMapCurrCtx().put( - (Operator) union, - new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), - ctx.getCurrAliasId())); - } + ctx.getMapCurrCtx().put( + (Operator) union, + new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), + ctx.getCurrAliasId())); // if the union is the first time seen, set current task to GenMRUnionCtx uCtxTask = ctx.getUnionTask(union); @@ -103,7 +93,7 @@ ctx.setUnionTask(union, uCtxTask); } - Task uTask=ctx.getCurrTask(); + Task uTask = ctx.getCurrTask(); if (uTask.getParentTasks() == null || uTask.getParentTasks().isEmpty()) { if (!ctx.getRootTasks().contains(uTask)) { @@ -134,8 +124,9 @@ GenMRUnionCtx uCtxTask) { ParseContext parseCtx = ctx.getParseCtx(); - TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema( - parent.getSchema(), "temporarycol")); + TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils + .getFieldSchemasFromRowSchema( + parent.getSchema(), "temporarycol")); // generate the temporary file Context baseCtx = parseCtx.getContext(); @@ -150,7 +141,7 @@ parent.getChildOperators().set(0, fs_op); List> parentOpList = - new ArrayList>(); + new ArrayList>(); parentOpList.add(parent); fs_op.setParentOperators(parentOpList); @@ -158,7 +149,7 @@ Operator ts_op = OperatorFactory.get( new TableScanDesc(), parent.getSchema()); List> childOpList = - new ArrayList>(); + new ArrayList>(); childOpList.add(child); ts_op.setChildOperators(childOpList); child.replaceParent(parent, ts_op); @@ -212,27 +203,9 @@ } } - private void processSubQueryUnionMapJoin(GenMRProcContext ctx) { - AbstractMapJoinOperator mjOp = ctx.getCurrMapJoinOp(); - 
assert mjOp != null; - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mjOp); - assert mjCtx != null; - MapredWork plan = (MapredWork) ctx.getCurrTask().getWork(); - - String taskTmpDir = mjCtx.getTaskTmpDir(); - TableDesc tt_desc = mjCtx.getTTDesc(); - assert plan.getPathToAliases().get(taskTmpDir) == null; - plan.getPathToAliases().put(taskTmpDir, new ArrayList()); - plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - plan.getPathToPartitionInfo().put(taskTmpDir, - new PartitionDesc(tt_desc, null)); - plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); - } - /** * Union Operator encountered . Currently, the algorithm is pretty simple: If - * all the sub-queries are map-only, don't do anything. However, if there is a - * mapjoin followed by the union, merge at the union Otherwise, insert a + * all the sub-queries are map-only, don't do anything. Otherwise, insert a * FileSink on top of all the sub-queries. * * This can be optimized later on. @@ -284,8 +257,7 @@ } // Copy into the current union task plan if - if (uPrsCtx.getMapOnlySubq(pos) - && !uPrsCtx.getMapJoinSubq(pos) && uPrsCtx.getRootTask(pos)) { + if (uPrsCtx.getMapOnlySubq(pos) && uPrsCtx.getRootTask(pos)) { processSubQueryUnionMerge(ctx, uCtxTask, union, stack); } // If it a map-reduce job, create a temporary file @@ -295,13 +267,10 @@ && (!ctx.getRootTasks().contains(currTask))) { ctx.getRootTasks().add(currTask); } - // If there is a mapjoin at position 'pos' - if (uPrsCtx.getMapJoinSubq(pos)) { - processSubQueryUnionMapJoin(ctx); - } - processSubQueryUnionCreateIntermediate(union.getParentOperators().get(pos), union, uTask, ctx, uCtxTask); - //the currAliasId and CurrTopOp is not valid any more + processSubQueryUnionCreateIntermediate(union.getParentOperators().get(pos), union, uTask, + ctx, uCtxTask); + // the currAliasId and CurrTopOp are not valid any more ctx.setCurrAliasId(null); ctx.setCurrTopOp(null); ctx.getOpTaskMap().put(null, uTask); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (revision 1436745) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (working copy) @@ -81,7 +81,7 @@ } else { // This will happen in case of joins.
The current plan can be thrown away // after being merged with the original plan - GenMapRedUtils.joinPlan(op, null, opMapTask, ctx, -1, false, false, null); + GenMapRedUtils.joinPlan(op, null, opMapTask, ctx, -1, false); currTask = opMapTask; ctx.setCurrTask(currTask); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 1436745) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -20,7 +20,6 @@ import java.io.Serializable; import java.util.ArrayList; -import java.util.ConcurrentModificationException; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; @@ -33,12 +32,10 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -47,19 +44,15 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext; import org.apache.hadoop.hive.ql.parse.OpParseContext; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; @@ -97,12 +90,12 @@ throws SemanticException { Operator reducer = op.getChildOperators().get(0); Map, GenMapRedCtx> mapCurrCtx = - opProcCtx.getMapCurrCtx(); + opProcCtx.getMapCurrCtx(); GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); Task currTask = mapredCtx.getCurrTask(); MapredWork plan = (MapredWork) currTask.getWork(); HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); + opProcCtx.getOpTaskMap(); Operator currTopOp = opProcCtx.getCurrTopOp(); opTaskMap.put(reducer, currTask); @@ -114,7 +107,7 @@ List> rootTasks = opProcCtx.getRootTasks(); if (!rootTasks.contains(currTask)) { - rootTasks.add(currTask); + rootTasks.add(currTask); } if (reducer.getClass() == JoinOperator.class) { plan.setNeedsTagging(true); @@ -137,167 +130,8 @@ opProcCtx.setCurrAliasId(currAliasId); } - public static void initMapJoinPlan( - Operator op, GenMRProcContext ctx, - boolean 
readInputMapJoin, UnionOperator currUnionOp, boolean setReducer, int pos) - throws SemanticException { - initMapJoinPlan(op, ctx, readInputMapJoin, currUnionOp, setReducer, pos, false); - } /** - * Initialize the current plan by adding it to root tasks. - * - * @param op - * the map join operator encountered - * @param opProcCtx - * processing context - * @param pos - * position of the parent - */ - public static void initMapJoinPlan(Operator op, - GenMRProcContext opProcCtx, boolean readInputMapJoin, - UnionOperator currUnionOp, boolean setReducer, int pos, boolean createLocalPlan) - throws SemanticException { - Map, GenMapRedCtx> mapCurrCtx = - opProcCtx.getMapCurrCtx(); - assert (((pos == -1) && (readInputMapJoin)) || (pos != -1)); - int parentPos = (pos == -1) ? 0 : pos; - GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get( - parentPos)); - Task currTask = mapredCtx.getCurrTask(); - MapredWork plan = (MapredWork) currTask.getWork(); - HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); - Operator currTopOp = opProcCtx.getCurrTopOp(); - - // The mapjoin has already been encountered. Some context must be stored - // about that - if (readInputMapJoin) { - AbstractMapJoinOperator currMapJoinOp = opProcCtx.getCurrMapJoinOp(); - assert currMapJoinOp != null; - boolean local = ((pos == -1) || (pos == (currMapJoinOp.getConf()).getPosBigTable())) ? - false : true; - - if (setReducer) { - Operator reducer = op.getChildOperators().get(0); - plan.setReducer(reducer); - opTaskMap.put(reducer, currTask); - if (reducer.getClass() == JoinOperator.class) { - plan.setNeedsTagging(true); - } - ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf(); - plan.setNumReduceTasks(desc.getNumReducers()); - } else { - opTaskMap.put(op, currTask); - } - - if (currUnionOp == null) { - GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(currMapJoinOp); - String taskTmpDir; - TableDesc tt_desc; - Operator rootOp; - - if (mjCtx.getOldMapJoin() == null || setReducer) { - taskTmpDir = mjCtx.getTaskTmpDir(); - tt_desc = mjCtx.getTTDesc(); - rootOp = mjCtx.getRootMapJoinOp(); - } else { - GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(mjCtx - .getOldMapJoin()); - taskTmpDir = oldMjCtx.getTaskTmpDir(); - tt_desc = oldMjCtx.getTTDesc(); - rootOp = oldMjCtx.getRootMapJoinOp(); - } - - setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); - setupBucketMapJoinInfo(plan, currMapJoinOp, createLocalPlan); - } else { - initUnionPlan(opProcCtx, currUnionOp, currTask, false); - } - - opProcCtx.setCurrMapJoinOp(null); - } else { - MapJoinDesc desc = (MapJoinDesc) op.getConf(); - - // The map is overloaded to keep track of mapjoins also - opTaskMap.put(op, currTask); - - List> rootTasks = opProcCtx.getRootTasks(); - if (!rootTasks.contains(currTask)) { - rootTasks.add(currTask); - } - - assert currTopOp != null; - List> seenOps = opProcCtx.getSeenOps(); - String currAliasId = opProcCtx.getCurrAliasId(); - - seenOps.add(currTopOp); - boolean local = (pos == desc.getPosBigTable()) ? 
false : true; - setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); - setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator)op, createLocalPlan); - } - - opProcCtx.setCurrTask(currTask); - opProcCtx.setCurrTopOp(null); - opProcCtx.setCurrAliasId(null); - } - - private static void setupBucketMapJoinInfo(MapredWork plan, - AbstractMapJoinOperator currMapJoinOp, boolean createLocalPlan) { - if (currMapJoinOp != null) { - Map>> aliasBucketFileNameMapping = - currMapJoinOp.getConf().getAliasBucketFileNameMapping(); - if(aliasBucketFileNameMapping!= null) { - MapredLocalWork localPlan = plan.getMapLocalWork(); - if(localPlan == null) { - if(currMapJoinOp instanceof SMBMapJoinOperator) { - localPlan = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork(); - } - if (localPlan == null && createLocalPlan) { - localPlan = new MapredLocalWork( - new LinkedHashMap>(), - new LinkedHashMap()); - } - } else { - //local plan is not null, we want to merge it into SMBMapJoinOperator's local work - if(currMapJoinOp instanceof SMBMapJoinOperator) { - MapredLocalWork smbLocalWork = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork(); - if(smbLocalWork != null) { - localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork()); - localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork()); - } - } - } - - if(localPlan == null) { - return; - } - - if(currMapJoinOp instanceof SMBMapJoinOperator) { - plan.setMapLocalWork(null); - ((SMBMapJoinOperator)currMapJoinOp).getConf().setLocalWork(localPlan); - } else { - plan.setMapLocalWork(localPlan); - } - BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext(); - localPlan.setBucketMapjoinContext(bucketMJCxt); - bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping); - bucketMJCxt.setBucketFileNameMapping(currMapJoinOp.getConf().getBigTableBucketNumMapping()); - localPlan.setInputFileChangeSensitive(true); - bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); - bucketMJCxt.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class); - bucketMJCxt.setBigTablePartSpecToFileMapping( - currMapJoinOp.getConf().getBigTablePartSpecToFileMapping()); - // BucketizedHiveInputFormat should be used for either sort merge join or bucket map join - if ((currMapJoinOp instanceof SMBMapJoinOperator) - || (currMapJoinOp.getConf().isBucketMapJoin())) { - plan.setUseBucketizedHiveInputFormat(true); - } - } - } - } - - /** * Initialize the current union plan. * * @param op @@ -312,7 +146,7 @@ MapredWork plan = (MapredWork) unionTask.getWork(); HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); + opProcCtx.getOpTaskMap(); opTaskMap.put(reducer, unionTask); plan.setReducer(reducer); @@ -377,6 +211,7 @@ Task currTask, boolean local) throws SemanticException { MapredWork plan = (MapredWork) currTask.getWork(); + // In case of lateral views followed by a join, the same tree // can be traversed more than one if (currUnionOp != null) { @@ -433,13 +268,6 @@ opProcCtx.setCurrTask(existingTask); } - public static void joinPlan(Operator op, - Task oldTask, Task task, - GenMRProcContext opProcCtx, int pos, boolean split, - boolean readMapJoinData, UnionOperator currUnionOp) throws SemanticException { - joinPlan(op, oldTask, task, opProcCtx, pos, split, readMapJoinData, currUnionOp, false); - } - /** * Merge the current task with the task for the current reducer. 
* @@ -456,8 +284,7 @@ */ public static void joinPlan(Operator op, Task oldTask, Task task, - GenMRProcContext opProcCtx, int pos, boolean split, - boolean readMapJoinData, UnionOperator currUnionOp, boolean createLocalWork) + GenMRProcContext opProcCtx, int pos, boolean split) throws SemanticException { Task currTask = task; MapredWork plan = (MapredWork) currTask.getWork(); @@ -493,53 +320,15 @@ : true; } setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); - if(op instanceof AbstractMapJoinOperator) { - setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator)op, createLocalWork); - } } currTopOp = null; opProcCtx.setCurrTopOp(currTopOp); - } else if (opProcCtx.getCurrMapJoinOp() != null) { - AbstractMapJoinOperator mjOp = opProcCtx.getCurrMapJoinOp(); - if (currUnionOp != null) { - initUnionPlan(opProcCtx, currUnionOp, currTask, false); - } else { - GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp); - - // In case of map-join followed by map-join, the file needs to be - // obtained from the old map join - AbstractMapJoinOperator oldMapJoin = mjCtx.getOldMapJoin(); - String taskTmpDir = null; - TableDesc tt_desc = null; - Operator rootOp = null; - - boolean local = ((pos == -1) || (pos == (mjOp.getConf()) - .getPosBigTable())) ? false : true; - if (oldMapJoin == null) { - if (opProcCtx.getParseCtx().getListMapJoinOpsNoReducer().contains(mjOp) - || local || (oldTask != null) && (parTasks != null)) { - taskTmpDir = mjCtx.getTaskTmpDir(); - tt_desc = mjCtx.getTTDesc(); - rootOp = mjCtx.getRootMapJoinOp(); - } - } else { - GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(oldMapJoin); - assert oldMjCtx != null; - taskTmpDir = oldMjCtx.getTaskTmpDir(); - tt_desc = oldMjCtx.getTTDesc(); - rootOp = oldMjCtx.getRootMapJoinOp(); - } - - setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); - setupBucketMapJoinInfo(plan, oldMapJoin, createLocalWork); - } - opProcCtx.setCurrMapJoinOp(null); } if ((oldTask != null) && (parTasks != null)) { for (Task parTask : parTasks) { parTask.addDependentTask(currTask); - if(opProcCtx.getRootTasks().contains(currTask)) { + if (opProcCtx.getRootTasks().contains(currTask)) { opProcCtx.getRootTasks().remove(currTask); } } @@ -557,7 +346,7 @@ * processing context */ public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) - throws SemanticException { + throws SemanticException { // Generate a new task ParseContext parseCtx = opProcCtx.getParseCtx(); MapredWork cplan = getMapRedWork(parseCtx); @@ -572,7 +361,7 @@ cplan.setNumReduceTasks(new Integer(desc.getNumReducers())); HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); + opProcCtx.getOpTaskMap(); opTaskMap.put(reducer, redTask); Task currTask = opProcCtx.getCurrTask(); @@ -622,7 +411,6 @@ return currentInput; } - /** * set the current task in the mapredWork. 
* @@ -657,12 +445,12 @@ if (partsList == null) { try { - partsList = parseCtx.getOpToPartList().get((TableScanOperator)topOp); + partsList = parseCtx.getOpToPartList().get((TableScanOperator) topOp); if (partsList == null) { partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp), - parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(), - alias_id, parseCtx.getPrunedPartitions()); - parseCtx.getOpToPartList().put((TableScanOperator)topOp, partsList); + parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(), + alias_id, parseCtx.getPrunedPartitions()); + parseCtx.getOpToPartList().put((TableScanOperator) topOp, partsList); } } catch (SemanticException e) { throw e; @@ -701,7 +489,8 @@ long sizeNeeded = Integer.MAX_VALUE; int fileLimit = -1; if (parseCtx.getGlobalLimitCtx().isEnable()) { - long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITMAXROWSIZE); + long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(), + HiveConf.ConfVars.HIVELIMITMAXROWSIZE); sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow; // for the optimization that reduce number of input file, we limit number // of files allowed. If more than specific number of files have to be @@ -709,7 +498,7 @@ // inputs can cause unpredictable latency. It's not necessarily to be // cheaper. fileLimit = - HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE); + HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE); if (sizePerRow <= 0 || fileLimit <= 0) { LOG.info("Skip optimization to reduce input size of 'limit'"); @@ -735,6 +524,7 @@ // partitioned table and whether any partition is selected or not PlanUtils.addInput(inputs, new ReadEntity(parseCtx.getTopToTable().get(topOp), parentViewInfo)); + for (Partition part : parts) { if (part.getTable().isPartitioned()) { PlanUtils.addInput(inputs, new ReadEntity(part, parentViewInfo)); @@ -902,7 +692,7 @@ Operator topOp, MapredWork plan, boolean local, TableDesc tt_desc) throws SemanticException { - if(path == null || alias == null) { + if (path == null || alias == null) { return; } @@ -984,8 +774,8 @@ MapredWork work = new MapredWork(); boolean mapperCannotSpanPartns = - conf.getBoolVar( - HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS); + conf.getBoolVar( + HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS); work.setMapperCannotSpanPartns(mapperCannotSpanPartns); work.setPathToAliases(new LinkedHashMap>()); work.setPathToPartitionInfo(new LinkedHashMap()); @@ -1066,7 +856,7 @@ // replace the reduce child with this operator List> childOpList = parent - .getChildOperators(); + .getChildOperators(); for (int pos = 0; pos < childOpList.size(); pos++) { if (childOpList.get(pos) == op) { childOpList.set(pos, fs_op); @@ -1075,7 +865,7 @@ } List> parentOpList = - new ArrayList>(); + new ArrayList>(); parentOpList.add(parent); fs_op.setParentOperators(parentOpList); @@ -1091,7 +881,7 @@ op.getParentOperators().set(posn, ts_op); Map, GenMapRedCtx> mapCurrCtx = - opProcCtx.getMapCurrCtx(); + opProcCtx.getMapCurrCtx(); mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null, null)); String streamDesc = taskTmpDir; @@ -1119,101 +909,14 @@ // Add the path to alias mapping setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan, local, tt_desc); - - // This can be cleaned up as a function table in future - if (op instanceof AbstractMapJoinOperator) { - AbstractMapJoinOperator mjOp = (AbstractMapJoinOperator) op; - opProcCtx.setCurrMapJoinOp(mjOp); - 
GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp); - if (mjCtx == null) { - mjCtx = new GenMRMapJoinCtx(taskTmpDir, tt_desc, ts_op, null); - } else { - mjCtx.setTaskTmpDir(taskTmpDir); - mjCtx.setTTDesc(tt_desc); - mjCtx.setRootMapJoinOp(ts_op); - } - opProcCtx.setMapJoinCtx(mjOp, mjCtx); - opProcCtx.getMapCurrCtx().put(parent, - new GenMapRedCtx(childTask, null, null)); - setupBucketMapJoinInfo(cplan, mjOp, false); - } - - currTopOp = null; - String currAliasId = null; - - opProcCtx.setCurrTopOp(currTopOp); - opProcCtx.setCurrAliasId(currAliasId); + opProcCtx.setCurrTopOp(null); + opProcCtx.setCurrAliasId(null); opProcCtx.setCurrTask(childTask); } - public static void mergeMapJoinUnion(UnionOperator union, - GenMRProcContext ctx, int pos) throws SemanticException { - ParseContext parseCtx = ctx.getParseCtx(); - UnionProcContext uCtx = parseCtx.getUCtx(); - - UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union); - assert uPrsCtx != null; - - Task currTask = ctx.getCurrTask(); - - GenMRUnionCtx uCtxTask = ctx.getUnionTask(union); - Task uTask = null; - - union.getParentOperators().get(pos); - MapredWork uPlan = null; - - // union is encountered for the first time - if (uCtxTask == null) { - uCtxTask = new GenMRUnionCtx(); - uPlan = GenMapRedUtils.getMapRedWork(parseCtx); - uTask = TaskFactory.get(uPlan, parseCtx.getConf()); - uCtxTask.setUTask(uTask); - ctx.setUnionTask(union, uCtxTask); - } else { - uTask = uCtxTask.getUTask(); - uPlan = (MapredWork) uTask.getWork(); - } - - // If there is a mapjoin at position 'pos' - if (uPrsCtx.getMapJoinSubq(pos)) { - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(ctx.getCurrMapJoinOp()); - String taskTmpDir = mjCtx.getTaskTmpDir(); - if (uPlan.getPathToAliases().get(taskTmpDir) == null) { - uPlan.getPathToAliases().put(taskTmpDir, new ArrayList()); - uPlan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - uPlan.getPathToPartitionInfo().put(taskTmpDir, - new PartitionDesc(mjCtx.getTTDesc(), null)); - uPlan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); - } - - for (Task t : currTask.getParentTasks()) { - t.addDependentTask(uTask); - } - try { - boolean notDone = true; - while (notDone) { - for (Task t : currTask.getParentTasks()) { - t.removeDependentTask(currTask); - } - notDone = false; - } - } catch (ConcurrentModificationException e) { - } - } else { - setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), uPlan, false, ctx); - } - - ctx.setCurrTask(uTask); - ctx.setCurrAliasId(null); - ctx.setCurrTopOp(null); - ctx.setCurrMapJoinOp(null); - - ctx.getMapCurrCtx().put(union, - new GenMapRedCtx(ctx.getCurrTask(), null, null)); - } - private GenMapRedUtils() { // prevent instantiation } } + Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (revision 1436745) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (working copy) @@ -18,49 +18,38 @@ package org.apache.hadoop.hive.ql.optimizer; import java.io.Serializable; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Stack; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.OperatorFactory; -import 
org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.Utils; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; -import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.FileSinkDesc; +import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.plan.TableDesc; /** * Operator factory for MapJoin processing. */ public final class MapJoinFactory { - public static int getPositionParent(AbstractMapJoinOperator op, Stack stack) { + public static int getPositionParent(AbstractMapJoinOperator op, + Stack stack) { int pos = 0; int size = stack.size(); assert size >= 2 && stack.get(size - 1) == op; Operator parent = - (Operator) stack.get(size - 2); + (Operator) stack.get(size - 2); List> parOp = op.getParentOperators(); pos = parOp.indexOf(parent); assert pos < parOp.size(); @@ -72,217 +61,148 @@ */ public static class TableScanMapJoin implements NodeProcessor { - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd; - GenMRProcContext ctx = (GenMRProcContext) procCtx; + public static void setupBucketMapJoinInfo(MapredWork plan, + AbstractMapJoinOperator currMapJoinOp) { + if (currMapJoinOp != null) { + Map>> aliasBucketFileNameMapping = + currMapJoinOp.getConf().getAliasBucketFileNameMapping(); + if (aliasBucketFileNameMapping != null) { + MapredLocalWork localPlan = plan.getMapLocalWork(); + if (localPlan == null) { + if (currMapJoinOp instanceof SMBMapJoinOperator) { + localPlan = ((SMBMapJoinOperator) currMapJoinOp).getConf().getLocalWork(); + } + } else { + // local plan is not null, we want to merge it into SMBMapJoinOperator's local work + if (currMapJoinOp instanceof SMBMapJoinOperator) { + MapredLocalWork smbLocalWork = ((SMBMapJoinOperator) currMapJoinOp).getConf() + .getLocalWork(); + if (smbLocalWork != null) { + localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork()); + localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork()); + } + } + } - // find the branch on which this processor was invoked - int pos = getPositionParent(mapJoin, stack); + if (localPlan == null) { + return; + } - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get( - pos)); - Task currTask = mapredCtx.getCurrTask(); - MapredWork currPlan = (MapredWork) currTask.getWork(); - Operator currTopOp = mapredCtx.getCurrTopOp(); - String currAliasId = mapredCtx.getCurrAliasId(); - Operator reducer = mapJoin; - HashMap, Task> opTaskMap = - ctx.getOpTaskMap(); - Task opMapTask = opTaskMap.get(reducer); - - ctx.setCurrTopOp(currTopOp); - ctx.setCurrAliasId(currAliasId); - ctx.setCurrTask(currTask); - - // If the plan for this reducer does not exist, initialize the plan - if (opMapTask == null) { - assert currPlan.getReducer() == null; - GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, false, null, false, pos); - } else { - // The current plan can be thrown away after being merged with the - // original plan - GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false, - false, null); - currTask = opMapTask; - ctx.setCurrTask(currTask); + if (currMapJoinOp instanceof SMBMapJoinOperator) { + plan.setMapLocalWork(null); + ((SMBMapJoinOperator) currMapJoinOp).getConf().setLocalWork(localPlan); + } else { + plan.setMapLocalWork(localPlan); + } + BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext(); + localPlan.setBucketMapjoinContext(bucketMJCxt); + bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping); + bucketMJCxt.setBucketFileNameMapping( + currMapJoinOp.getConf().getBigTableBucketNumMapping()); + localPlan.setInputFileChangeSensitive(true); + bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); + bucketMJCxt + .setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class); + bucketMJCxt.setBigTablePartSpecToFileMapping( + currMapJoinOp.getConf().getBigTablePartSpecToFileMapping()); + // BucketizedHiveInputFormat should be used for either sort merge join or bucket map join + if ((currMapJoinOp instanceof SMBMapJoinOperator) + || (currMapJoinOp.getConf().isBucketMapJoin())) { + plan.setUseBucketizedHiveInputFormat(true); + } + } } - - mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx - .getCurrTopOp(), ctx.getCurrAliasId())); - return null; } - } - /** - * ReduceSink followed by MapJoin. 
- */ - public static class ReduceSinkMapJoin implements NodeProcessor { + /** + * Initialize the current plan by adding it to root tasks. + * + * @param op + * the map join operator encountered + * @param opProcCtx + * processing context + * @param pos + * position of the parent + */ + private static void initMapJoinPlan(AbstractMapJoinOperator op, + GenMRProcContext opProcCtx, int pos) + throws SemanticException { + Map, GenMapRedCtx> mapCurrCtx = + opProcCtx.getMapCurrCtx(); + int parentPos = (pos == -1) ? 0 : pos; + GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get( + parentPos)); + Task currTask = mapredCtx.getCurrTask(); + MapredWork plan = (MapredWork) currTask.getWork(); + HashMap, Task> opTaskMap = + opProcCtx.getOpTaskMap(); + Operator currTopOp = opProcCtx.getCurrTopOp(); - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd; - GenMRProcContext opProcCtx = (GenMRProcContext) procCtx; + MapJoinDesc desc = (MapJoinDesc) op.getConf(); - ParseContext parseCtx = opProcCtx.getParseCtx(); - MapredWork cplan = GenMapRedUtils.getMapRedWork(parseCtx); - Task redTask = TaskFactory.get(cplan, parseCtx - .getConf()); - Task currTask = opProcCtx.getCurrTask(); + // The map is overloaded to keep track of mapjoins also + opTaskMap.put(op, currTask); - // find the branch on which this processor was invoked - int pos = getPositionParent(mapJoin, stack); - boolean local = (pos == ((mapJoin.getConf())).getPosBigTable()) ? false - : true; + List> rootTasks = opProcCtx.getRootTasks(); + assert (!rootTasks.contains(currTask)); + rootTasks.add(currTask); - GenMapRedUtils.splitTasks(mapJoin, currTask, redTask, opProcCtx, false, - local, pos); + assert currTopOp != null; + opProcCtx.getSeenOps().add(currTopOp); - currTask = opProcCtx.getCurrTask(); - HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); - Task opMapTask = opTaskMap.get(mapJoin); - - // If the plan for this reducer does not exist, initialize the plan - if (opMapTask == null) { - assert cplan.getReducer() == null; - opTaskMap.put(mapJoin, currTask); - opProcCtx.setCurrMapJoinOp(null); - } else { - // The current plan can be thrown away after being merged with the - // original plan - GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, opProcCtx, pos, - false, false, null); - currTask = opMapTask; - opProcCtx.setCurrTask(currTask); - } - - return null; + String currAliasId = opProcCtx.getCurrAliasId(); + boolean local = (pos == desc.getPosBigTable()) ? false : true; + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); + setupBucketMapJoinInfo(plan, op); } - } - /** - * MapJoin followed by Select. - */ - public static class MapJoin implements NodeProcessor { - /** - * Create a task by splitting the plan below the join. The reason, we have - * to do so in the processing of Select and not MapJoin is due to the - * walker. While processing a node, it is not safe to alter its children - * because that will decide the course of the walk. It is perfectly fine to - * muck around with its parents though, since those nodes have already been - * visited. + * Merge the current task with the task for the current reducer. 
+ * + * @param op + * operator being processed + * @param task + * the current task for the current reducer + * @param opProcCtx + * processing context + * @param pos + * position of the parent in the stack */ public static void joinMapJoinPlan(AbstractMapJoinOperator op, + Task task, + GenMRProcContext opProcCtx, int pos) + throws SemanticException { + Task currTask = task; + MapredWork plan = (MapredWork) currTask.getWork(); + Operator currTopOp = opProcCtx.getCurrTopOp(); + List> parTasks = null; - SelectOperator sel = (SelectOperator) nd; - AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) sel.getParentOperators().get( - 0); - assert sel.getParentOperators().size() == 1; + List> seenOps = opProcCtx.getSeenOps(); + String currAliasId = opProcCtx.getCurrAliasId(); - GenMRProcContext ctx = (GenMRProcContext) procCtx; - ParseContext parseCtx = ctx.getParseCtx(); - - // is the mapjoin followed by a reducer - List> listMapJoinOps = parseCtx - .getListMapJoinOpsNoReducer(); - - if (listMapJoinOps.contains(mapJoin)) { - ctx.setCurrAliasId(null); - ctx.setCurrTopOp(null); - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - mapCurrCtx.put((Operator) nd, new GenMapRedCtx( - ctx.getCurrTask(), null, null)); - return null; + if (!seenOps.contains(currTopOp)) { + seenOps.add(currTopOp); + boolean local = false; + if (pos != -1) { + local = (pos == ((MapJoinDesc) op.getConf()).getPosBigTable()) ? false + : true; + } + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); + setupBucketMapJoinInfo(plan, op); } - - ctx.setCurrMapJoinOp(mapJoin); - - Task currTask = ctx.getCurrTask(); - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin); - if (mjCtx == null) { - mjCtx = new GenMRMapJoinCtx(); - ctx.setMapJoinCtx(mapJoin, mjCtx); - } - - MapredWork mjPlan = GenMapRedUtils.getMapRedWork(parseCtx); - Task mjTask = TaskFactory.get(mjPlan, parseCtx - .getConf()); - - TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils - .getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol")); - - // generate the temporary file - Context baseCtx = parseCtx.getContext(); - String taskTmpDir = baseCtx.getMRTmpFileURI(); - - // Add the path to alias mapping - mjCtx.setTaskTmpDir(taskTmpDir); - mjCtx.setTTDesc(tt_desc); - mjCtx.setRootMapJoinOp(sel); - - sel.setParentOperators(null); - - // Create a file sink operator for this file name - Operator fs_op = OperatorFactory.get( - new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar( - HiveConf.ConfVars.COMPRESSINTERMEDIATE)), mapJoin.getSchema()); - - assert mapJoin.getChildOperators().size() == 1; - mapJoin.getChildOperators().set(0, fs_op); - - List> parentOpList = - new ArrayList>(); - parentOpList.add(mapJoin); - fs_op.setParentOperators(parentOpList); - - currTask.addDependentTask(mjTask); - - ctx.setCurrTask(mjTask); - ctx.setCurrAliasId(null); - ctx.setCurrTopOp(null); - - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - mapCurrCtx.put((Operator) nd, new GenMapRedCtx( - ctx.getCurrTask(), null, null)); - - return null; + currTopOp = null; + opProcCtx.setCurrTopOp(currTopOp); + opProcCtx.setCurrTask(currTask); } - } - /** - * MapJoin followed by MapJoin.
- */ - public static class MapJoinMapJoin implements NodeProcessor { - @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - AbstractMapJoinOperator mapJoin = - (AbstractMapJoinOperator) nd; + Object... nodeOutputs) throws SemanticException { + AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd; GenMRProcContext ctx = (GenMRProcContext) procCtx; - ctx.getParseCtx(); - AbstractMapJoinOperator oldMapJoin = ctx.getCurrMapJoinOp(); - - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin); - if (mjCtx != null) { - mjCtx.setOldMapJoin(oldMapJoin); - } else { - ctx.setMapJoinCtx(mapJoin, new GenMRMapJoinCtx(null, null, null, - oldMapJoin)); - } - ctx.setCurrMapJoinOp(mapJoin); - // find the branch on which this processor was invoked int pos = getPositionParent(mapJoin, stack); @@ -292,97 +212,29 @@ pos)); Task currTask = mapredCtx.getCurrTask(); MapredWork currPlan = (MapredWork) currTask.getWork(); - mapredCtx.getCurrAliasId(); + Operator currTopOp = mapredCtx.getCurrTopOp(); + String currAliasId = mapredCtx.getCurrAliasId(); Operator reducer = mapJoin; HashMap, Task> opTaskMap = - ctx.getOpTaskMap(); + ctx.getOpTaskMap(); Task opMapTask = opTaskMap.get(reducer); + ctx.setCurrTopOp(currTopOp); + ctx.setCurrAliasId(currAliasId); ctx.setCurrTask(currTask); // If the plan for this reducer does not exist, initialize the plan if (opMapTask == null) { assert currPlan.getReducer() == null; - GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, true, null, false, pos); + initMapJoinPlan(mapJoin, ctx, pos); } else { // The current plan can be thrown away after being merged with the // original plan - GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, ctx, pos, false, - true, null); + joinMapJoinPlan(mapJoin, opMapTask, ctx, pos); currTask = opMapTask; ctx.setCurrTask(currTask); } - mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), null, null)); - return null; - } - } - - /** - * Union followed by MapJoin. - */ - public static class UnionMapJoin implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
-      GenMRProcContext ctx = (GenMRProcContext) procCtx;
-
-      ParseContext parseCtx = ctx.getParseCtx();
-      UnionProcContext uCtx = parseCtx.getUCtx();
-
-      // union was map only - no special processing needed
-      if (uCtx.isMapOnlySubq()) {
-        return (new TableScanMapJoin())
-            .process(nd, stack, procCtx, nodeOutputs);
-      }
-
-      UnionOperator currUnion = Utils.findNode(stack, UnionOperator.class);
-      assert currUnion != null;
-      ctx.getUnionTask(currUnion);
-      AbstractMapJoinOperator<? extends MapJoinDesc> mapJoin = (AbstractMapJoinOperator<? extends MapJoinDesc>) nd;
-
-      // find the branch on which this processor was invoked
-      int pos = getPositionParent(mapJoin, stack);
-
-      Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx
-          .getMapCurrCtx();
-      GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get(
-          pos));
-      Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
-      MapredWork currPlan = (MapredWork) currTask.getWork();
-      Operator<? extends OperatorDesc> reducer = mapJoin;
-      HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
-          ctx.getOpTaskMap();
-      Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);
-
-      // union result cannot be a map table
-      boolean local = (pos != mapJoin.getConf().getPosBigTable());
-      if (local) {
-        throw new SemanticException(ErrorMsg.INVALID_MAPJOIN_TABLE.getMsg());
-      }
-
-      // If the plan for this reducer does not exist, initialize the plan
-      if (opMapTask == null) {
-        assert currPlan.getReducer() == null;
-        ctx.setCurrMapJoinOp(mapJoin);
-        GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, true, currUnion, false, pos);
-        ctx.setCurrUnionOp(null);
-      } else {
-        // The current plan can be thrown away after being merged with the
-        // original plan
-        Task<? extends Serializable> uTask = ctx.getUnionTask(currUnion).getUTask();
-        if (uTask.getId().equals(opMapTask.getId())) {
-          GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false,
-              false, currUnion);
-        } else {
-          GenMapRedUtils.joinPlan(mapJoin, uTask, opMapTask, ctx, pos, false,
-              false, currUnion);
-        }
-        currTask = opMapTask;
-        ctx.setCurrTask(currTask);
-      }
-
       mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx
           .getCurrTopOp(), ctx.getCurrAliasId()));
       return null;
@@ -393,23 +245,8 @@
     return new TableScanMapJoin();
   }

-  public static NodeProcessor getUnionMapJoin() {
-    return new UnionMapJoin();
-  }
-
-  public static NodeProcessor getReduceSinkMapJoin() {
-    return new ReduceSinkMapJoin();
-  }
-
-  public static NodeProcessor getMapJoin() {
-    return new MapJoin();
-  }
-
-  public static NodeProcessor getMapJoinMapJoin() {
-    return new MapJoinMapJoin();
-  }
-
   private MapJoinFactory() {
     // prevent instantiation
   }
 }
+
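Reviewer note on the MapJoinFactory rewrite above: the surviving joinMapJoinPlan folds the current mapjoin into the task already registered for it, and the one subtle step is the pos check that decides whether a parent feeds the in-memory hash table (map-local work) or streams through as the big table. The following self-contained sketch isolates that decision; the class and variable names are illustrative, not Hive's.

import java.util.Arrays;
import java.util.List;

public class BigTableSideSketch {

  // Mirrors the pos/posBigTable comparison in joinMapJoinPlan: a parent is
  // map-local (built into the in-memory hash table) unless it is the big
  // table; pos == -1 means "no parent position" and is never local.
  static boolean isLocalWork(int pos, int posBigTable) {
    return pos != -1 && pos != posBigTable;
  }

  public static void main(String[] args) {
    int posBigTable = 1; // suppose the second parent is the big table
    List<String> parents = Arrays.asList("subq1", "subq2");
    for (int pos = 0; pos < parents.size(); pos++) {
      System.out.printf("parent %d (%s): %s%n", pos, parents.get(pos),
          isLocalWork(pos, posBigTable) ? "hash-table (local) side" : "streamed big-table side");
    }
  }
}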
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (revision 1436745)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (working copy)
@@ -71,7 +71,7 @@
     if (opMapTask == null) {
       GenMapRedUtils.splitPlan(op, ctx);
     } else {
-      GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true, false, null);
+      GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true);
       currTask = opMapTask;
       ctx.setCurrTask(currTask);
     }
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 1436745)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy)
@@ -32,12 +32,10 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.ErrorMsg;
-import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.ConditionalTask;
 import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapRedTask;
 import org.apache.hadoop.hive.ql.exec.MoveTask;
 import org.apache.hadoop.hive.ql.exec.Operator;
@@ -52,7 +50,6 @@
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
@@ -67,7 +64,6 @@
 import org.apache.hadoop.hive.ql.plan.ExtractDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.MoveWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -774,13 +770,7 @@
   private String processFS(Node nd, Stack<Node> stack,
       NodeProcessorCtx opProcCtx, boolean chDir) throws SemanticException {
-    // Is it the dummy file sink after the mapjoin
     FileSinkOperator fsOp = (FileSinkOperator) nd;
-    if ((fsOp.getParentOperators().size() == 1)
-        && (fsOp.getParentOperators().get(0) instanceof MapJoinOperator)) {
-      return null;
-    }
-
     GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
     List<FileSinkOperator> seenFSOps = ctx.getSeenFileSinkOps();
     if (seenFSOps == null) {
@@ -884,24 +874,6 @@
       return dest;
     }

-    AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp = ctx.getCurrMapJoinOp();
-
-    if (currMapJoinOp != null) {
-      opTaskMap.put(null, currTask);
-      GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(currMapJoinOp);
-      MapredWork plan = (MapredWork) currTask.getWork();
-
-      String taskTmpDir = mjCtx.getTaskTmpDir();
-      TableDesc tt_desc = mjCtx.getTTDesc();
-      assert plan.getPathToAliases().get(taskTmpDir) == null;
-      plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
-      plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
-      plan.getPathToPartitionInfo().put(taskTmpDir,
-          new PartitionDesc(tt_desc, null));
-      plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp());
-      return dest;
-    }
-
     return dest;
   }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java (revision 1436745)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java (working copy)
@@ -148,4 +148,14 @@
   public OperatorType getType() {
     return OperatorType.UNION;
   }
+
+  @Override
+  public boolean opAllowedBeforeMapJoin() {
+    return false;
+  }
+
+  @Override
+  public boolean opAllowedAfterMapJoin() {
+    return false;
+  }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (revision 1436745)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (working copy)
@@ -1429,4 +1429,22 @@
   public boolean supportUnionRemoveOptimization() {
     return false;
   }
+
+  /*
+   * This operator is allowed before mapjoin. Eventually, the mapjoin hint should be done
+   * away with, but since bucketized mapjoin and sort-merge join still depend on it
+   * completely, it is needed for now. Operators that are not allowed before a mapjoin
+   * override this to return false.
+   */
+  public boolean opAllowedBeforeMapJoin() {
+    return true;
+  }
+
+  /*
+   * This operator is allowed after mapjoin. Eventually, the mapjoin hint should be done
+   * away with, but since bucketized mapjoin and sort-merge join still depend on it
+   * completely, it is needed for now. Operators that are not allowed after a mapjoin
+   * override this to return false.
+   */
+  public boolean opAllowedAfterMapJoin() {
+    return true;
+  }
 }
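The two Operator.java methods added above establish an override pattern: the base operator answers optimistically, and specific operators (union above, reduce sink and common join in the hunks that follow) opt out so the planner can reject invalid operator placements around a mapjoin. A minimal sketch of the pattern, with illustrative class names rather than Hive's operator hierarchy:

import java.util.Arrays;
import java.util.List;

public class MapJoinGuardSketch {

  static class Op {
    boolean opAllowedBeforeMapJoin() {
      return true; // base class is permissive by default
    }
  }

  static class UnionOp extends Op {
    @Override
    boolean opAllowedBeforeMapJoin() {
      return false; // union results cannot feed a mapjoin
    }
  }

  // A validation walk over the operators feeding a mapjoin fails fast as
  // soon as one of them opts out.
  static void checkParentsAllowed(List<Op> parents) {
    for (Op parent : parents) {
      if (!parent.opAllowedBeforeMapJoin()) {
        throw new IllegalStateException("operator not allowed before mapjoin: "
            + parent.getClass().getSimpleName());
      }
    }
  }

  public static void main(String[] args) {
    checkParentsAllowed(Arrays.asList(new Op(), new Op())); // passes
    try {
      checkParentsAllowed(Arrays.asList(new Op(), new UnionOp()));
    } catch (IllegalStateException expected) {
      System.out.println(expected.getMessage()); // rejected
    }
  }
}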
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (revision 1436745)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (working copy)
@@ -321,4 +321,9 @@
   public OperatorType getType() {
     return OperatorType.REDUCESINK;
   }
+
+  @Override
+  public boolean opAllowedBeforeMapJoin() {
+    return false;
+  }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (revision 1436745)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (working copy)
@@ -934,4 +934,13 @@
     this.posToAliasMap = posToAliasMap;
   }

+  @Override
+  public boolean opAllowedBeforeMapJoin() {
+    return false;
+  }
+
+  @Override
+  public boolean opAllowedAfterMapJoin() {
+    return false;
+  }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1436745)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -71,7 +71,6 @@
 import org.apache.hadoop.hive.ql.exec.RecordWriter;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.StatsTask;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
@@ -106,7 +105,6 @@
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3;
-import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink4;
 import org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1;
 import org.apache.hadoop.hive.ql.optimizer.GenMRUnion1;
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
@@ -2441,7 +2439,7 @@
     boolean subQuery = qb.getParseInfo().getIsSubQ();
     if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
       pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
-          : getUnescapedName((ASTNode)expr.getChild(0)).toLowerCase(),
+          : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(),
           expr, col_list, inputRR, pos, out_rwsch, qb.getAliases(), subQuery);
       selectStar = true;
     } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
@@ -2455,7 +2453,7 @@
     } else if (expr.getType() == HiveParser.DOT
         && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
         && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
-            .getChild(0).getText().toLowerCase())) && !hasAsClause
+            .getChild(0).getText().toLowerCase())) && !hasAsClause
         && !inputRR.getIsExprResolver()
         && isRegex(unescapeIdentifier(expr.getChild(1).getText()))) {
       // In case the expression is TABLE.COL (col can be regex).
@@ -2463,7 +2461,7 @@
       // We don't allow this for ExprResolver - the Group By case
       pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
           unescapeIdentifier(expr.getChild(0).getChild(0).getText()
-              .toLowerCase()), expr, col_list, inputRR, pos, out_rwsch,
+              .toLowerCase()), expr, col_list, inputRR, pos, out_rwsch,
           qb.getAliases(), subQuery);
     } else {
       // Case when this is an expression
@@ -5113,7 +5111,7 @@
     // set the stats publishing/aggregating key prefix
     // the same as directory name. The directory name
-    // can be changed in the optimizer but the key should not be changed
+    // can be changed in the optimizer but the key should not be changed
     // it should be the same as the MoveWork's sourceDir.
     fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName());

@@ -8087,24 +8085,9 @@
     opRules.put(new RuleRegExp(new String("R6"),
         UnionOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"),
         new GenMRRedSink3());
-    opRules.put(new RuleRegExp(new String("R6"),
-        MapJoinOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"),
-        new GenMRRedSink4());
     opRules.put(new RuleRegExp(new String("R7"),
-        TableScanOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"),
+        MapJoinOperator.getOperatorName() + "%"),
         MapJoinFactory.getTableScanMapJoin());
-    opRules.put(new RuleRegExp(new String("R8"),
-        ReduceSinkOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"),
-        MapJoinFactory.getReduceSinkMapJoin());
-    opRules.put(new RuleRegExp(new String("R9"),
-        UnionOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"),
-        MapJoinFactory.getUnionMapJoin());
-    opRules.put(new RuleRegExp(new String("R10"),
-        MapJoinOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"),
-        MapJoinFactory.getMapJoinMapJoin());
-    opRules.put(new RuleRegExp(new String("R11"),
-        MapJoinOperator.getOperatorName() + "%" + SelectOperator.getOperatorName() + "%"),
-        MapJoinFactory.getMapJoin());

     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
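The rule-table hunk above is the heart of the simplification: the duplicate R6 rule and rules R8 through R11 are dropped, and R7 shrinks from a TS-prefixed pattern to a bare pattern on the mapjoin operator name, so a single processor now handles every mapjoin regardless of what precedes it. A rough sketch of that style of pattern-based dispatch, using a plain stack-path string in place of Hive's RuleRegExp machinery (names here are illustrative):

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Consumer;
import java.util.regex.Pattern;

public class RuleDispatchSketch {

  public static void main(String[] args) {
    // One suffix rule now covers every mapjoin, whatever precedes it on the
    // operator stack; previously each prefix (TS, RS, UNION, MAPJOIN) had
    // its own rule and processor.
    Map<Pattern, Consumer<String>> opRules = new LinkedHashMap<>();
    opRules.put(Pattern.compile(".*MAPJOIN%$"),
        p -> System.out.println("mapjoin processor fired on: " + p));

    String[] stackPaths = {"TS%MAPJOIN%", "TS%RS%MAPJOIN%", "UNION%MAPJOIN%", "TS%RS%JOIN%"};
    for (String path : stackPaths) {
      opRules.forEach((pattern, processor) -> {
        if (pattern.matcher(path).matches()) {
          processor.accept(path); // last path deliberately matches nothing
        }
      });
    }
  }
}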
Index: ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (revision 1436745)
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (working copy)
@@ -324,6 +324,9 @@
     "(higher than the number of rows per input row due to grouping sets in the query), or " +
     "rewrite the query to not use distincts."),

+  OPERATOR_NOT_ALLOWED_WITH_MAPJOIN(10227,
+      "Not all operators are allowed with the mapjoin hint. Remove the mapjoin hint."),
+
   SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
   SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "
       + "It may have crashed with an error."),
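For completeness, the new ErrorMsg entry follows the file's existing idiom: an enum constant pairs a numeric code with canned text, and callers raise it via getMsg(), just as the removed UnionMapJoin code did with INVALID_MAPJOIN_TABLE. A self-contained sketch of that idiom, using the reworded message; the enum shape below is a simplification for illustration, not a copy of Hive's ErrorMsg:

public class ErrorMsgSketch {

  // Simplified stand-in for Hive's ErrorMsg enum: a code plus canned text.
  enum ErrorMsg {
    OPERATOR_NOT_ALLOWED_WITH_MAPJOIN(10227,
        "Not all operators are allowed with the mapjoin hint. Remove the mapjoin hint.");

    private final int errorCode;
    private final String mesg;

    ErrorMsg(int errorCode, String mesg) {
      this.errorCode = errorCode;
      this.mesg = mesg;
    }

    String getMsg() {
      return mesg;
    }

    int getErrorCode() {
      return errorCode;
    }
  }

  public static void main(String[] args) {
    // A validation pass would raise a SemanticException with this text when
    // an operator's opAllowedBeforeMapJoin()/opAllowedAfterMapJoin() says no.
    ErrorMsg err = ErrorMsg.OPERATOR_NOT_ALLOWED_WITH_MAPJOIN;
    System.out.println(err.getErrorCode() + ": " + err.getMsg());
  }
}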