Index: build.properties =================================================================== --- build.properties (revision 1421079) +++ build.properties (working copy) @@ -79,7 +79,7 @@ # (measured in milliseconds). Ignored if fork is disabled. When running # multiple tests inside the same Java VM (see forkMode), timeout # applies to the time that all tests use together, not to an individual test. -test.junit.timeout=43200000 +test.junit.timeout=432000000 # Use this property to selectively disable tests from the command line: # ant test -Dtest.junit.exclude="**/TestCliDriver.class" Index: ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out (working copy) @@ -71,7 +71,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -93,21 +92,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -162,47 +160,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -233,7 +190,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/join30.q.out =================================================================== --- ql/src/test/results/clientpositive/join30.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/join30.q.out (working copy) @@ -15,14 +15,13 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) 
(TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: x @@ -60,50 +59,39 @@ 1 [Column[key]] outputColumnNames: _col0 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: + Select Operator + expressions: expr: _col0 type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: + outputColumnNames: _col0 + Select Operator + expressions: expr: _col0 type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: @@ -147,7 +135,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator Index: ql/src/test/results/clientpositive/bucketcontext_4.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_4.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketcontext_4.q.out (working copy) @@ -81,13 +81,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -133,21 +132,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -204,47 +202,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -275,7 +232,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] Stage: Stage-0 Fetch Operator @@ -304,7 +261,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -326,21 +282,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ 
-395,47 +350,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -466,7 +380,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin9.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin9.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketmapjoin9.q.out (working copy) @@ -70,13 +70,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -115,21 +114,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -184,47 +182,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -255,7 +212,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator @@ -332,13 +289,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -377,21 +333,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -446,47 +401,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -517,7 +431,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin13.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin13.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketmapjoin13.q.out (working copy) @@ -98,13 +98,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -143,21 +142,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -260,48 +258,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -332,7 +288,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator @@ -386,13 +343,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. 
(TOK_TABLE_OR_COL a) part) '2')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -438,21 +394,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -507,47 +462,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -578,7 +492,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator @@ -644,13 +558,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -696,21 +609,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -765,47 +677,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -836,7 +707,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator @@ -904,13 +775,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -956,21 +826,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -1025,47 +894,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -1096,7 +924,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/smb_mapjoin_13.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_13.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/smb_mapjoin_13.q.out (working copy) @@ -77,7 +77,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -100,21 +99,43 @@ 1 [Column[value]] outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - 
escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col4, _col5 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -165,70 +186,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table1 name: default.test_table1 - Truncated Path -> Alias: - /test_table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col4, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ Reduce Operator Tree: Extract Limit @@ -250,7 +207,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /test_table1 [a] Stage: Stage-0 Fetch Operator @@ -307,13 +264,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table3) a) (TOK_TABREF (TOK_TABNAME test_table4) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -353,21 +309,43 @@ 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[value]()] outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col4, _col5 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col2 + type: int + expr: _col3 + type: string Local Work: Map Reduce Local Work Needs Tagging: false @@ -420,70 +398,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 name: default.test_table3 - Truncated Path -> Alias: - /test_table3 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col4, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col4 - type: int - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col2 - type: int - expr: _col3 - type: string - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col4,_col5 - columns.types int,string,int,string - escape.delim \ Reduce Operator Tree: Extract Limit @@ -505,7 +419,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /test_table3 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out =================================================================== --- 
ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out (working copy) @@ -64,13 +64,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table_desc1) a) (TOK_TABREF (TOK_TABNAME table_desc2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 10)))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -116,37 +115,26 @@ 1 [Column[key], Column[value]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/skewjoin.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoin.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/skewjoin.q.out (working copy) @@ -1524,13 +1524,12 @@ (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) k) (TOK_TABREF (TOK_TABNAME T1) v) (= (+ (. (TOK_TABLE_OR_COL k) key) 1) (. (TOK_TABLE_OR_COL v) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST v))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL k) key)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. 
(TOK_TABLE_OR_COL v) val))))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: v @@ -1568,48 +1567,37 @@ 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()] outputColumnNames: _col0, _col5 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col5 + Group By Operator + aggregations: + expr: sum(hash(_col0)) + expr: sum(hash(_col5)) + bucketGroup: false + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + expr: _col1 + type: bigint Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Group By Operator - aggregations: - expr: sum(hash(_col0)) - expr: sum(hash(_col5)) - bucketGroup: false - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - expr: _col1 - type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/bucketcontext_8.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_8.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketcontext_8.q.out (working copy) @@ -94,13 +94,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -146,21 +145,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -267,48 +265,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -339,7 +295,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -370,7 +327,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -392,21 +348,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: 
false Path -> Alias: #### A masked pattern was here #### @@ -511,48 +466,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -583,7 +496,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketcontext_3.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_3.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketcontext_3.q.out (working copy) @@ -69,13 +69,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -121,21 +120,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -192,47 +190,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -263,7 +220,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] Stage: Stage-0 Fetch Operator @@ -292,7 +249,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -314,21 +270,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ 
-383,47 +338,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -454,7 +368,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin8.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin8.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketmapjoin8.q.out (working copy) @@ -64,13 +64,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -116,21 +115,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -185,47 +183,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -256,7 +213,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator @@ -304,13 +261,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -356,21 +312,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -425,47 +380,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -496,7 +410,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin12.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin12.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketmapjoin12.q.out (working copy) @@ -92,13 +92,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -144,21 +143,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -213,47 +211,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -284,7 +241,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator @@ -324,13 +281,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_3) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -369,21 +325,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -438,47 +393,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -509,7 +423,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucket_map_join_2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucket_map_join_2.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucket_map_join_2.q.out (working copy) @@ -50,13 +50,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table1) a) (TOK_TABREF (TOK_TABNAME table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. 
(TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -95,21 +94,20 @@ 0 [Column[key], Column[value]] 1 [Column[key], Column[value]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -162,47 +160,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.table1 name: default.table1 - Truncated Path -> Alias: - /table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -233,7 +190,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /table1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out (working copy) @@ -67,7 +67,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -93,35 +92,24 @@ 1 [Column[key], Column[value]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - 
Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: + Select Operator + expressions: expr: _col0 - type: bigint + type: string + outputColumnNames: _col0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/join38.q.out =================================================================== --- ql/src/test/results/clientpositive/join38.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/join38.q.out (working copy) @@ -73,13 +73,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME tmp) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) col11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) col5)) (TOK_SELEXPR (TOK_FUNCTION count 1) count)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) col11) 111)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) col5)))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -125,62 +124,51 @@ 1 [Column[col11]] outputColumnNames: _col1, _col9, _col15 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - expr: _col9 - type: string - expr: _col15 - type: string - outputColumnNames: _col1, _col9, _col15 - Select Operator - expressions: - expr: _col1 - type: string - expr: _col9 - type: string - outputColumnNames: _col1, _col9 - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - keys: - expr: _col1 - type: string - expr: _col9 - type: string - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: - expr: _col0 - type: string + Select Operator + expressions: expr: _col1 type: string - sort order: ++ - Map-reduce partition columns: - expr: _col0 + expr: _col9 type: string - expr: _col1 + expr: _col15 type: string - tag: -1 - value expressions: - expr: _col2 - type: bigint + outputColumnNames: _col1, _col9, _col15 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col9 + type: string + outputColumnNames: _col1, _col9 + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: _col1 + type: string + expr: _col9 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: 
-1 + value expressions: + expr: _col2 + type: bigint + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out (working copy) @@ -56,13 +56,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1)) (TOK_TABREF (TOK_TABNAME src1) src2) (AND (AND (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)) (< (. (TOK_TABLE_OR_COL src1) key) 10)) (> (. (TOK_TABLE_OR_COL src2) key) 10))) (TOK_TABREF (TOK_TABNAME src) src3) (AND (= (. (TOK_TABLE_OR_COL src2) key) (. (TOK_TABLE_OR_COL src3) key)) (< (. (TOK_TABLE_OR_COL src3) key) 300)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src1 src2))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src3) key))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: src1 @@ -146,72 +145,61 @@ 2 [Column[key]] outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 Position of Big Table: 2 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: string + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col2 + type: string + expr: _col4 + type: string + sort order: +++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - expr: _col8 - type: string - expr: _col9 - type: string - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - expr: _col2 
- type: string - expr: _col4 - type: string - sort order: +++ - tag: -1 - value expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col4 - type: string - expr: _col5 - type: string Reduce Operator Tree: Extract File Output Operator Index: ql/src/test/results/clientpositive/bucketcontext_7.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_7.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketcontext_7.q.out (working copy) @@ -94,13 +94,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -146,21 +145,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -267,48 +265,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -339,7 +295,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -370,7 +327,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: 
Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -392,21 +348,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -511,48 +466,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -583,7 +496,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketcontext_2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_2.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketcontext_2.q.out (working copy) @@ -69,13 +69,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -121,21 +120,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -242,48 +240,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -314,7 +270,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -343,7 +300,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -365,21 +321,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: 
false Path -> Alias: #### A masked pattern was here #### @@ -484,48 +439,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -556,7 +469,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin11.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin11.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketmapjoin11.q.out (working copy) @@ -124,13 +124,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -176,21 +175,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -293,48 +291,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -365,7 +321,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator @@ -405,13 +362,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) (. (TOK_TABLE_OR_COL b) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -457,21 +413,20 @@ 0 [Column[key], Column[part]] 1 [Column[key], Column[part]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -574,48 +529,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -646,7 +559,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/smb_mapjoin_16.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_16.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_mapjoin_16.q.out (working copy) @@ -0,0 +1,120 @@ +PREHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) 
SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 SELECT *
+INSERT OVERWRITE TABLE test_table2 SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1
+PREHOOK: Output: default@test_table2
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 SELECT *
+INSERT OVERWRITE TABLE test_table2 SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Mapjoin followed by a aggregation should be performed in a single MR job
+EXPLAIN
+SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Mapjoin followed by a aggregation should be performed in a single MR job
+EXPLAIN
+SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (.
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +1028 Index: ql/src/test/results/clientpositive/bucket_map_join_1.q.out =================================================================== --- ql/src/test/results/clientpositive/bucket_map_join_1.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucket_map_join_1.q.out (working copy) @@ -50,13 +50,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table1) a) (TOK_TABREF (TOK_TABNAME table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. 
(TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -95,21 +94,20 @@ 0 [Column[key], Column[value]] 1 [Column[key], Column[value]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -162,47 +160,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.table1 name: default.table1 - Truncated Path -> Alias: - /table1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -233,7 +190,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /table1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out (working copy) @@ -134,13 +134,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -179,21 +178,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -296,48 +294,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -368,7 +324,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out (working copy) @@ -67,7 +67,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -93,35 +92,24 @@ 1 [Column[key], Column[value]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator 
Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: + Select Operator + expressions: expr: _col0 - type: bigint + type: string + outputColumnNames: _col0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/bucketcontext_6.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_6.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketcontext_6.q.out (working copy) @@ -68,13 +68,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -120,21 +119,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -241,48 +239,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - 
columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -313,7 +269,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -342,7 +299,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -364,21 +320,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -483,48 +438,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -555,7 +468,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketcontext_1.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_1.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketcontext_1.q.out (working copy) @@ -81,13 +81,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -133,21 +132,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -254,48 +252,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -326,7 +282,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator @@ -355,7 +312,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -377,21 +333,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: 
false Path -> Alias: #### A masked pattern was here #### @@ -496,48 +451,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -568,7 +481,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/bucketmapjoin10.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin10.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketmapjoin10.q.out (working copy) @@ -118,13 +118,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. 
(TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -163,21 +162,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -280,48 +278,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -352,7 +308,8 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/mapjoin_distinct.q.out =================================================================== --- ql/src/test/results/clientpositive/mapjoin_distinct.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/mapjoin_distinct.q.out (working copy) @@ -14,14 +14,13 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL c) value))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: d @@ -59,45 +58,34 @@ 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Group By Operator - bucketGroup: false - keys: + Select Operator + expressions: expr: _col1 type: string - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 type: string - sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 + outputColumnNames: _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator bucketGroup: false @@ -113,7 +101,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-3 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -193,13 +181,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL c) value))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: d @@ -237,45 +224,34 @@ 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Group By Operator - bucketGroup: false - keys: + Select Operator + expressions: expr: _col1 type: string - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 + outputColumnNames: _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator bucketGroup: false @@ -343,14 +319,13 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL c) value))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: d @@ -388,38 +363,27 @@ 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Reduce Output Operator - key expressions: - expr: _col1 - type: string - sort order: + - Map-reduce partition columns: - expr: rand() - type: double - tag: -1 Reduce Operator Tree: Group By Operator bucketGroup: false @@ -435,7 +399,7 @@ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Stage: Stage-3 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### @@ -515,13 +479,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL c) value))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: d @@ -559,38 +522,27 @@ 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col1 + type: string + tag: -1 Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Select Operator - expressions: - expr: _col1 - type: string - outputColumnNames: _col1 - Reduce Output Operator - key expressions: - expr: _col1 - type: string - sort order: + - Map-reduce partition columns: - expr: _col1 - type: string - tag: -1 Reduce Operator Tree: Group By Operator bucketGroup: false Index: ql/src/test/results/clientpositive/semijoin.q.out =================================================================== --- ql/src/test/results/clientpositive/semijoin.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/semijoin.q.out (working copy) @@ -1137,13 +1137,12 @@ (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL a) key))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -1193,38 +1192,27 @@ 1 [Column[_col0]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int Reduce Operator Tree: Extract File Output Operator @@ -1712,13 +1700,12 @@ (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t2) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b c))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL a) key))))) STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-1 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: b @@ -1802,38 +1789,27 @@ 2 [Column[_col0]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: int Local Work: Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: int Reduce Operator Tree: Extract File Output Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out (working copy) @@ -70,13 +70,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. 
(TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: b @@ -122,21 +121,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -193,47 +191,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -264,7 +221,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /srcbucket_mapjoin_part_1/part=1 [a] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out (working copy) @@ -59,7 +59,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -85,35 +84,24 @@ 1 [Column[key]] outputColumnNames: _col0 Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - 
type: string - outputColumnNames: _col0 - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: + Select Operator + expressions: expr: _col0 - type: bigint + type: string + outputColumnNames: _col0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/results/clientpositive/bucketcontext_5.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketcontext_5.q.out (revision 1421079) +++ ql/src/test/results/clientpositive/bucketcontext_5.q.out (working copy) @@ -54,13 +54,12 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Local Work Alias -> Map Local Tables: a @@ -106,21 +105,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Local Work: Map Reduce Local Work Needs Tagging: false @@ -173,47 +171,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -244,7 +201,7 @@ GatherStats: false 
MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big [b] Stage: Stage-0 Fetch Operator @@ -271,7 +228,6 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -293,21 +249,20 @@ 0 [Column[key]] 1 [Column[key]] Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Select Operator + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -358,47 +313,6 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big [b] - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Needs Tagging: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns - columns.types - escape.delim \ Reduce Operator Tree: Group By Operator aggregations: @@ -429,7 +343,7 @@ GatherStats: false MultiFileSpray: false Truncated Path -> Alias: -#### A masked pattern was here #### + /bucket_big [b] Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientnegative/join29.q.out =================================================================== --- ql/src/test/results/clientnegative/join29.q.out (revision 0) +++ ql/src/test/results/clientnegative/join29.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10211]: All operators are not allowed before mapjoin hint. Remove the mapjoin hint. 
Index: ql/src/test/results/clientnegative/join33.q.out =================================================================== --- ql/src/test/results/clientnegative/join33.q.out (revision 0) +++ ql/src/test/results/clientnegative/join33.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10212]: All operators are not allowed after mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/join34.q.out =================================================================== --- ql/src/test/results/clientnegative/join34.q.out (revision 0) +++ ql/src/test/results/clientnegative/join34.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10211]: All operators are not allowed before mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/join35.q.out =================================================================== --- ql/src/test/results/clientnegative/join35.q.out (revision 0) +++ ql/src/test/results/clientnegative/join35.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10211]: All operators are not allowed before mapjoin hint. Remove the mapjoin hint. 
Index: ql/src/test/results/clientnegative/mapjoin_subquery2.q.out =================================================================== --- ql/src/test/results/clientnegative/mapjoin_subquery2.q.out (revision 0) +++ ql/src/test/results/clientnegative/mapjoin_subquery2.q.out (working copy) @@ -0,0 +1,52 @@ +PREHOOK: query: drop table x +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table x +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table y +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table y +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table z +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table z +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE x (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE x (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@x +PREHOOK: query: CREATE TABLE y (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE y (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@y +PREHOOK: query: CREATE TABLE z (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE z (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@z +PREHOOK: query: load data local inpath '../data/files/x.txt' INTO TABLE x +PREHOOK: type: LOAD +PREHOOK: Output: default@x +POSTHOOK: query: load data local inpath '../data/files/x.txt' INTO TABLE x +POSTHOOK: type: LOAD +POSTHOOK: Output: default@x +PREHOOK: query: load data local inpath '../data/files/y.txt' INTO TABLE y +PREHOOK: type: LOAD +PREHOOK: Output: default@y +POSTHOOK: query: load data local inpath '../data/files/y.txt' INTO TABLE y +POSTHOOK: type: LOAD +POSTHOOK: Output: default@y +PREHOOK: query: load data local inpath '../data/files/z.txt' INTO TABLE z +PREHOOK: type: LOAD +PREHOOK: Output: default@z +POSTHOOK: query: load data local inpath '../data/files/z.txt' INTO TABLE z +POSTHOOK: type: LOAD +POSTHOOK: Output: default@z +FAILED: SemanticException [Error 10212]: All operators are not allowed after mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/mapjoin_subquery.q.out =================================================================== --- ql/src/test/results/clientnegative/mapjoin_subquery.q.out (revision 0) +++ ql/src/test/results/clientnegative/mapjoin_subquery.q.out (working copy) @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10211]: All operators are not allowed before mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/join31.q.out =================================================================== --- ql/src/test/results/clientnegative/join31.q.out (revision 0) +++ ql/src/test/results/clientnegative/join31.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, cnt INT) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10211]: All operators are not allowed before mapjoin hint. Remove the mapjoin hint. 
Index: ql/src/test/results/clientnegative/join28.q.out =================================================================== --- ql/src/test/results/clientnegative/join28.q.out (revision 0) +++ ql/src/test/results/clientnegative/join28.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10212]: All operators are not allowed after mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/union22.q.out =================================================================== --- ql/src/test/results/clientnegative/union22.q.out (revision 0) +++ ql/src/test/results/clientnegative/union22.q.out (working copy) @@ -0,0 +1,45 @@ +PREHOOK: query: create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dst_union22 +PREHOOK: query: create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dst_union22_delta +PREHOOK: query: insert overwrite table dst_union22 partition (ds='1') +select key, value, key , value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dst_union22@ds=1 +POSTHOOK: query: insert overwrite table dst_union22 partition (ds='1') +select key, value, key , value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dst_union22@ds=1 +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table dst_union22_delta partition (ds='1') +select key, key, value, key, value, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dst_union22_delta@ds=1 +POSTHOOK: query: insert overwrite table dst_union22_delta partition (ds='1') +select key, key, value, key, value, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dst_union22_delta@ds=1 +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta 
PARTITION(ds=1).k0 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +FAILED: SemanticException [Error 10212]: All operators are not allowed after mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/results/clientnegative/join32.q.out =================================================================== --- ql/src/test/results/clientnegative/join32.q.out (revision 0) +++ ql/src/test/results/clientnegative/join32.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_j1 +FAILED: SemanticException [Error 10212]: All operators are not allowed after mapjoin hint. Remove the mapjoin hint. Index: ql/src/test/queries/clientpositive/join32.q =================================================================== --- ql/src/test/queries/clientpositive/join32.q (revision 1421079) +++ ql/src/test/queries/clientpositive/join32.q (working copy) @@ -1,17 +0,0 @@ -CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; - -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); - -select * from dest_j1 x order by x.key; - - - Index: ql/src/test/queries/clientpositive/mapjoin_subquery2.q =================================================================== --- ql/src/test/queries/clientpositive/mapjoin_subquery2.q (revision 1421079) +++ ql/src/test/queries/clientpositive/mapjoin_subquery2.q (working copy) @@ -1,39 +0,0 @@ -drop table x; -drop table y; -drop table z; - -CREATE TABLE x (name STRING, id INT) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; - -CREATE TABLE y (id INT, name STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; - -CREATE TABLE z (id INT, name STRING) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; - -load data local inpath '../data/files/x.txt' INTO TABLE x; -load data local inpath '../data/files/y.txt' INTO TABLE y; -load data local inpath '../data/files/z.txt' INTO TABLE z; - -SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id); - -EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as 
value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id); - -SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name -FROM -(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 - FROM y JOIN x ON (x.id = y.id)) subq - JOIN z ON (subq.key1 = z.id); - -drop table x; -drop table y; -drop table z; Index: ql/src/test/queries/clientpositive/join29.q =================================================================== --- ql/src/test/queries/clientpositive/join29.q (revision 1421079) +++ ql/src/test/queries/clientpositive/join29.q (working copy) @@ -1,14 +0,0 @@ -CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT); - -EXPLAIN -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key); - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key); - -select * from dest_j1 x order by x.key; Index: ql/src/test/queries/clientpositive/mapjoin_subquery.q =================================================================== --- ql/src/test/queries/clientpositive/mapjoin_subquery.q (revision 1421079) +++ ql/src/test/queries/clientpositive/mapjoin_subquery.q (working copy) @@ -1,28 +0,0 @@ -EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); - -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); - -EXPLAIN -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1; - - -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) - order by subq.key1; Index: ql/src/test/queries/clientpositive/union22.q =================================================================== --- ql/src/test/queries/clientpositive/union22.q (revision 1421079) +++ ql/src/test/queries/clientpositive/union22.q (working copy) @@ -1,41 +0,0 @@ - -create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string); - - -create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string); - -insert overwrite table dst_union22 partition (ds='1') -select key, value, key , value from src; - -insert overwrite table dst_union22_delta partition (ds='1') -select key, key, value, key, value, value from src; - -set hive.merge.mapfiles=false; - -explain extended -insert overwrite table dst_union22 partition (ds='2') 
-select * from -( -select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 -union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 -from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on -a.k1 = b.k1 and a.ds='1' -where a.k1 > 20 -) -subq; - -insert overwrite table dst_union22 partition (ds='2') -select * from -( -select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 -union all -select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 -from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on -a.k1 = b.k1 and a.ds='1' -where a.k1 > 20 -) -subq; - - -select * from dst_union22 where ds = '2' order by k1, k2, k3, k4; Index: ql/src/test/queries/clientpositive/join33.q =================================================================== --- ql/src/test/queries/clientpositive/join33.q (revision 1421079) +++ ql/src/test/queries/clientpositive/join33.q (working copy) @@ -1,17 +0,0 @@ -CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; - -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); - -select * from dest_j1 x order by x.key; - - - Index: ql/src/test/queries/clientpositive/join34.q =================================================================== --- ql/src/test/queries/clientpositive/join34.q (revision 1421079) +++ ql/src/test/queries/clientpositive/join34.q (working copy) @@ -1,27 +0,0 @@ - - -CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; - -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value -FROM -( SELECT x.key as key, x.value as value from src x where x.key < 20 - UNION ALL - SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 -) subq1 -JOIN src1 x ON (x.key = subq1.key); - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value -FROM -( SELECT x.key as key, x.value as value from src x where x.key < 20 - UNION ALL - SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 -) subq1 -JOIN src1 x ON (x.key = subq1.key); - -select * from dest_j1 x order by x.key; - - - Index: ql/src/test/queries/clientpositive/join31.q =================================================================== --- ql/src/test/queries/clientpositive/join31.q (revision 1421079) +++ ql/src/test/queries/clientpositive/join31.q (working copy) @@ -1,16 +0,0 @@ -CREATE TABLE dest_j1(key STRING, cnt INT); - -EXPLAIN -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) -group by subq1.key; - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt -FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN - (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) -group by subq1.key; - -select * 
from dest_j1 x order by x.key; Index: ql/src/test/queries/clientpositive/join35.q =================================================================== --- ql/src/test/queries/clientpositive/join35.q (revision 1421079) +++ ql/src/test/queries/clientpositive/join35.q (working copy) @@ -1,27 +0,0 @@ - - -CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE; - -EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key); - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt -FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key - UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key -) subq1 -JOIN src1 x ON (x.key = subq1.key); - -select * from dest_j1 x order by x.key; - - - Index: ql/src/test/queries/clientpositive/join28.q =================================================================== --- ql/src/test/queries/clientpositive/join28.q (revision 1421079) +++ ql/src/test/queries/clientpositive/join28.q (working copy) @@ -1,23 +0,0 @@ - - -CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE; - -EXPLAIN -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); - -INSERT OVERWRITE TABLE dest_j1 -SELECT /*+ MAPJOIN(z) */ subq.key1, z.value -FROM -(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 - FROM src1 x JOIN src y ON (x.key = y.key)) subq - JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); - -select * from dest_j1 x order by x.key; - - - Index: ql/src/test/queries/clientpositive/smb_mapjoin_16.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_16.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_16.q (working copy) @@ -0,0 +1,21 @@ +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.reducers.max = 1; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; + +FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT *; + +-- Mapjoin followed by a aggregation should be performed in a single MR job +EXPLAIN +SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key; +SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key; Index: ql/src/test/queries/clientnegative/join31.q =================================================================== --- ql/src/test/queries/clientnegative/join31.q (revision 0) +++ ql/src/test/queries/clientnegative/join31.q 
(working copy) @@ -0,0 +1,9 @@ +CREATE TABLE dest_j1(key STRING, cnt INT); + +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) +group by subq1.key; + Index: ql/src/test/queries/clientnegative/union22.q =================================================================== --- ql/src/test/queries/clientnegative/union22.q (revision 0) +++ ql/src/test/queries/clientnegative/union22.q (working copy) @@ -0,0 +1,23 @@ +create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string); +create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string); + +insert overwrite table dst_union22 partition (ds='1') +select key, value, key , value from src; + +insert overwrite table dst_union22_delta partition (ds='1') +select key, key, value, key, value, value from src; + +set hive.merge.mapfiles=false; + +explain extended +insert overwrite table dst_union22 partition (ds='2') +select * from +( +select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50 +union all +select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4 +from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on +a.k1 = b.k1 and a.ds='1' +where a.k1 > 20 +) +subq; Index: ql/src/test/queries/clientnegative/join32.q =================================================================== --- ql/src/test/queries/clientnegative/join32.q (revision 0) +++ ql/src/test/queries/clientnegative/join32.q (working copy) @@ -0,0 +1,11 @@ +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + + + + Index: ql/src/test/queries/clientnegative/join33.q =================================================================== --- ql/src/test/queries/clientnegative/join33.q (revision 0) +++ ql/src/test/queries/clientnegative/join33.q (working copy) @@ -0,0 +1,7 @@ +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); Index: ql/src/test/queries/clientnegative/join34.q =================================================================== --- ql/src/test/queries/clientnegative/join34.q (revision 0) +++ ql/src/test/queries/clientnegative/join34.q (working copy) @@ -0,0 +1,14 @@ +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value +FROM +( SELECT x.key as key, x.value as value from src x where x.key < 20 + UNION ALL + SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 +) subq1 +JOIN src1 x ON (x.key = subq1.key); + + + Index: ql/src/test/queries/clientnegative/join35.q =================================================================== --- ql/src/test/queries/clientnegative/join35.q (revision 0) +++ ql/src/test/queries/clientnegative/join35.q 
(working copy) @@ -0,0 +1,15 @@ +CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE; + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key); + + + + Index: ql/src/test/queries/clientnegative/join28.q =================================================================== --- ql/src/test/queries/clientnegative/join28.q (revision 0) +++ ql/src/test/queries/clientnegative/join28.q (working copy) @@ -0,0 +1,12 @@ +CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE; + +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); + + + Index: ql/src/test/queries/clientnegative/mapjoin_subquery2.q =================================================================== --- ql/src/test/queries/clientnegative/mapjoin_subquery2.q (revision 0) +++ ql/src/test/queries/clientnegative/mapjoin_subquery2.q (working copy) @@ -0,0 +1,23 @@ +drop table x; +drop table y; +drop table z; + +CREATE TABLE x (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; + +CREATE TABLE y (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; + +CREATE TABLE z (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'; + +load data local inpath '../data/files/x.txt' INTO TABLE x; +load data local inpath '../data/files/y.txt' INTO TABLE y; +load data local inpath '../data/files/z.txt' INTO TABLE z; + +EXPLAIN +SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name +FROM +(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2 + FROM y JOIN x ON (x.id = y.id)) subq + JOIN z ON (subq.key1 = z.id); Index: ql/src/test/queries/clientnegative/join29.q =================================================================== --- ql/src/test/queries/clientnegative/join29.q (revision 0) +++ ql/src/test/queries/clientnegative/join29.q (working copy) @@ -0,0 +1,7 @@ +CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT); + +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key); Index: ql/src/test/queries/clientnegative/mapjoin_subquery.q =================================================================== --- ql/src/test/queries/clientnegative/mapjoin_subquery.q (revision 0) +++ ql/src/test/queries/clientnegative/mapjoin_subquery.q (working copy) @@ -0,0 +1,6 @@ +EXPLAIN +SELECT /*+ MAPJOIN(z) */ subq.key1, z.value +FROM +(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java (revision 1421079) +++ 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java (working copy) @@ -1,99 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.optimizer; - -import java.io.Serializable; -import java.util.HashMap; -import java.util.Map; -import java.util.Stack; - -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.MapredWork; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; - -/** - * Processor for the rule - map join followed by reduce sink. - */ -public class GenMRRedSink4 implements NodeProcessor { - - public GenMRRedSink4() { - } - - /** - * Reduce Scan encountered. - * - * @param nd - * the reduce sink operator encountered - * @param opProcCtx - * context - */ - public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, - Object... nodeOutputs) throws SemanticException { - ReduceSinkOperator op = (ReduceSinkOperator) nd; - GenMRProcContext ctx = (GenMRProcContext) opProcCtx; - - ctx.getParseCtx(); - - // map-join consisted on a bunch of map-only jobs, and it has been split - // after the mapjoin - Operator reducer = op.getChildOperators().get(0); - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); - Task currTask = mapredCtx.getCurrTask(); - MapredWork plan = (MapredWork) currTask.getWork(); - HashMap, Task> opTaskMap = ctx - .getOpTaskMap(); - Task opMapTask = opTaskMap.get(reducer); - - ctx.setCurrTask(currTask); - - // If the plan for this reducer does not exist, initialize the plan - if (opMapTask == null) { - // When the reducer is encountered for the first time - if (plan.getReducer() == null) { - GenMapRedUtils.initMapJoinPlan(op, ctx, true, false, true, -1); - // When mapjoin is followed by a multi-table insert - } else { - GenMapRedUtils.splitPlan(op, ctx); - } - } else { - // There is a join after mapjoin. One of the branches of mapjoin has already - // been initialized. - // Initialize the current branch, and join with the original plan. 
- assert plan.getReducer() != reducer; - GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, false, true, - false); - } - - mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), - ctx.getCurrAliasId())); - - // the mapjoin operator has been processed - ctx.setCurrMapJoinOp(null); - return null; - } -} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java (working copy) @@ -39,16 +39,13 @@ private final transient boolean[] mapOnlySubq; private final transient boolean[] mapOnlySubqSet; private final transient boolean[] rootTask; - private final transient boolean[] mapJoinSubq; private transient int numInputs; - private transient boolean mapJoinQuery; public UnionParseContext(int numInputs) { this.numInputs = numInputs; mapOnlySubq = new boolean[numInputs]; rootTask = new boolean[numInputs]; - mapJoinSubq = new boolean[numInputs]; mapOnlySubqSet = new boolean[numInputs]; } @@ -61,21 +58,6 @@ this.mapOnlySubqSet[pos] = true; } - public boolean getMapJoinSubq(int pos) { - return mapJoinSubq[pos]; - } - - public void setMapJoinSubq(int pos, boolean mapJoinSubq) { - this.mapJoinSubq[pos] = mapJoinSubq; - if (mapJoinSubq) { - mapJoinQuery = true; - } - } - - public boolean getMapJoinQuery() { - return mapJoinQuery; - } - public boolean getRootTask(int pos) { return rootTask[pos]; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (working copy) @@ -107,30 +107,6 @@ } /** - * Map-join subquery followed by Union. - */ - public static class MapJoinUnion implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - UnionOperator union = (UnionOperator) nd; - UnionProcContext ctx = (UnionProcContext) procCtx; - - // find the branch on which this processor was invoked - int pos = getPositionParent(union, stack); - UnionParseContext uCtx = ctx.getUnionParseContext(union); - if (uCtx == null) { - uCtx = new UnionParseContext(union.getConf().getNumInputs()); - } - - uCtx.setMapJoinSubq(pos, true); - ctx.setUnionParseContext(union, uCtx); - return null; - } - } - - /** * Union subquery followed by Union. 
*/ public static class UnknownUnion implements NodeProcessor { @@ -330,10 +306,6 @@ return new MapUnion(); } - public static NodeProcessor getMapJoinUnion() { - return new MapJoinUnion(); - } - public static NodeProcessor getUnknownUnion() { return new UnknownUnion(); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (working copy) @@ -25,7 +25,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.UnionOperator; @@ -79,9 +78,6 @@ opRules.put(new RuleRegExp("R3", TableScanOperator.getOperatorName() + "%.*" + UnionOperator.getOperatorName() + "%"), UnionProcFactory.getMapUnion()); - opRules.put(new RuleRegExp("R4", - MapJoinOperator.getOperatorName() + "%.*" + UnionOperator.getOperatorName() + "%"), - UnionProcFactory.getMapJoinUnion()); // The dispatcher fires the processor for the matching rule and passes the // context along Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy) @@ -226,7 +226,7 @@ QBJoinTree newJoinTree = newWork.getJoinTree(); // generate the map join operator; already checked the map join MapJoinOperator newMapJoinOp = MapJoinProcessor.convertMapJoin(opParseCtxMap, op, - newJoinTree, mapJoinPos, true); + newJoinTree, mapJoinPos, true, false); // generate the local work and return the big table alias String bigTableAlias = MapJoinProcessor .genMapJoinLocalWork(newWork, newMapJoinOp, mapJoinPos); @@ -240,9 +240,43 @@ e.printStackTrace(); throw new SemanticException("Generate New MapJoin Opertor Exeception " + e.getMessage()); } + } + private static void checkParentOperatorType(Operator op) + throws SemanticException { + if (!op.opAllowedBeforeMapJoin()) { + throw new SemanticException(ErrorMsg.OPERATOR_NOT_ALLOWED_BEFORE_MAPJOIN.getMsg()); + } + if (op.getParentOperators() != null) { + for (Operator parentOp : op.getParentOperators()) { + checkParentOperatorType(parentOp); + } + } } + private static void checkChildOperatorType(Operator op) + throws SemanticException { + if (!op.opAllowedAfterMapJoin()) { + throw new SemanticException(ErrorMsg.OPERATOR_NOT_ALLOWED_AFTER_MAPJOIN.getMsg()); + } + if (op.getChildOperators() != null) { + for (Operator childOp : op.getChildOperators()) { + checkChildOperatorType(childOp); + } + } + } + + private static void validateMapJoinTypes(Operator op) + throws SemanticException { + for (Operator parentOp : op.getParentOperators()) { + checkParentOperatorType(parentOp); + } + + for (Operator childOp : op.getChildOperators()) { + checkChildOperatorType(childOp); + } + } + /** * convert a regular join to a a map-side join. 
* @@ -258,8 +292,10 @@ */ public static MapJoinOperator convertMapJoin( LinkedHashMap, OpParseContext> opParseCtxMap, - JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin) + JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin, + boolean validateMapJoinTree) throws SemanticException { + // outer join cannot be performed on a table which is being cached JoinDesc desc = op.getConf(); JoinCondDesc[] condns = desc.getConds(); @@ -477,6 +513,11 @@ op.setChildOperators(null); op.setParentOperators(null); + // make sure only map-joins can be performed. + if (validateMapJoinTree) { + validateMapJoinTypes(mapJoinOp); + } + return mapJoinOp; } @@ -487,11 +528,10 @@ HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN) && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN); - LinkedHashMap, OpParseContext> opParseCtxMap = pctx .getOpParseCtx(); MapJoinOperator mapJoinOp = convertMapJoin(opParseCtxMap, op, joinTree, mapJoinPos, - noCheckOuterJoin); + noCheckOuterJoin, true); // create a dummy select to select all columns genSelectPlan(pctx, mapJoinOp); return mapJoinOp; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (working copy) @@ -27,7 +27,6 @@ import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; @@ -40,7 +39,6 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; @@ -155,90 +153,10 @@ } } - /** - * GenMRMapJoinCtx. 
- * - */ - public static class GenMRMapJoinCtx { - String taskTmpDir; - TableDesc tt_desc; - Operator rootMapJoinOp; - AbstractMapJoinOperator oldMapJoin; - - public GenMRMapJoinCtx() { - taskTmpDir = null; - tt_desc = null; - rootMapJoinOp = null; - oldMapJoin = null; - } - - /** - * @param taskTmpDir - * @param tt_desc - * @param rootMapJoinOp - * @param oldMapJoin - */ - public GenMRMapJoinCtx(String taskTmpDir, TableDesc tt_desc, - Operator rootMapJoinOp, - AbstractMapJoinOperator oldMapJoin) { - this.taskTmpDir = taskTmpDir; - this.tt_desc = tt_desc; - this.rootMapJoinOp = rootMapJoinOp; - this.oldMapJoin = oldMapJoin; - } - - public void setTaskTmpDir(String taskTmpDir) { - this.taskTmpDir = taskTmpDir; - } - - public String getTaskTmpDir() { - return taskTmpDir; - } - - public void setTTDesc(TableDesc tt_desc) { - this.tt_desc = tt_desc; - } - - public TableDesc getTTDesc() { - return tt_desc; - } - - /** - * @return the childSelect - */ - public Operator getRootMapJoinOp() { - return rootMapJoinOp; - } - - /** - * @param rootMapJoinOp - * the rootMapJoinOp to set - */ - public void setRootMapJoinOp(Operator rootMapJoinOp) { - this.rootMapJoinOp = rootMapJoinOp; - } - - /** - * @return the oldMapJoin - */ - public AbstractMapJoinOperator getOldMapJoin() { - return oldMapJoin; - } - - /** - * @param oldMapJoin - * the oldMapJoin to set - */ - public void setOldMapJoin(AbstractMapJoinOperator oldMapJoin) { - this.oldMapJoin = oldMapJoin; - } - } - private HiveConf conf; private HashMap, Task> opTaskMap; private HashMap unionTaskMap; - private HashMap, GenMRMapJoinCtx> mapJoinTaskMap; private List> seenOps; private List seenFileSinkOps; @@ -250,7 +168,6 @@ private Task currTask; private Operator currTopOp; private UnionOperator currUnionOp; - private AbstractMapJoinOperator currMapJoinOp; private String currAliasId; private List> rootOps; private DependencyCollectionTask dependencyTaskForMultiInsert; @@ -313,12 +230,10 @@ currTask = null; currTopOp = null; currUnionOp = null; - currMapJoinOp = null; currAliasId = null; rootOps = new ArrayList>(); rootOps.addAll(parseCtx.getTopOps().values()); unionTaskMap = new HashMap(); - mapJoinTaskMap = new HashMap, GenMRMapJoinCtx>(); dependencyTaskForMultiInsert = null; linkedFileDescTasks = null; } @@ -488,19 +403,7 @@ this.currUnionOp = currUnionOp; } - public AbstractMapJoinOperator getCurrMapJoinOp() { - return currMapJoinOp; - } - /** - * @param currMapJoinOp - * current map join operator - */ - public void setCurrMapJoinOp(AbstractMapJoinOperator currMapJoinOp) { - this.currMapJoinOp = currMapJoinOp; - } - - /** * @return current top alias */ public String getCurrAliasId() { @@ -523,14 +426,6 @@ unionTaskMap.put(op, uTask); } - public GenMRMapJoinCtx getMapJoinCtx(AbstractMapJoinOperator op) { - return mapJoinTaskMap.get(op); - } - - public void setMapJoinCtx(AbstractMapJoinOperator op, GenMRMapJoinCtx mjCtx) { - mapJoinTaskMap.put(op, mjCtx); - } - /** * Get the input set. 
*/ Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (working copy) @@ -26,7 +26,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.Task; @@ -35,7 +34,6 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; @@ -44,10 +42,8 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; @@ -84,16 +80,10 @@ } UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union); - if ((uPrsCtx != null) && (uPrsCtx.getMapJoinQuery())) { - GenMapRedUtils.mergeMapJoinUnion(union, ctx, - UnionProcFactory.getPositionParent(union, stack)); - } - else { - ctx.getMapCurrCtx().put( - (Operator) union, - new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), - ctx.getCurrAliasId())); - } + ctx.getMapCurrCtx().put( + (Operator) union, + new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), + ctx.getCurrAliasId())); // if the union is the first time seen, set current task to GenMRUnionCtx uCtxTask = ctx.getUnionTask(union); @@ -103,7 +93,7 @@ ctx.setUnionTask(union, uCtxTask); } - Task uTask=ctx.getCurrTask(); + Task uTask = ctx.getCurrTask(); if (uTask.getParentTasks() == null || uTask.getParentTasks().isEmpty()) { if (!ctx.getRootTasks().contains(uTask)) { @@ -134,8 +124,9 @@ GenMRUnionCtx uCtxTask) { ParseContext parseCtx = ctx.getParseCtx(); - TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema( - parent.getSchema(), "temporarycol")); + TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils + .getFieldSchemasFromRowSchema( + parent.getSchema(), "temporarycol")); // generate the temporary file Context baseCtx = parseCtx.getContext(); @@ -150,7 +141,7 @@ parent.getChildOperators().set(0, fs_op); List> parentOpList = - new ArrayList>(); + new ArrayList>(); parentOpList.add(parent); fs_op.setParentOperators(parentOpList); @@ -158,7 +149,7 @@ Operator ts_op = OperatorFactory.get( new TableScanDesc(), parent.getSchema()); List> childOpList = - new ArrayList>(); + new ArrayList>(); childOpList.add(child); ts_op.setChildOperators(childOpList); child.replaceParent(parent, ts_op); @@ -211,27 +202,9 @@ } } - private void processSubQueryUnionMapJoin(GenMRProcContext ctx) { - AbstractMapJoinOperator mjOp = ctx.getCurrMapJoinOp(); - 
assert mjOp != null; - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mjOp); - assert mjCtx != null; - MapredWork plan = (MapredWork) ctx.getCurrTask().getWork(); - - String taskTmpDir = mjCtx.getTaskTmpDir(); - TableDesc tt_desc = mjCtx.getTTDesc(); - assert plan.getPathToAliases().get(taskTmpDir) == null; - plan.getPathToAliases().put(taskTmpDir, new ArrayList()); - plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - plan.getPathToPartitionInfo().put(taskTmpDir, - new PartitionDesc(tt_desc, null)); - plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); - } - /** * Union Operator encountered . Currently, the algorithm is pretty simple: If - * all the sub-queries are map-only, don't do anything. However, if there is a - * mapjoin followed by the union, merge at the union Otherwise, insert a + * all the sub-queries are map-only, don't do anything. Otherwise, insert a * FileSink on top of all the sub-queries. * * This can be optimized later on. @@ -283,8 +256,7 @@ } // Copy into the current union task plan if - if (uPrsCtx.getMapOnlySubq(pos) - && !uPrsCtx.getMapJoinSubq(pos) && uPrsCtx.getRootTask(pos)) { + if (uPrsCtx.getMapOnlySubq(pos) && uPrsCtx.getRootTask(pos)) { processSubQueryUnionMerge(ctx, uCtxTask, union, stack); } // If it a map-reduce job, create a temporary file @@ -294,13 +266,10 @@ && (!ctx.getRootTasks().contains(currTask))) { ctx.getRootTasks().add(currTask); } - // If there is a mapjoin at position 'pos' - if (uPrsCtx.getMapJoinSubq(pos)) { - processSubQueryUnionMapJoin(ctx); - } - processSubQueryUnionCreateIntermediate(union.getParentOperators().get(pos), union, uTask, ctx, uCtxTask); - //the currAliasId and CurrTopOp is not valid any more + processSubQueryUnionCreateIntermediate(union.getParentOperators().get(pos), union, uTask, + ctx, uCtxTask); + // the currAliasId and CurrTopOp is not valid any more ctx.setCurrAliasId(null); ctx.setCurrTopOp(null); ctx.getOpTaskMap().put(null, uTask); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (working copy) @@ -81,7 +81,7 @@ } else { // This will happen in case of joins. 
The current plan can be thrown away // after being merged with the original plan - GenMapRedUtils.joinPlan(op, null, opMapTask, ctx, -1, false, false, false); + GenMapRedUtils.joinPlan(op, null, opMapTask, ctx, -1, false); currTask = opMapTask; ctx.setCurrTask(currTask); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -20,7 +20,6 @@ import java.io.Serializable; import java.util.ArrayList; -import java.util.ConcurrentModificationException; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; @@ -33,12 +32,10 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; -import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -47,19 +44,15 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext; import org.apache.hadoop.hive.ql.parse.OpParseContext; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; @@ -97,12 +90,12 @@ throws SemanticException { Operator reducer = op.getChildOperators().get(0); Map, GenMapRedCtx> mapCurrCtx = - opProcCtx.getMapCurrCtx(); + opProcCtx.getMapCurrCtx(); GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0)); Task currTask = mapredCtx.getCurrTask(); MapredWork plan = (MapredWork) currTask.getWork(); HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); + opProcCtx.getOpTaskMap(); Operator currTopOp = opProcCtx.getCurrTopOp(); opTaskMap.put(reducer, currTask); @@ -114,7 +107,7 @@ List> rootTasks = opProcCtx.getRootTasks(); if (!rootTasks.contains(currTask)) { - rootTasks.add(currTask); + rootTasks.add(currTask); } if (reducer.getClass() == JoinOperator.class) { plan.setNeedsTagging(true); @@ -137,167 +130,8 @@ opProcCtx.setCurrAliasId(currAliasId); } - public static void initMapJoinPlan( - Operator op, GenMRProcContext ctx, - boolean 
readInputMapJoin, boolean readInputUnion, boolean setReducer, int pos) - throws SemanticException { - initMapJoinPlan(op, ctx, readInputMapJoin, readInputUnion, setReducer, pos, false); - } /** - * Initialize the current plan by adding it to root tasks. - * - * @param op - * the map join operator encountered - * @param opProcCtx - * processing context - * @param pos - * position of the parent - */ - public static void initMapJoinPlan(Operator op, - GenMRProcContext opProcCtx, boolean readInputMapJoin, - boolean readInputUnion, boolean setReducer, int pos, boolean createLocalPlan) - throws SemanticException { - Map, GenMapRedCtx> mapCurrCtx = - opProcCtx.getMapCurrCtx(); - assert (((pos == -1) && (readInputMapJoin)) || (pos != -1)); - int parentPos = (pos == -1) ? 0 : pos; - GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get( - parentPos)); - Task currTask = mapredCtx.getCurrTask(); - MapredWork plan = (MapredWork) currTask.getWork(); - HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); - Operator currTopOp = opProcCtx.getCurrTopOp(); - - // The mapjoin has already been encountered. Some context must be stored - // about that - if (readInputMapJoin) { - AbstractMapJoinOperator currMapJoinOp = opProcCtx.getCurrMapJoinOp(); - assert currMapJoinOp != null; - boolean local = ((pos == -1) || (pos == (currMapJoinOp.getConf()).getPosBigTable())) ? - false : true; - - if (setReducer) { - Operator reducer = op.getChildOperators().get(0); - plan.setReducer(reducer); - opTaskMap.put(reducer, currTask); - if (reducer.getClass() == JoinOperator.class) { - plan.setNeedsTagging(true); - } - ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf(); - plan.setNumReduceTasks(desc.getNumReducers()); - } else { - opTaskMap.put(op, currTask); - } - - if (!readInputUnion) { - GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(currMapJoinOp); - String taskTmpDir; - TableDesc tt_desc; - Operator rootOp; - - if (mjCtx.getOldMapJoin() == null || setReducer) { - taskTmpDir = mjCtx.getTaskTmpDir(); - tt_desc = mjCtx.getTTDesc(); - rootOp = mjCtx.getRootMapJoinOp(); - } else { - GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(mjCtx - .getOldMapJoin()); - taskTmpDir = oldMjCtx.getTaskTmpDir(); - tt_desc = oldMjCtx.getTTDesc(); - rootOp = oldMjCtx.getRootMapJoinOp(); - } - - setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); - setupBucketMapJoinInfo(plan, currMapJoinOp, createLocalPlan); - } else { - initUnionPlan(opProcCtx, currTask, false); - } - - opProcCtx.setCurrMapJoinOp(null); - } else { - MapJoinDesc desc = (MapJoinDesc) op.getConf(); - - // The map is overloaded to keep track of mapjoins also - opTaskMap.put(op, currTask); - - List> rootTasks = opProcCtx.getRootTasks(); - if (!rootTasks.contains(currTask)) { - rootTasks.add(currTask); - } - - assert currTopOp != null; - List> seenOps = opProcCtx.getSeenOps(); - String currAliasId = opProcCtx.getCurrAliasId(); - - seenOps.add(currTopOp); - boolean local = (pos == desc.getPosBigTable()) ? 
false : true; - setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); - setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator)op, createLocalPlan); - } - - opProcCtx.setCurrTask(currTask); - opProcCtx.setCurrTopOp(null); - opProcCtx.setCurrAliasId(null); - } - - private static void setupBucketMapJoinInfo(MapredWork plan, - AbstractMapJoinOperator currMapJoinOp, boolean createLocalPlan) { - if (currMapJoinOp != null) { - Map>> aliasBucketFileNameMapping = - currMapJoinOp.getConf().getAliasBucketFileNameMapping(); - if(aliasBucketFileNameMapping!= null) { - MapredLocalWork localPlan = plan.getMapLocalWork(); - if(localPlan == null) { - if(currMapJoinOp instanceof SMBMapJoinOperator) { - localPlan = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork(); - } - if (localPlan == null && createLocalPlan) { - localPlan = new MapredLocalWork( - new LinkedHashMap>(), - new LinkedHashMap()); - } - } else { - //local plan is not null, we want to merge it into SMBMapJoinOperator's local work - if(currMapJoinOp instanceof SMBMapJoinOperator) { - MapredLocalWork smbLocalWork = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork(); - if(smbLocalWork != null) { - localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork()); - localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork()); - } - } - } - - if(localPlan == null) { - return; - } - - if(currMapJoinOp instanceof SMBMapJoinOperator) { - plan.setMapLocalWork(null); - ((SMBMapJoinOperator)currMapJoinOp).getConf().setLocalWork(localPlan); - } else { - plan.setMapLocalWork(localPlan); - } - BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext(); - localPlan.setBucketMapjoinContext(bucketMJCxt); - bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping); - bucketMJCxt.setBucketFileNameMapping(currMapJoinOp.getConf().getBigTableBucketNumMapping()); - localPlan.setInputFileChangeSensitive(true); - bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); - bucketMJCxt.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class); - bucketMJCxt.setBigTablePartSpecToFileMapping( - currMapJoinOp.getConf().getBigTablePartSpecToFileMapping()); - // BucketizedHiveInputFormat should be used for either sort merge join or bucket map join - if ((currMapJoinOp instanceof SMBMapJoinOperator) - || (currMapJoinOp.getConf().isBucketMapJoin())) { - plan.setUseBucketizedHiveInputFormat(true); - } - } - } - } - - /** * Initialize the current union plan. * * @param op @@ -312,7 +146,7 @@ MapredWork plan = (MapredWork) unionTask.getWork(); HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); + opProcCtx.getOpTaskMap(); opTaskMap.put(reducer, unionTask); plan.setReducer(reducer); @@ -434,13 +268,6 @@ opProcCtx.setCurrTask(existingTask); } - public static void joinPlan(Operator op, - Task oldTask, Task task, - GenMRProcContext opProcCtx, int pos, boolean split, - boolean readMapJoinData, boolean readUnionData) throws SemanticException { - joinPlan(op, oldTask, task, opProcCtx, pos, split, readMapJoinData, readUnionData, false); - } - /** * Merge the current task with the task for the current reducer. 
* @@ -457,8 +284,7 @@ */ public static void joinPlan(Operator op, Task oldTask, Task task, - GenMRProcContext opProcCtx, int pos, boolean split, - boolean readMapJoinData, boolean readUnionData, boolean createLocalWork) + GenMRProcContext opProcCtx, int pos, boolean split) throws SemanticException { Task currTask = task; MapredWork plan = (MapredWork) currTask.getWork(); @@ -494,53 +320,15 @@ : true; } setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); - if(op instanceof AbstractMapJoinOperator) { - setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator)op, createLocalWork); - } } currTopOp = null; opProcCtx.setCurrTopOp(currTopOp); - } else if (opProcCtx.getCurrMapJoinOp() != null) { - AbstractMapJoinOperator mjOp = opProcCtx.getCurrMapJoinOp(); - if (readUnionData) { - initUnionPlan(opProcCtx, currTask, false); - } else { - GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp); - - // In case of map-join followed by map-join, the file needs to be - // obtained from the old map join - AbstractMapJoinOperator oldMapJoin = mjCtx.getOldMapJoin(); - String taskTmpDir = null; - TableDesc tt_desc = null; - Operator rootOp = null; - - boolean local = ((pos == -1) || (pos == (mjOp.getConf()) - .getPosBigTable())) ? false : true; - if (oldMapJoin == null) { - if (opProcCtx.getParseCtx().getListMapJoinOpsNoReducer().contains(mjOp) - || local || (oldTask != null) && (parTasks != null)) { - taskTmpDir = mjCtx.getTaskTmpDir(); - tt_desc = mjCtx.getTTDesc(); - rootOp = mjCtx.getRootMapJoinOp(); - } - } else { - GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(oldMapJoin); - assert oldMjCtx != null; - taskTmpDir = oldMjCtx.getTaskTmpDir(); - tt_desc = oldMjCtx.getTTDesc(); - rootOp = oldMjCtx.getRootMapJoinOp(); - } - - setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); - setupBucketMapJoinInfo(plan, oldMapJoin, createLocalWork); - } - opProcCtx.setCurrMapJoinOp(null); } if ((oldTask != null) && (parTasks != null)) { for (Task parTask : parTasks) { parTask.addDependentTask(currTask); - if(opProcCtx.getRootTasks().contains(currTask)) { + if (opProcCtx.getRootTasks().contains(currTask)) { opProcCtx.getRootTasks().remove(currTask); } } @@ -558,7 +346,7 @@ * processing context */ public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) - throws SemanticException { + throws SemanticException { // Generate a new task ParseContext parseCtx = opProcCtx.getParseCtx(); MapredWork cplan = getMapRedWork(parseCtx); @@ -573,7 +361,7 @@ cplan.setNumReduceTasks(new Integer(desc.getNumReducers())); HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); + opProcCtx.getOpTaskMap(); opTaskMap.put(reducer, redTask); Task currTask = opProcCtx.getCurrTask(); @@ -635,12 +423,12 @@ if (partsList == null) { try { - partsList = parseCtx.getOpToPartList().get((TableScanOperator)topOp); + partsList = parseCtx.getOpToPartList().get((TableScanOperator) topOp); if (partsList == null) { partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp), - parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(), - alias_id, parseCtx.getPrunedPartitions()); - parseCtx.getOpToPartList().put((TableScanOperator)topOp, partsList); + parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(), + alias_id, parseCtx.getPrunedPartitions()); + parseCtx.getOpToPartList().put((TableScanOperator) topOp, partsList); } } catch (SemanticException e) { throw e; @@ -679,7 +467,8 @@ long sizeNeeded = Integer.MAX_VALUE; int fileLimit = -1; if (parseCtx.getGlobalLimitCtx().isEnable()) { - long 
sizePerRow = HiveConf.getLongVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITMAXROWSIZE); + long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(), + HiveConf.ConfVars.HIVELIMITMAXROWSIZE); sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow; // for the optimization that reduce number of input file, we limit number // of files allowed. If more than specific number of files have to be @@ -687,7 +476,7 @@ // inputs can cause unpredictable latency. It's not necessarily to be // cheaper. fileLimit = - HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE); + HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE); if (sizePerRow <= 0 || fileLimit <= 0) { LOG.info("Skip optimization to reduce input size of 'limit'"); @@ -870,7 +659,7 @@ Operator topOp, MapredWork plan, boolean local, TableDesc tt_desc) throws SemanticException { - if(path == null || alias == null) { + if (path == null || alias == null) { return; } @@ -952,8 +741,8 @@ MapredWork work = new MapredWork(); boolean mapperCannotSpanPartns = - conf.getBoolVar( - HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS); + conf.getBoolVar( + HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS); work.setMapperCannotSpanPartns(mapperCannotSpanPartns); work.setPathToAliases(new LinkedHashMap>()); work.setPathToPartitionInfo(new LinkedHashMap()); @@ -1034,7 +823,7 @@ // replace the reduce child with this operator List> childOpList = parent - .getChildOperators(); + .getChildOperators(); for (int pos = 0; pos < childOpList.size(); pos++) { if (childOpList.get(pos) == op) { childOpList.set(pos, fs_op); @@ -1043,7 +832,7 @@ } List> parentOpList = - new ArrayList>(); + new ArrayList>(); parentOpList.add(parent); fs_op.setParentOperators(parentOpList); @@ -1059,7 +848,7 @@ op.getParentOperators().set(posn, ts_op); Map, GenMapRedCtx> mapCurrCtx = - opProcCtx.getMapCurrCtx(); + opProcCtx.getMapCurrCtx(); mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null, null)); String streamDesc = taskTmpDir; @@ -1087,99 +876,11 @@ // Add the path to alias mapping setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan, local, tt_desc); - - // This can be cleaned up as a function table in future - if (op instanceof AbstractMapJoinOperator) { - AbstractMapJoinOperator mjOp = (AbstractMapJoinOperator) op; - opProcCtx.setCurrMapJoinOp(mjOp); - GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp); - if (mjCtx == null) { - mjCtx = new GenMRMapJoinCtx(taskTmpDir, tt_desc, ts_op, null); - } else { - mjCtx.setTaskTmpDir(taskTmpDir); - mjCtx.setTTDesc(tt_desc); - mjCtx.setRootMapJoinOp(ts_op); - } - opProcCtx.setMapJoinCtx(mjOp, mjCtx); - opProcCtx.getMapCurrCtx().put(parent, - new GenMapRedCtx(childTask, null, null)); - setupBucketMapJoinInfo(cplan, mjOp, false); - } - - currTopOp = null; - String currAliasId = null; - - opProcCtx.setCurrTopOp(currTopOp); - opProcCtx.setCurrAliasId(currAliasId); + opProcCtx.setCurrTopOp(null); + opProcCtx.setCurrAliasId(null); opProcCtx.setCurrTask(childTask); } - public static void mergeMapJoinUnion(UnionOperator union, - GenMRProcContext ctx, int pos) throws SemanticException { - ParseContext parseCtx = ctx.getParseCtx(); - UnionProcContext uCtx = parseCtx.getUCtx(); - - UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union); - assert uPrsCtx != null; - - Task currTask = ctx.getCurrTask(); - - GenMRUnionCtx uCtxTask = ctx.getUnionTask(union); - Task uTask = null; - - union.getParentOperators().get(pos); - MapredWork uPlan = null; - - // union 
is encountered for the first time - if (uCtxTask == null) { - uCtxTask = new GenMRUnionCtx(); - uPlan = GenMapRedUtils.getMapRedWork(parseCtx); - uTask = TaskFactory.get(uPlan, parseCtx.getConf()); - uCtxTask.setUTask(uTask); - ctx.setUnionTask(union, uCtxTask); - } else { - uTask = uCtxTask.getUTask(); - uPlan = (MapredWork) uTask.getWork(); - } - - // If there is a mapjoin at position 'pos' - if (uPrsCtx.getMapJoinSubq(pos)) { - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(ctx.getCurrMapJoinOp()); - String taskTmpDir = mjCtx.getTaskTmpDir(); - if (uPlan.getPathToAliases().get(taskTmpDir) == null) { - uPlan.getPathToAliases().put(taskTmpDir, new ArrayList()); - uPlan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - uPlan.getPathToPartitionInfo().put(taskTmpDir, - new PartitionDesc(mjCtx.getTTDesc(), null)); - uPlan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); - } - - for (Task t : currTask.getParentTasks()) { - t.addDependentTask(uTask); - } - try { - boolean notDone = true; - while (notDone) { - for (Task t : currTask.getParentTasks()) { - t.removeDependentTask(currTask); - } - notDone = false; - } - } catch (ConcurrentModificationException e) { - } - } else { - setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), uPlan, false, ctx); - } - - ctx.setCurrTask(uTask); - ctx.setCurrAliasId(null); - ctx.setCurrTopOp(null); - ctx.setCurrMapJoinOp(null); - - ctx.getMapCurrCtx().put(union, - new GenMapRedCtx(ctx.getCurrTask(), null, null)); - } - private GenMapRedUtils() { // prevent instantiation } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (working copy) @@ -18,48 +18,38 @@ package org.apache.hadoop.hive.ql.optimizer; import java.io.Serializable; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Stack; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.OperatorFactory; -import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; -import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; -import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.FileSinkDesc; +import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PlanUtils; -import org.apache.hadoop.hive.ql.plan.TableDesc; /** * 
Operator factory for MapJoin processing. */ public final class MapJoinFactory { - public static int getPositionParent(AbstractMapJoinOperator op, Stack stack) { + public static int getPositionParent(AbstractMapJoinOperator op, + Stack stack) { int pos = 0; int size = stack.size(); assert size >= 2 && stack.get(size - 1) == op; Operator parent = - (Operator) stack.get(size - 2); + (Operator) stack.get(size - 2); List> parOp = op.getParentOperators(); pos = parOp.indexOf(parent); assert pos < parOp.size(); @@ -71,217 +61,148 @@ */ public static class TableScanMapJoin implements NodeProcessor { - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd; - GenMRProcContext ctx = (GenMRProcContext) procCtx; + public static void setupBucketMapJoinInfo(MapredWork plan, + AbstractMapJoinOperator currMapJoinOp) { + if (currMapJoinOp != null) { + Map>> aliasBucketFileNameMapping = + currMapJoinOp.getConf().getAliasBucketFileNameMapping(); + if (aliasBucketFileNameMapping != null) { + MapredLocalWork localPlan = plan.getMapLocalWork(); + if (localPlan == null) { + if (currMapJoinOp instanceof SMBMapJoinOperator) { + localPlan = ((SMBMapJoinOperator) currMapJoinOp).getConf().getLocalWork(); + } + } else { + // local plan is not null, we want to merge it into SMBMapJoinOperator's local work + if (currMapJoinOp instanceof SMBMapJoinOperator) { + MapredLocalWork smbLocalWork = ((SMBMapJoinOperator) currMapJoinOp).getConf() + .getLocalWork(); + if (smbLocalWork != null) { + localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork()); + localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork()); + } + } + } - // find the branch on which this processor was invoked - int pos = getPositionParent(mapJoin, stack); + if (localPlan == null) { + return; + } - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get( - pos)); - Task currTask = mapredCtx.getCurrTask(); - MapredWork currPlan = (MapredWork) currTask.getWork(); - Operator currTopOp = mapredCtx.getCurrTopOp(); - String currAliasId = mapredCtx.getCurrAliasId(); - Operator reducer = mapJoin; - HashMap, Task> opTaskMap = - ctx.getOpTaskMap(); - Task opMapTask = opTaskMap.get(reducer); - - ctx.setCurrTopOp(currTopOp); - ctx.setCurrAliasId(currAliasId); - ctx.setCurrTask(currTask); - - // If the plan for this reducer does not exist, initialize the plan - if (opMapTask == null) { - assert currPlan.getReducer() == null; - GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, false, false, false, pos); - } else { - // The current plan can be thrown away after being merged with the - // original plan - GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false, - false, false); - currTask = opMapTask; - ctx.setCurrTask(currTask); + if (currMapJoinOp instanceof SMBMapJoinOperator) { + plan.setMapLocalWork(null); + ((SMBMapJoinOperator) currMapJoinOp).getConf().setLocalWork(localPlan); + } else { + plan.setMapLocalWork(localPlan); + } + BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext(); + localPlan.setBucketMapjoinContext(bucketMJCxt); + bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping); + bucketMJCxt.setBucketFileNameMapping( + currMapJoinOp.getConf().getBigTableBucketNumMapping()); + localPlan.setInputFileChangeSensitive(true); + 
bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); + bucketMJCxt + .setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class); + bucketMJCxt.setBigTablePartSpecToFileMapping( + currMapJoinOp.getConf().getBigTablePartSpecToFileMapping()); + // BucketizedHiveInputFormat should be used for either sort merge join or bucket map join + if ((currMapJoinOp instanceof SMBMapJoinOperator) + || (currMapJoinOp.getConf().isBucketMapJoin())) { + plan.setUseBucketizedHiveInputFormat(true); + } + } } - - mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx - .getCurrTopOp(), ctx.getCurrAliasId())); - return null; } - } - /** - * ReduceSink followed by MapJoin. - */ - public static class ReduceSinkMapJoin implements NodeProcessor { + /** + * Initialize the current plan by adding it to root tasks. + * + * @param op + * the map join operator encountered + * @param opProcCtx + * processing context + * @param pos + * position of the parent + */ + private static void initMapJoinPlan(AbstractMapJoinOperator op, + GenMRProcContext opProcCtx, int pos) + throws SemanticException { + Map, GenMapRedCtx> mapCurrCtx = + opProcCtx.getMapCurrCtx(); + int parentPos = (pos == -1) ? 0 : pos; + GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get( + parentPos)); + Task currTask = mapredCtx.getCurrTask(); + MapredWork plan = (MapredWork) currTask.getWork(); + HashMap, Task> opTaskMap = + opProcCtx.getOpTaskMap(); + Operator currTopOp = opProcCtx.getCurrTopOp(); - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd; - GenMRProcContext opProcCtx = (GenMRProcContext) procCtx; + MapJoinDesc desc = (MapJoinDesc) op.getConf(); - ParseContext parseCtx = opProcCtx.getParseCtx(); - MapredWork cplan = GenMapRedUtils.getMapRedWork(parseCtx); - Task redTask = TaskFactory.get(cplan, parseCtx - .getConf()); - Task currTask = opProcCtx.getCurrTask(); + // The map is overloaded to keep track of mapjoins also + opTaskMap.put(op, currTask); - // find the branch on which this processor was invoked - int pos = getPositionParent(mapJoin, stack); - boolean local = (pos == ((mapJoin.getConf())).getPosBigTable()) ? false - : true; + List> rootTasks = opProcCtx.getRootTasks(); + assert (!rootTasks.contains(currTask)); + rootTasks.add(currTask); - GenMapRedUtils.splitTasks(mapJoin, currTask, redTask, opProcCtx, false, - local, pos); + assert currTopOp != null; + opProcCtx.getSeenOps().add(currTopOp); - currTask = opProcCtx.getCurrTask(); - HashMap, Task> opTaskMap = - opProcCtx.getOpTaskMap(); - Task opMapTask = opTaskMap.get(mapJoin); - - // If the plan for this reducer does not exist, initialize the plan - if (opMapTask == null) { - assert cplan.getReducer() == null; - opTaskMap.put(mapJoin, currTask); - opProcCtx.setCurrMapJoinOp(null); - } else { - // The current plan can be thrown away after being merged with the - // original plan - GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, opProcCtx, pos, - false, false, false); - currTask = opMapTask; - opProcCtx.setCurrTask(currTask); - } - - return null; + String currAliasId = opProcCtx.getCurrAliasId(); + boolean local = (pos == desc.getPosBigTable()) ? false : true; + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); + setupBucketMapJoinInfo(plan, op); } - } - /** - * MapJoin followed by Select. 
- */ - public static class MapJoin implements NodeProcessor { - /** - * Create a task by splitting the plan below the join. The reason, we have - * to do so in the processing of Select and not MapJoin is due to the - * walker. While processing a node, it is not safe to alter its children - * because that will decide the course of the walk. It is perfectly fine to - * muck around with its parents though, since those nodes have already been - * visited. + * Merge the current task with the task for the current reducer. + * + * @param op + * operator being processed + * @param oldTask + * the old task for the current reducer + * @param task + * the current task for the current reducer + * @param opProcCtx + * processing context + * @param pos + * position of the parent in the stack */ - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { + public static void joinMapJoinPlan(AbstractMapJoinOperator op, + Task task, + GenMRProcContext opProcCtx, int pos) + throws SemanticException { + Task currTask = task; + MapredWork plan = (MapredWork) currTask.getWork(); + Operator currTopOp = opProcCtx.getCurrTopOp(); + List> parTasks = null; - SelectOperator sel = (SelectOperator) nd; - AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) sel.getParentOperators().get( - 0); - assert sel.getParentOperators().size() == 1; + List> seenOps = opProcCtx.getSeenOps(); + String currAliasId = opProcCtx.getCurrAliasId(); - GenMRProcContext ctx = (GenMRProcContext) procCtx; - ParseContext parseCtx = ctx.getParseCtx(); - - // is the mapjoin followed by a reducer - List> listMapJoinOps = parseCtx - .getListMapJoinOpsNoReducer(); - - if (listMapJoinOps.contains(mapJoin)) { - ctx.setCurrAliasId(null); - ctx.setCurrTopOp(null); - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - mapCurrCtx.put((Operator) nd, new GenMapRedCtx( - ctx.getCurrTask(), null, null)); - return null; + if (!seenOps.contains(currTopOp)) { + seenOps.add(currTopOp); + boolean local = false; + if (pos != -1) { + local = (pos == ((MapJoinDesc) op.getConf()).getPosBigTable()) ? 
false + : true; + } + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); + setupBucketMapJoinInfo(plan, op); } - - ctx.setCurrMapJoinOp(mapJoin); - - Task currTask = ctx.getCurrTask(); - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin); - if (mjCtx == null) { - mjCtx = new GenMRMapJoinCtx(); - ctx.setMapJoinCtx(mapJoin, mjCtx); - } - - MapredWork mjPlan = GenMapRedUtils.getMapRedWork(parseCtx); - Task mjTask = TaskFactory.get(mjPlan, parseCtx - .getConf()); - - TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils - .getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol")); - - // generate the temporary file - Context baseCtx = parseCtx.getContext(); - String taskTmpDir = baseCtx.getMRTmpFileURI(); - - // Add the path to alias mapping - mjCtx.setTaskTmpDir(taskTmpDir); - mjCtx.setTTDesc(tt_desc); - mjCtx.setRootMapJoinOp(sel); - - sel.setParentOperators(null); - - // Create a file sink operator for this file name - Operator fs_op = OperatorFactory.get( - new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar( - HiveConf.ConfVars.COMPRESSINTERMEDIATE)), mapJoin.getSchema()); - - assert mapJoin.getChildOperators().size() == 1; - mapJoin.getChildOperators().set(0, fs_op); - - List> parentOpList = - new ArrayList>(); - parentOpList.add(mapJoin); - fs_op.setParentOperators(parentOpList); - - currTask.addDependentTask(mjTask); - - ctx.setCurrTask(mjTask); - ctx.setCurrAliasId(null); - ctx.setCurrTopOp(null); - - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - mapCurrCtx.put((Operator) nd, new GenMapRedCtx( - ctx.getCurrTask(), null, null)); - - return null; + currTopOp = null; + opProcCtx.setCurrTopOp(currTopOp); + opProcCtx.setCurrTask(currTask); } - } - /** - * MapJoin followed by MapJoin. - */ - public static class MapJoinMapJoin implements NodeProcessor { - @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - AbstractMapJoinOperator mapJoin = - (AbstractMapJoinOperator) nd; + Object... 
nodeOutputs) throws SemanticException { + AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd; GenMRProcContext ctx = (GenMRProcContext) procCtx; - ctx.getParseCtx(); - AbstractMapJoinOperator oldMapJoin = ctx.getCurrMapJoinOp(); - - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin); - if (mjCtx != null) { - mjCtx.setOldMapJoin(oldMapJoin); - } else { - ctx.setMapJoinCtx(mapJoin, new GenMRMapJoinCtx(null, null, null, - oldMapJoin)); - } - ctx.setCurrMapJoinOp(mapJoin); - // find the branch on which this processor was invoked int pos = getPositionParent(mapJoin, stack); @@ -291,99 +212,29 @@ pos)); Task currTask = mapredCtx.getCurrTask(); MapredWork currPlan = (MapredWork) currTask.getWork(); - mapredCtx.getCurrAliasId(); + Operator currTopOp = mapredCtx.getCurrTopOp(); + String currAliasId = mapredCtx.getCurrAliasId(); Operator reducer = mapJoin; HashMap, Task> opTaskMap = - ctx.getOpTaskMap(); + ctx.getOpTaskMap(); Task opMapTask = opTaskMap.get(reducer); + ctx.setCurrTopOp(currTopOp); + ctx.setCurrAliasId(currAliasId); ctx.setCurrTask(currTask); // If the plan for this reducer does not exist, initialize the plan if (opMapTask == null) { assert currPlan.getReducer() == null; - GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, true, false, false, pos); + initMapJoinPlan(mapJoin, ctx, pos); } else { // The current plan can be thrown away after being merged with the // original plan - GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, ctx, pos, false, - true, false); + joinMapJoinPlan(mapJoin, opMapTask, ctx, pos); currTask = opMapTask; ctx.setCurrTask(currTask); } - mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), null, null)); - return null; - } - } - - /** - * Union followed by MapJoin. - */ - public static class UnionMapJoin implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - GenMRProcContext ctx = (GenMRProcContext) procCtx; - - ParseContext parseCtx = ctx.getParseCtx(); - UnionProcContext uCtx = parseCtx.getUCtx(); - - // union was map only - no special processing needed - if (uCtx.isMapOnlySubq()) { - return (new TableScanMapJoin()) - .process(nd, stack, procCtx, nodeOutputs); - } - - UnionOperator currUnion = ctx.getCurrUnionOp(); - assert currUnion != null; - ctx.getUnionTask(currUnion); - AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd; - - // find the branch on which this processor was invoked - int pos = getPositionParent(mapJoin, stack); - - Map, GenMapRedCtx> mapCurrCtx = ctx - .getMapCurrCtx(); - GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get( - pos)); - Task currTask = mapredCtx.getCurrTask(); - MapredWork currPlan = (MapredWork) currTask.getWork(); - Operator reducer = mapJoin; - HashMap, Task> opTaskMap = - ctx.getOpTaskMap(); - Task opMapTask = opTaskMap.get(reducer); - - // union result cannot be a map table - boolean local = (pos == (mapJoin.getConf()).getPosBigTable()) ? 
false - : true; - if (local) { - throw new SemanticException(ErrorMsg.INVALID_MAPJOIN_TABLE.getMsg()); - } - - // If the plan for this reducer does not exist, initialize the plan - if (opMapTask == null) { - assert currPlan.getReducer() == null; - ctx.setCurrMapJoinOp(mapJoin); - GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, true, true, false, pos); - ctx.setCurrUnionOp(null); - } else { - // The current plan can be thrown away after being merged with the - // original plan - Task uTask = ctx.getUnionTask( - ctx.getCurrUnionOp()).getUTask(); - if (uTask.getId().equals(opMapTask.getId())) { - GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false, - false, true); - } else { - GenMapRedUtils.joinPlan(mapJoin, uTask, opMapTask, ctx, pos, false, - false, true); - } - currTask = opMapTask; - ctx.setCurrTask(currTask); - } - mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx .getCurrTopOp(), ctx.getCurrAliasId())); return null; @@ -394,22 +245,6 @@ return new TableScanMapJoin(); } - public static NodeProcessor getUnionMapJoin() { - return new UnionMapJoin(); - } - - public static NodeProcessor getReduceSinkMapJoin() { - return new ReduceSinkMapJoin(); - } - - public static NodeProcessor getMapJoin() { - return new MapJoin(); - } - - public static NodeProcessor getMapJoinMapJoin() { - return new MapJoinMapJoin(); - } - private MapJoinFactory() { // prevent instantiation } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (working copy) @@ -71,8 +71,7 @@ if (opMapTask == null) { GenMapRedUtils.splitPlan(op, ctx); } else { - GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true, false, - false); + GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true); currTask = opMapTask; ctx.setCurrTask(currTask); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy) @@ -32,12 +32,10 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.MapRedTask; import org.apache.hadoop.hive.ql.exec.MoveTask; import org.apache.hadoop.hive.ql.exec.Operator; @@ -52,7 +50,6 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; @@ -67,7 +64,6 @@ import org.apache.hadoop.hive.ql.plan.ExtractDesc; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import 
org.apache.hadoop.hive.ql.plan.LoadFileDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -774,13 +770,7 @@ private String processFS(Node nd, Stack stack, NodeProcessorCtx opProcCtx, boolean chDir) throws SemanticException { - // Is it the dummy file sink after the mapjoin FileSinkOperator fsOp = (FileSinkOperator) nd; - if ((fsOp.getParentOperators().size() == 1) - && (fsOp.getParentOperators().get(0) instanceof MapJoinOperator)) { - return null; - } - GenMRProcContext ctx = (GenMRProcContext) opProcCtx; List seenFSOps = ctx.getSeenFileSinkOps(); if (seenFSOps == null) { @@ -884,24 +874,6 @@ return dest; } - AbstractMapJoinOperator currMapJoinOp = ctx.getCurrMapJoinOp(); - - if (currMapJoinOp != null) { - opTaskMap.put(null, currTask); - GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(currMapJoinOp); - MapredWork plan = (MapredWork) currTask.getWork(); - - String taskTmpDir = mjCtx.getTaskTmpDir(); - TableDesc tt_desc = mjCtx.getTTDesc(); - assert plan.getPathToAliases().get(taskTmpDir) == null; - plan.getPathToAliases().put(taskTmpDir, new ArrayList()); - plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - plan.getPathToPartitionInfo().put(taskTmpDir, - new PartitionDesc(tt_desc, null)); - plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); - return dest; - } - return dest; } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java (working copy) @@ -148,4 +148,14 @@ public OperatorType getType() { return OperatorType.UNION; } + + @Override + public boolean opAllowedBeforeMapJoin() { + return false; + } + + @Override + public boolean opAllowedAfterMapJoin() { + return false; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (working copy) @@ -1429,4 +1429,22 @@ public boolean supportUnionRemoveOptimization() { return false; } + + /* + * This operator is allowed before mapjoin. Eventually, the mapjoin hint should be done away with. + * But since bucketized mapjoin and sortmerge join depend on it completely, it is needed for now. + * Check the operators which are allowed before mapjoin. + */ + public boolean opAllowedBeforeMapJoin() { + return true; + } + + /* + * This operator is allowed after mapjoin. Eventually, the mapjoin hint should be done away with. + * But since bucketized mapjoin and sortmerge join depend on it completely, it is needed for now. + * Check the operators which are allowed after mapjoin.
+ */ + public boolean opAllowedAfterMapJoin() { + return true; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (working copy) @@ -321,4 +321,9 @@ public OperatorType getType() { return OperatorType.REDUCESINK; } + + @Override + public boolean opAllowedBeforeMapJoin() { + return false; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (working copy) @@ -933,4 +933,13 @@ this.posToAliasMap = posToAliasMap; } + @Override + public boolean opAllowedBeforeMapJoin() { + return false; + } + + @Override + public boolean opAllowedAfterMapJoin() { + return false; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -70,7 +70,6 @@ import org.apache.hadoop.hive.ql.exec.RecordWriter; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; -import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; @@ -105,7 +104,6 @@ import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3; -import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink4; import org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1; import org.apache.hadoop.hive.ql.optimizer.GenMRUnion1; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; @@ -216,11 +214,11 @@ private final UnparseTranslator unparseTranslator; private final GlobalLimitCtx globalLimitCtx = new GlobalLimitCtx(); - //prefix for column names auto generated by hive + // prefix for column names auto generated by hive private final String autogenColAliasPrfxLbl; private final boolean autogenColAliasPrfxIncludeFuncName; - //Max characters when auto generating the column name with func name + // Max characters when auto generating the column name with func name private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20; private static class Phase1Ctx { @@ -249,9 +247,9 @@ prunedPartitions = new HashMap(); unparseTranslator = new UnparseTranslator(); autogenColAliasPrfxLbl = HiveConf.getVar(conf, - HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL); + HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL); autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf, - HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME); + HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME); queryProperties = new QueryProperties(); opToPartToSkewedPruner = new HashMap>(); } @@ -354,7 +352,8 @@ ASTNode selectExpr, QBParseInfo qbp) { for (int i = 0; i < selectExpr.getChildCount(); ++i) { ASTNode selExpr = (ASTNode) selectExpr.getChild(i); - if ((selExpr.getToken().getType() == 
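To illustrate the intent of the opAllowedBeforeMapJoin()/opAllowedAfterMapJoin() hooks added above: an optimizer pass can walk the operator tree and veto a mapjoin when an operator on the relevant side (union, reduce sink, or common join in the overrides above) disallows it. The following is only an illustrative sketch, not part of this patch; the helper name checkOperatorsUpstreamOfMapJoin and its use as a pre-conversion check are assumptions, while opAllowedBeforeMapJoin() and getParentOperators() come from the code shown here.

  private static boolean checkOperatorsUpstreamOfMapJoin(Operator<?> op) {
    // Veto the mapjoin as soon as any operator above it disallows a mapjoin below itself.
    if (!op.opAllowedBeforeMapJoin()) {
      return false;
    }
    if (op.getParentOperators() == null) {
      return true;
    }
    for (Operator<?> parent : op.getParentOperators()) {
      if (!checkOperatorsUpstreamOfMapJoin(parent)) {
        return false;
      }
    }
    return true;
  }

A symmetric walk over getChildOperators() using opAllowedAfterMapJoin() would cover the downstream side.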
HiveParser.TOK_SELEXPR) && (selExpr.getChildCount() == 2)) { + if ((selExpr.getToken().getType() == HiveParser.TOK_SELEXPR) + && (selExpr.getChildCount() == 2)) { String columnAlias = unescapeIdentifier(selExpr.getChild(1).getText()); qbp.setExprToColumnAlias((ASTNode) selExpr.getChild(0), columnAlias); } @@ -489,15 +488,15 @@ // Need to change it to list of columns if (sampleCols.size() > 2) { throw new SemanticException(generateErrorMessage( - (ASTNode) tabref.getChild(0), - ErrorMsg.SAMPLE_RESTRICTION.getMsg())); + (ASTNode) tabref.getChild(0), + ErrorMsg.SAMPLE_RESTRICTION.getMsg())); } qb.getParseInfo().setTabSample( alias, new TableSample( - unescapeIdentifier(sampleClause.getChild(0).getText()), - unescapeIdentifier(sampleClause.getChild(1).getText()), - sampleCols)); + unescapeIdentifier(sampleClause.getChild(0).getText()), + unescapeIdentifier(sampleClause.getChild(1).getText()), + sampleCols)); if (unparseTranslator.isEnabled()) { for (ASTNode sampleCol : sampleCols) { unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol @@ -508,13 +507,12 @@ // only CombineHiveInputFormat supports this optimize String inputFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT); if (!inputFormat.equals( - CombineHiveInputFormat.class.getName())) { + CombineHiveInputFormat.class.getName())) { throw new SemanticException(generateErrorMessage((ASTNode) tabref.getChild(1), "Percentage sampling is not supported in " + inputFormat)); } ASTNode sampleClause = (ASTNode) tabref.getChild(1); String alias_id = getAliasId(alias, qb); - Tree type = sampleClause.getChild(0); String numerator = unescapeIdentifier(sampleClause.getChild(1).getText()); @@ -616,7 +614,7 @@ if ((numChildren != 2) && (numChildren != 3) && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) { throw new SemanticException(generateErrorMessage(join, - "Join with multiple children")); + "Join with multiple children")); } for (int num = 0; num < numChildren; num++) { @@ -729,7 +727,7 @@ case HiveParser.TOK_INSERT_INTO: String currentDatabase = db.getCurrentDatabase(); - String tab_name = getUnescapedName((ASTNode)ast.getChild(0).getChild(0), currentDatabase); + String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase); qbp.addInsertIntoTable(tab_name); case HiveParser.TOK_DESTINATION: @@ -753,7 +751,7 @@ int child_count = ast.getChildCount(); if (child_count != 1) { throw new SemanticException(generateErrorMessage(ast, - "Multiple Children " + child_count)); + "Multiple Children " + child_count)); } // Check if this is a subquery / lateral view @@ -786,10 +784,10 @@ qbp.setDistributeByExprForClause(ctx_1.dest, ast); if (qbp.getClusterByForClause(ctx_1.dest) != null) { throw new SemanticException(generateErrorMessage(ast, - ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg())); + ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg())); } else if (qbp.getOrderByForClause(ctx_1.dest) != null) { throw new SemanticException(generateErrorMessage(ast, - ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg())); + ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg())); } break; @@ -800,10 +798,10 @@ qbp.setSortByExprForClause(ctx_1.dest, ast); if (qbp.getClusterByForClause(ctx_1.dest) != null) { throw new SemanticException(generateErrorMessage(ast, - ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg())); + ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg())); } else if (qbp.getOrderByForClause(ctx_1.dest) != null) { throw new SemanticException(generateErrorMessage(ast, - ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg())); + 
ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg())); } break; @@ -815,7 +813,7 @@ qbp.setOrderByExprForClause(ctx_1.dest, ast); if (qbp.getClusterByForClause(ctx_1.dest) != null) { throw new SemanticException(generateErrorMessage(ast, - ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg())); + ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg())); } break; @@ -831,7 +829,7 @@ } if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) { throw new SemanticException(generateErrorMessage(ast, - ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg())); + ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg())); } qbp.setGroupByExprForClause(ctx_1.dest, ast); skipRecursion = true; @@ -858,7 +856,7 @@ case HiveParser.TOK_ANALYZE: // Case of analyze command - String table_name = getUnescapedName((ASTNode)ast.getChild(0).getChild(0)); + String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)); qb.setTabAlias(table_name, table_name); @@ -876,7 +874,7 @@ // select * from (subq1 union subq2) subqalias if (!qbp.getIsSubQ()) { throw new SemanticException(generateErrorMessage(ast, - ErrorMsg.UNION_NOTIN_SUBQ.getMsg())); + ErrorMsg.UNION_NOTIN_SUBQ.getMsg())); } case HiveParser.TOK_INSERT: @@ -919,7 +917,7 @@ } } else { throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS - .getMsg(partition.toString())); + .getMsg(partition.toString())); } } @@ -971,7 +969,7 @@ } // Disallow INSERT INTO on bucketized tables - if(qb.getParseInfo().isInsertIntoTable(tab.getDbName(), tab.getTableName()) && + if (qb.getParseInfo().isInsertIntoTable(tab.getDbName(), tab.getTableName()) && tab.getNumBuckets() > 0) { throw new SemanticException(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE. getMsg("Table: " + tab_name)); @@ -993,9 +991,9 @@ if (qb.getParseInfo().isAnalyzeCommand()) { throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg()); } - String fullViewName = tab.getDbName()+"."+tab.getTableName(); + String fullViewName = tab.getDbName() + "." 
+ tab.getTableName(); // Prevent view cycles - if(viewsExpanded.contains(fullViewName)){ + if (viewsExpanded.contains(fullViewName)) { throw new SemanticException("Recursive view " + fullViewName + " detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") + " -> " + fullViewName + ")."); @@ -1010,8 +1008,8 @@ if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) { throw new SemanticException(generateErrorMessage( - qb.getParseInfo().getSrcForAlias(alias), - ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg())); + qb.getParseInfo().getSrcForAlias(alias), + ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg())); } qb.getMetaData().setSrcForAlias(alias, tab); @@ -1022,8 +1020,9 @@ try { ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec); } catch (HiveException e) { - throw new SemanticException(generateErrorMessage(qb.getParseInfo().getSrcForAlias(alias), - "Cannot get partitions for " + ts.partSpec), e); + throw new SemanticException(generateErrorMessage( + qb.getParseInfo().getSrcForAlias(alias), + "Cannot get partitions for " + ts.partSpec), e); } } qb.getParseInfo().addTableSpec(alias, ts); @@ -1040,7 +1039,7 @@ QBExpr qbexpr = qb.getSubqForAlias(alias); getMetaData(qbexpr); if (wasView) { - viewsExpanded.remove(viewsExpanded.size()-1); + viewsExpanded.remove(viewsExpanded.size() - 1); } } @@ -1100,7 +1099,7 @@ ctx.setResFile(null); // allocate a temporary output dir on the location of the table - String tableName = getUnescapedName((ASTNode)ast.getChild(0)); + String tableName = getUnescapedName((ASTNode) ast.getChild(0)); Table newTable = db.newTable(tableName); Path location; try { @@ -1114,7 +1113,7 @@ FileUtils.makeQualified(location, conf).toUri()); } catch (Exception e) { throw new SemanticException(generateErrorMessage(ast, - "Error creating temporary folder on: " + location.toString()), e); + "Error creating temporary folder on: " + location.toString()), e); } if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { tableSpec ts = new tableSpec(db, conf, this.ast); @@ -1135,7 +1134,7 @@ } default: throw new SemanticException(generateErrorMessage(ast, - "Unknown Token Type " + ast.getToken().getType())); + "Unknown Token Type " + ast.getToken().getType())); } } } catch (HiveException e) { @@ -1153,7 +1152,7 @@ ASTNode viewTree; final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getTableName(), tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias( - alias)); + alias)); try { String viewText = tab.getViewExpandedText(); // Reparse text, passing null for context to avoid clobbering @@ -1335,6 +1334,7 @@ joinTree.addFilterMapping(cond.getRight(), cond.getLeft(), filters.get(1).size()); } } + /** * Parse the join condition. If the condition is a join condition, throw an * error if it is not an equality. 
Otherwise, break it into left and right @@ -1400,7 +1400,7 @@ if ((rightCondAl1.size() != 0) || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) { if (type.equals(JoinType.LEFTOUTER) || - type.equals(JoinType.FULLOUTER)) { + type.equals(JoinType.FULLOUTER)) { if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) { joinTree.getFilters().get(0).add(joinCond); } else { @@ -1487,7 +1487,7 @@ for (int ci = childrenBegin; ci < joinCond.getChildCount(); ci++) { parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(ci), leftAlias.get(ci - childrenBegin), rightAlias.get(ci - - childrenBegin), null); + - childrenBegin), null); } boolean leftAliasNull = true; @@ -1568,7 +1568,7 @@ Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema( - inputRR.getColumnInfos()), input), inputRR); + inputRR.getColumnInfos()), input), inputRR); return output; } @@ -1599,11 +1599,11 @@ RowResolver inputRR = inputCtx.getRowResolver(); Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema( - inputRR.getColumnInfos()), input), inputRR); + inputRR.getColumnInfos()), input), inputRR); if (LOG.isDebugEnabled()) { LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: " - + inputRR.toString()); + + inputRR.toString()); } return output; } @@ -1671,8 +1671,8 @@ col_list.add(expr); output.put(tmp[0], tmp[1], new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), - colInfo.getTabAlias(), colInfo.getIsVirtualCol(), - colInfo.isHiddenVirtualCol())); + colInfo.getTabAlias(), colInfo.getIsVirtualCol(), + colInfo.isHiddenVirtualCol())); pos = Integer.valueOf(pos.intValue() + 1); matched++; @@ -1794,7 +1794,7 @@ tblDesc.getProperties().setProperty(serdeConstants.LINE_DELIM, lineDelim); if (!lineDelim.equals("\n") && !lineDelim.equals("10")) { throw new SemanticException(generateErrorMessage(rowChild, - ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg())); + ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg())); } break; default: @@ -1879,7 +1879,7 @@ String intName = getColumnInternalName(i); ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils .getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child - .getChild(1))), null, false); + .getChild(1))), null, false); colInfo.setAlias(colAlias); outputCols.add(colInfo); } @@ -1972,8 +1972,8 @@ Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( new ScriptDesc( - fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())), - inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo), + fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())), + inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch); return output; @@ -2032,7 +2032,7 @@ private List getGroupingSetsForRollup(int size) { List groupingSetKeys = new ArrayList(); for (int i = 0; i <= size; i++) { - groupingSetKeys.add((1 << i) - 1); + groupingSetKeys.add((1 << i) - 1); } return groupingSetKeys; } @@ -2040,7 +2040,7 @@ private List getGroupingSetsForCube(int size) { int count = 1 << size; List results = new ArrayList(count); - for(int i = 0; i < count; ++i) { + for (int i = 0; i < count; ++i) { results.add(i); } return results; @@ -2077,16 +2077,16 @@ if (root != null) { for (int i = 0; i < root.getChildCount(); ++i) { ASTNode child = (ASTNode) 
root.getChild(i); - if(child.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) { + if (child.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) { continue; } int bitmap = 0; for (int j = 0; j < child.getChildCount(); ++j) { String treeAsString = child.getChild(j).toStringTree(); Integer pos = exprPos.get(treeAsString); - if(pos == null) { + if (pos == null) { throw new SemanticException( - generateErrorMessage((ASTNode)child.getChild(j), + generateErrorMessage((ASTNode) child.getChild(j), ErrorMsg.HIVE_GROUPING_SETS_EXPR_NOT_IN_GROUPBY.getErrorCodedMsg())); } bitmap = setBit(bitmap, pos); @@ -2094,7 +2094,7 @@ result.add(bitmap); } } - if(checkForNoAggr(result)) { + if (checkForNoAggr(result)) { throw new SemanticException( ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOFUNC.getMsg()); } @@ -2103,7 +2103,7 @@ private boolean checkForNoAggr(List bitmaps) { boolean ret = true; - for(int mask : bitmaps) { + for (int mask : bitmaps) { ret &= mask == 0; } return ret; @@ -2167,7 +2167,7 @@ ASTNode root = (ASTNode) selExpr.getChild(0); if (root.getType() == HiveParser.TOK_TABLE_OR_COL) { colAlias = - BaseSemanticAnalyzer.unescapeIdentifier(root.getChild(0).getText()); + BaseSemanticAnalyzer.unescapeIdentifier(root.getChild(0).getText()); colRef[0] = tabAlias; colRef[1] = colAlias; return colRef; @@ -2189,23 +2189,23 @@ } } - //if specified generate alias using func name + // if specified generate alias using func name if (includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)) { String expr_flattened = root.toStringTree(); - //remove all TOK tokens + // remove all TOK tokens String expr_no_tok = expr_flattened.replaceAll("TOK_\\S+", ""); - //remove all non alphanumeric letters, replace whitespace spans with underscore - String expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_"); + // remove all non alphanumeric letters, replace whitespace spans with underscore + String expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_"); - //limit length to 20 chars - if(expr_formatted.length()>AUTOGEN_COLALIAS_PRFX_MAXLENGTH) { + // limit length to 20 chars + if (expr_formatted.length() > AUTOGEN_COLALIAS_PRFX_MAXLENGTH) { expr_formatted = expr_formatted.substring(0, AUTOGEN_COLALIAS_PRFX_MAXLENGTH); } - //append colnum to make it unique + // append colnum to make it unique colAlias = expr_formatted.concat("_" + colNum); } @@ -2268,7 +2268,7 @@ } boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == - HiveParser.TOK_TRANSFORM); + HiveParser.TOK_TRANSFORM); if (isInTransform) { queryProperties.setUsesScript(true); globalLimitCtx.setHasTransformOrUDTF(true); @@ -2308,14 +2308,14 @@ // Only support a single expression when it's a UDTF if (selExprList.getChildCount() > 1) { throw new SemanticException(generateErrorMessage( - (ASTNode) selExprList.getChild(1), - ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg())); + (ASTNode) selExprList.getChild(1), + ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg())); } // Require an AS for UDTFs for column aliases ASTNode selExpr = (ASTNode) selExprList.getChild(posn); if (selExpr.getChildCount() < 2) { throw new SemanticException(generateErrorMessage(udtfExpr, - ErrorMsg.UDTF_REQUIRE_AS.getMsg())); + ErrorMsg.UDTF_REQUIRE_AS.getMsg())); } // Get the column / table aliases from the expression. Start from 1 as // 0 is the TOK_FUNCTION @@ -2376,8 +2376,8 @@ // AST's are slightly different. 
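A quick trace of the auto-generated column alias logic above (getColAlias when the function name is included): this is only a sketch, and the flattened-tree input string and colNum value are hypothetical stand-ins, but the regex steps are exactly the ones shown in the patch.

  String exprFlattened = "(TOK_FUNCTION count (TOK_TABLE_OR_COL key))"; // hypothetical flattened AST
  String exprNoTok = exprFlattened.replaceAll("TOK_\\S+", "");          // "( count ( key))"
  String exprFormatted = exprNoTok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_"); // "count_key"
  if (exprFormatted.length() > 20) {                                    // AUTOGEN_COLALIAS_PRFX_MAXLENGTH
    exprFormatted = exprFormatted.substring(0, 20);
  }
  int colNum = 2;                                                       // hypothetical select-list position
  String colAlias = exprFormatted.concat("_" + colNum);                 // "count_key_2"

So, under these assumptions, count(key) in the third select position gets the alias prefix count_key_2 unless an explicit AS clause is given.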
if (!isInTransform && !isUDTF && child.getChildCount() > 2) { throw new SemanticException(generateErrorMessage( - (ASTNode) child.getChild(2), - ErrorMsg.INVALID_AS.getMsg())); + (ASTNode) child.getChild(2), + ErrorMsg.INVALID_AS.getMsg())); } // The real expression @@ -2393,7 +2393,7 @@ // Get rid of TOK_SELEXPR expr = (ASTNode) child.getChild(0); String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR, - autogenColAliasPrfxIncludeFuncName, i); + autogenColAliasPrfxIncludeFuncName, i); tabAlias = colRef[0]; colAlias = colRef[1]; if (hasAsClause) { @@ -2406,7 +2406,7 @@ boolean subQuery = qb.getParseInfo().getIsSubQ(); if (expr.getType() == HiveParser.TOK_ALLCOLREF) { pos = genColListRegex(".*", expr.getChildCount() == 0 ? null - : getUnescapedName((ASTNode)expr.getChild(0)).toLowerCase(), + : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list, inputRR, pos, out_rwsch, qb.getAliases(), subQuery); selectStar = true; } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause @@ -2420,7 +2420,7 @@ } else if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0) - .getChild(0).getText().toLowerCase())) && !hasAsClause + .getChild(0).getText().toLowerCase())) && !hasAsClause && !inputRR.getIsExprResolver() && isRegex(unescapeIdentifier(expr.getChild(1).getText()))) { // In case the expression is TABLE.COL (col can be regex). @@ -2428,7 +2428,7 @@ // We don't allow this for ExprResolver - the Group By case pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()), unescapeIdentifier(expr.getChild(0).getChild(0).getText() - .toLowerCase()), expr, col_list, inputRR, pos, out_rwsch, + .toLowerCase()), expr, col_list, inputRR, pos, out_rwsch, qb.getAliases(), subQuery); } else { // Case when this is an expression @@ -2446,9 +2446,9 @@ } ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos), - exp.getWritableObjectInspector(), tabAlias, false); + exp.getWritableObjectInspector(), tabAlias, false); colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? 
((ExprNodeColumnDesc) exp) - .isSkewedCol() : false); + .isSkewedCol() : false); out_rwsch.put(tabAlias, colAlias, colInfo); pos = Integer.valueOf(pos.intValue() + 1); @@ -2471,7 +2471,7 @@ Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( new SelectDesc(col_list, columnNames, selectStar), new RowSchema( - out_rwsch.getColumnInfos()), input), out_rwsch); + out_rwsch.getColumnInfos()), input), out_rwsch); output.setColumnExprMap(colExprMap); if (isInTransform) { @@ -2551,7 +2551,7 @@ boolean isDistinct, boolean isAllColumns) throws SemanticException { ArrayList originalParameterTypeInfos = - getWritableObjectInspector(aggParameters); + getWritableObjectInspector(aggParameters); GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator( aggName, originalParameterTypeInfos, isDistinct, isAllColumns); if (null == result) { @@ -2727,9 +2727,9 @@ List inputKeyCols = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames(); if (inputKeyCols.size() > 0) { - lastKeyColName = inputKeyCols.get(inputKeyCols.size()-1); + lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1); } - reduceValues = ((ReduceSinkDesc)reduceSinkOperatorInfo.getConf()).getValueCols(); + reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols(); } int numDistinctUDFs = 0; for (Map.Entry entry : aggregationTrees.entrySet()) { @@ -2746,7 +2746,7 @@ for (int i = 1; i < value.getChildCount(); i++) { ASTNode paraExpr = (ASTNode) value.getChild(i); ColumnInfo paraExprInfo = - groupByInputRowResolver.getExpression(paraExpr); + groupByInputRowResolver.getExpression(paraExpr); if (paraExprInfo == null) { throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr)); } @@ -2757,8 +2757,8 @@ // if aggr is distinct, the parameter is name is constructed as // KEY.lastKeyColName:._colx paraExpression = Utilities.ReduceField.KEY.name() + "." + - lastKeyColName + ":" + numDistinctUDFs + "." + - getColumnInternalName(i-1); + lastKeyColName + ":" + numDistinctUDFs + "." + + getColumnInternalName(i - 1); } @@ -2800,13 +2800,14 @@ } } float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); - float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); + float memoryThreshold = HiveConf + .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( - new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, - false,groupByMemoryUsage,memoryThreshold, null, false, 0), - new RowSchema(groupByOutputRowResolver.getColumnInfos()), - reduceSinkOperatorInfo), groupByOutputRowResolver); + new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, + false, groupByMemoryUsage, memoryThreshold, null, false, 0), + new RowSchema(groupByOutputRowResolver.getColumnInfos()), + reduceSinkOperatorInfo), groupByOutputRowResolver); op.setColumnExprMap(colExprMap); return op; } @@ -2817,7 +2818,7 @@ // For eg: consider: select key, value, count(1) from T group by key, value with rollup. // Assuming map-side aggregation and no skew, the plan would look like: // - // TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink + // TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink // // This function is called for GroupBy2 to pass the additional grouping keys introduced by // GroupBy1 for the grouping set (corresponding to the rollup). 
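To make the rollup example in the comment above concrete: the grouping-set bitmaps are produced by getGroupingSetsForRollup / getGroupingSetsForCube earlier in this file, and for two grouping keys they evaluate as below. This is a standalone sketch (variable names are mine, java.util imports assumed), reading a set bit as "this grouping column participates in the set".

  List<Integer> rollupMasks = new ArrayList<Integer>();
  for (int i = 0; i <= 2; i++) {
    rollupMasks.add((1 << i) - 1);   // 0, 1, 3 -> (), (key), (key, value)
  }
  List<Integer> cubeMasks = new ArrayList<Integer>();
  for (int i = 0; i < (1 << 2); i++) {
    cubeMasks.add(i);                // 0, 1, 2, 3 -> every subset of (key, value)
  }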
@@ -2828,9 +2829,9 @@ Map colExprMap) throws SemanticException { // For grouping sets, add a dummy grouping key String groupingSetColumnName = - groupByInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName(); + groupByInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName(); ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, - groupingSetColumnName, null, false); + groupingSetColumnName, null, false); groupByKeys.add(inputExpr); String field = getColumnInternalName(groupByKeys.size() - 1); @@ -2848,7 +2849,7 @@ // For eg: consider: select key, value, count(1) from T group by key, value with rollup. // Assuming map-side aggregation and no skew, the plan would look like: // - // TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink + // TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink // // This function is called for ReduceSink to add the additional grouping keys introduced by // GroupBy1 into the reduce keys. @@ -2859,16 +2860,16 @@ Map colExprMap) throws SemanticException { // add a key for reduce sink String groupingSetColumnName = - reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName(); + reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName(); ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, - groupingSetColumnName, null, false); + groupingSetColumnName, null, false); reduceKeys.add(inputExpr); outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); String field = Utilities.ReduceField.KEY.toString() + "." - + getColumnInternalName(reduceKeys.size() - 1); + + getColumnInternalName(reduceKeys.size() - 1); ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get( - reduceKeys.size() - 1).getTypeInfo(), null, true); + reduceKeys.size() - 1).getTypeInfo(), null, true); reduceSinkOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), colInfo); colExprMap.put(colInfo.getInternalName(), inputExpr); } @@ -2923,11 +2924,11 @@ // For grouping sets, add a dummy grouping key if (groupingSetsPresent) { addGroupingSetKey( - groupByKeys, - groupByInputRowResolver, - groupByOutputRowResolver, - outputColumnNames, - colExprMap); + groupByKeys, + groupByInputRowResolver, + groupByOutputRowResolver, + outputColumnNames, + colExprMap); } HashMap aggregationTrees = parseInfo @@ -2940,9 +2941,9 @@ List inputKeyCols = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames(); if (inputKeyCols.size() > 0) { - lastKeyColName = inputKeyCols.get(inputKeyCols.size()-1); + lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1); } - reduceValues = ((ReduceSinkDesc)reduceSinkOperatorInfo.getConf()).getValueCols(); + reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols(); } int numDistinctUDFs = 0; for (Map.Entry entry : aggregationTrees.entrySet()) { @@ -2967,7 +2968,7 @@ for (int i = 1; i < value.getChildCount(); i++) { ASTNode paraExpr = (ASTNode) value.getChild(i); ColumnInfo paraExprInfo = - groupByInputRowResolver.getExpression(paraExpr); + groupByInputRowResolver.getExpression(paraExpr); if (paraExprInfo == null) { throw new SemanticException(ErrorMsg.INVALID_COLUMN .getMsg(paraExpr)); @@ -2979,8 +2980,8 @@ // if aggr is distinct, the parameter is name is constructed as // KEY.lastKeyColName:._colx paraExpression = Utilities.ReduceField.KEY.name() + "." 
+ - lastKeyColName + ":" + numDistinctUDFs + "." - + getColumnInternalName(i-1); + lastKeyColName + ":" + numDistinctUDFs + "." + + getColumnInternalName(i - 1); } @@ -3007,7 +3008,7 @@ assert (paraExpression != null); aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo - .getIsVirtualCol())); + .getIsVirtualCol())); } if (isDistinct) { numDistinctUDFs++; @@ -3038,16 +3039,17 @@ field, udaf.returnType, "", false)); } float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); - float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); + float memoryThreshold = HiveConf + .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); // Nothing special needs to be done for grouping sets. // This is the final group by operator, so multiple rows corresponding to the // grouping sets have been generated upstream. Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( - new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, - distPartAgg,groupByMemoryUsage,memoryThreshold, null, false, 0), - new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo), - groupByOutputRowResolver); + new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, + distPartAgg, groupByMemoryUsage, memoryThreshold, null, false, 0), + new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo), + groupByOutputRowResolver); op.setColumnExprMap(colExprMap); return op; } @@ -3087,13 +3089,13 @@ for (int i = 0; i < grpByExprs.size(); ++i) { ASTNode grpbyExpr = grpByExprs.get(i); ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, - groupByInputRowResolver); + groupByInputRowResolver); groupByKeys.add(grpByExprNode); String field = getColumnInternalName(i); outputColumnNames.add(field); groupByOutputRowResolver.putExpression(grpbyExpr, - new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false)); + new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } @@ -3105,7 +3107,7 @@ // For eg: consider: select key, value, count(1) from T group by key, value with rollup. // Assuming map-side aggregation and no skew, the plan would look like: // - // TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink + // TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink // // This function is called for GroupBy1 to create an additional grouping key // for the grouping set (corresponding to the rollup). @@ -3117,29 +3119,29 @@ String field = getColumnInternalName(groupByKeys.size() - 1); outputColumnNames.add(field); groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), - new ColumnInfo( - field, - TypeInfoFactory.stringTypeInfo, - null, - true)); + new ColumnInfo( + field, + TypeInfoFactory.stringTypeInfo, + null, + true)); colExprMap.put(field, constant); } // If there is a distinctFuncExp, add all parameters to the reduceKeys. 
if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) { List list = parseInfo.getDistinctFuncExprsForClause(dest); - for(ASTNode value: list) { + for (ASTNode value : list) { // 0 is function name for (int i = 1; i < value.getChildCount(); i++) { ASTNode parameter = (ASTNode) value.getChild(i); if (groupByOutputRowResolver.getExpression(parameter) == null) { ExprNodeDesc distExprNode = genExprNodeDesc(parameter, - groupByInputRowResolver); + groupByInputRowResolver); groupByKeys.add(distExprNode); - String field = getColumnInternalName(groupByKeys.size()-1); + String field = getColumnInternalName(groupByKeys.size() - 1); outputColumnNames.add(field); groupByOutputRowResolver.putExpression(parameter, new ColumnInfo( - field, distExprNode.getTypeInfo(), "", false)); + field, distExprNode.getTypeInfo(), "", false)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } } @@ -3161,7 +3163,7 @@ for (int i = 1; i < value.getChildCount(); i++) { ASTNode paraExpr = (ASTNode) value.getChild(i); ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, - groupByInputRowResolver); + groupByInputRowResolver); aggParameters.add(paraExprNode); } @@ -3190,13 +3192,14 @@ } } float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); - float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); + float memoryThreshold = HiveConf + .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( - new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, - false,groupByMemoryUsage,memoryThreshold, - groupingSetKeys, groupingSetsPresent, groupingSetsPosition), - new RowSchema(groupByOutputRowResolver.getColumnInfos()), - inputOperatorInfo), groupByOutputRowResolver); + new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, + false, groupByMemoryUsage, memoryThreshold, + groupingSetKeys, groupingSetsPresent, groupingSetsPosition), + new RowSchema(groupByOutputRowResolver.getColumnInfos()), + inputOperatorInfo), groupByOutputRowResolver); op.setColumnExprMap(colExprMap); return op; } @@ -3238,18 +3241,18 @@ List outputValueColumnNames = new ArrayList(); ArrayList reduceKeys = getReduceKeysForReduceSink(grpByExprs, dest, - reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, - colExprMap); + reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, + colExprMap); // add a key for reduce sink if (groupingSetsPresent) { // Process grouping set for the reduce sink operator processGroupingSetReduceSinkOperator( - reduceSinkInputRowResolver, - reduceSinkOutputRowResolver, - reduceKeys, - outputKeyColumnNames, - colExprMap); + reduceSinkInputRowResolver, + reduceSinkOutputRowResolver, + reduceKeys, + outputKeyColumnNames, + colExprMap); if (changeNumPartitionFields) { numPartitionFields++; @@ -3286,14 +3289,14 @@ } ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( - OperatorFactory.getAndMakeChild( - PlanUtils.getReduceSinkDesc(reduceKeys, - groupingSetsPresent ? grpByExprs.size() + 1 : grpByExprs.size(), - reduceValues, distinctColIndices, - outputKeyColumnNames, outputValueColumnNames, true, -1, numPartitionFields, - numReducers), - new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), inputOperatorInfo), - reduceSinkOutputRowResolver); + OperatorFactory.getAndMakeChild( + PlanUtils.getReduceSinkDesc(reduceKeys, + groupingSetsPresent ? 
grpByExprs.size() + 1 : grpByExprs.size(), + reduceValues, distinctColIndices, + outputKeyColumnNames, outputValueColumnNames, true, -1, numPartitionFields, + numReducers), + new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), inputOperatorInfo), + reduceSinkOutputRowResolver); rsOp.setColumnExprMap(colExprMap); return rsOp; } @@ -3308,7 +3311,7 @@ for (int i = 0; i < grpByExprs.size(); ++i) { ASTNode grpbyExpr = grpByExprs.get(i); ExprNodeDesc inputExpr = genExprNodeDesc(grpbyExpr, - reduceSinkInputRowResolver); + reduceSinkInputRowResolver); reduceKeys.add(inputExpr); if (reduceSinkOutputRowResolver.getExpression(grpbyExpr) == null) { outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); @@ -3327,7 +3330,8 @@ return reduceKeys; } - private List> getDistinctColIndicesForReduceSink(QBParseInfo parseInfo, String dest, + private List> getDistinctColIndicesForReduceSink(QBParseInfo parseInfo, + String dest, List reduceKeys, RowResolver reduceSinkInputRowResolver, RowResolver reduceSinkOutputRowResolver, List outputKeyColumnNames) throws SemanticException { @@ -3363,8 +3367,8 @@ distinctIndices.add(ri); String name = getColumnInternalName(numExprs); String field = Utilities.ReduceField.KEY.toString() + "." + colName - + ":" + i - + "." + name; + + ":" + i + + "." + name; ColumnInfo colInfo = new ColumnInfo(field, expr.getTypeInfo(), null, false); reduceSinkOutputRowResolver.putExpression(parameter, colInfo); numExprs++; @@ -3464,10 +3468,10 @@ ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, - grpByExprs.size(), reduceValues, distinctColIndices, - outputKeyColumnNames, outputValueColumnNames, true, -1, grpByExprs.size(), - -1), new RowSchema(reduceSinkOutputRowResolver - .getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver); + grpByExprs.size(), reduceValues, distinctColIndices, + outputKeyColumnNames, outputValueColumnNames, true, -1, grpByExprs.size(), + -1), new RowSchema(reduceSinkOutputRowResolver + .getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver); rsOp.setColumnExprMap(colExprMap); return rsOp; } @@ -3488,10 +3492,10 @@ return nodes; } for (int i = 0; i < node.getChildCount(); i++) { - ASTNode child = (ASTNode)node.getChild(i); + ASTNode child = (ASTNode) node.getChild(i); if (child.getType() == HiveParser.TOK_TABLE_OR_COL && child.getChild(0) != null && inputRR.get(null, - BaseSemanticAnalyzer.unescapeIdentifier(child.getChild(0).getText())) != null) { + BaseSemanticAnalyzer.unescapeIdentifier(child.getChild(0).getText())) != null) { nodes.add(child); } else { nodes.addAll(getColumnExprsFromASTNode(child, inputRR)); @@ -3552,11 +3556,11 @@ // Note that partitioning fields dont need to change, since it is either // partitioned randomly, or by all grouping keys + distinct keys processGroupingSetReduceSinkOperator( - reduceSinkInputRowResolver2, - reduceSinkOutputRowResolver2, - reduceKeys, - outputColumnNames, - colExprMap); + reduceSinkInputRowResolver2, + reduceSinkOutputRowResolver2, + reduceKeys, + outputColumnNames, + colExprMap); } // Get partial aggregation results and store in reduceValues @@ -3580,9 +3584,9 @@ ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, - reduceValues, outputColumnNames, true, -1, numPartitionFields, - numReducers), new RowSchema(reduceSinkOutputRowResolver2 - .getColumnInfos()), groupByOperatorInfo), + reduceValues, 
outputColumnNames, true, -1, numPartitionFields, + numReducers), new RowSchema(reduceSinkOutputRowResolver2 + .getColumnInfos()), groupByOperatorInfo), reduceSinkOutputRowResolver2); rsOp.setColumnExprMap(colExprMap); @@ -3639,11 +3643,11 @@ // For grouping sets, add a dummy grouping key if (groupingSetsPresent) { addGroupingSetKey( - groupByKeys, - groupByInputRowResolver2, - groupByOutputRowResolver2, - outputColumnNames, - colExprMap); + groupByKeys, + groupByInputRowResolver2, + groupByOutputRowResolver2, + outputColumnNames, + colExprMap); } HashMap aggregationTrees = parseInfo @@ -3659,7 +3663,7 @@ assert (paraExpression != null); aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo - .getIsVirtualCol())); + .getIsVirtualCol())); String aggName = unescapeIdentifier(value.getChild(0).getText()); @@ -3677,7 +3681,7 @@ udaf.genericUDAFEvaluator, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && value.getToken().getType() == - HiveParser.TOK_FUNCTIONDI), + HiveParser.TOK_FUNCTIONDI), amode)); String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1); @@ -3686,13 +3690,14 @@ field, udaf.returnType, "", false)); } float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); - float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); + float memoryThreshold = HiveConf + .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( - new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, - false,groupByMemoryUsage,memoryThreshold, null, false, 0), - new RowSchema(groupByOutputRowResolver2.getColumnInfos()), - reduceSinkOperatorInfo2), groupByOutputRowResolver2); + new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, + false, groupByMemoryUsage, memoryThreshold, null, false, 0), + new RowSchema(groupByOutputRowResolver2.getColumnInfos()), + reduceSinkOperatorInfo2), groupByOutputRowResolver2); op.setColumnExprMap(colExprMap); return op; } @@ -3730,7 +3735,7 @@ int numReducers = -1; ObjectPair, List> grpByExprsGroupingSets = - getGroupByGroupingSetsForClause(parseInfo, dest); + getGroupByGroupingSetsForClause(parseInfo, dest); List grpByExprs = grpByExprsGroupingSets.getFirst(); List groupingSets = grpByExprsGroupingSets.getSecond(); @@ -3746,15 +3751,15 @@ // ////// 1. Generate ReduceSinkOperator Operator reduceSinkOperatorInfo = - genGroupByPlanReduceSinkOperator(qb, - dest, - input, - grpByExprs, - grpByExprs.size(), - false, - numReducers, - false, - false); + genGroupByPlanReduceSinkOperator(qb, + dest, + input, + grpByExprs, + grpByExprs.size(), + false, + numReducers, + false, + false); // ////// 2. 
Generate GroupbyOperator Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, @@ -3782,7 +3787,7 @@ if (whereExpr != null) { OpParseContext inputCtx = opParseCtx.get(input); RowResolver inputRR = inputCtx.getRowResolver(); - ExprNodeDesc current = genExprNodeDesc((ASTNode)whereExpr.getChild(0), inputRR); + ExprNodeDesc current = genExprNodeDesc((ASTNode) whereExpr.getChild(0), inputRR); // Check the list of where expressions already added so they aren't duplicated ExprNodeDesc.ExprNodeDescEqualityWrapper currentWrapped = @@ -3819,8 +3824,8 @@ FilterDesc orFilterDesc = new FilterDesc(previous, false); selectInput = putOpInsertMap(OperatorFactory.getAndMakeChild( - orFilterDesc, new RowSchema( - inputRR.getColumnInfos()), input), inputRR); + orFilterDesc, new RowSchema( + inputRR.getColumnInfos()), input), inputRR); } // insert a select operator here used by the ColumnPruner to reduce @@ -3899,7 +3904,7 @@ // ////// Generate GroupbyOperator for a map-side partial aggregation Map genericUDAFEvaluators = - new LinkedHashMap(); + new LinkedHashMap(); QBParseInfo parseInfo = qb.getParseInfo(); @@ -3912,7 +3917,7 @@ // ////// 3. Generate ReduceSinkOperator2 Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( - parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers, false); + parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers, false); // ////// 4. Generate GroupbyOperator2 Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator2MR(parseInfo, @@ -3970,7 +3975,7 @@ QBParseInfo parseInfo = qb.getParseInfo(); ObjectPair, List> grpByExprsGroupingSets = - getGroupByGroupingSetsForClause(parseInfo, dest); + getGroupByGroupingSetsForClause(parseInfo, dest); List grpByExprs = grpByExprsGroupingSets.getFirst(); List groupingSets = grpByExprsGroupingSets.getSecond(); @@ -3990,22 +3995,22 @@ // operator. We set the numPartitionColumns to -1 for this purpose. This is // captured by WritableComparableHiveObject.hashCode() function. Operator reduceSinkOperatorInfo = - genGroupByPlanReduceSinkOperator(qb, - dest, - input, - grpByExprs, - (parseInfo.getDistinctFuncExprsForClause(dest).isEmpty() ? -1 : Integer.MAX_VALUE), - false, - -1, - false, - false); + genGroupByPlanReduceSinkOperator(qb, + dest, + input, + grpByExprs, + (parseInfo.getDistinctFuncExprsForClause(dest).isEmpty() ? -1 : Integer.MAX_VALUE), + false, + -1, + false, + false); // ////// 2. Generate GroupbyOperator Map genericUDAFEvaluators = - new LinkedHashMap(); + new LinkedHashMap(); GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanGroupByOperator( - parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIAL1, - genericUDAFEvaluators); + parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIAL1, + genericUDAFEvaluators); int numReducers = -1; if (grpByExprs.isEmpty()) { @@ -4014,12 +4019,12 @@ // ////// 3. Generate ReduceSinkOperator2 Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( - parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers, false); + parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers, false); // ////// 4. 
Generate GroupbyOperator2 Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator2MR(parseInfo, - dest, reduceSinkOperatorInfo2, GroupByDesc.Mode.FINAL, - genericUDAFEvaluators, false); + dest, reduceSinkOperatorInfo2, GroupByDesc.Mode.FINAL, + genericUDAFEvaluators, false); return groupByOperatorInfo2; } @@ -4040,13 +4045,13 @@ static private void extractColumns(Set colNamesExprs, ExprNodeDesc exprNode) throws SemanticException { if (exprNode instanceof ExprNodeColumnDesc) { - colNamesExprs.add(((ExprNodeColumnDesc)exprNode).getColumn()); + colNamesExprs.add(((ExprNodeColumnDesc) exprNode).getColumn()); return; } if (exprNode instanceof ExprNodeGenericFuncDesc) { - ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc)exprNode; - for (ExprNodeDesc childExpr: funcDesc.getChildExprs()) { + ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) exprNode; + for (ExprNodeDesc childExpr : funcDesc.getChildExprs()) { extractColumns(colNamesExprs, childExpr); } } @@ -4065,7 +4070,7 @@ private void checkExpressionsForGroupingSet(List grpByExprs, List distinctGrpByExprs, Map aggregationTrees, - RowResolver inputRowResolver) throws SemanticException { + RowResolver inputRowResolver) throws SemanticException { Set colNamesGroupByExprs = new HashSet(); Set colNamesGroupByDistinctExprs = new HashSet(); @@ -4079,7 +4084,7 @@ // If there is a distinctFuncExp, add all parameters to the reduceKeys. if (!distinctGrpByExprs.isEmpty()) { - for(ASTNode value: distinctGrpByExprs) { + for (ASTNode value : distinctGrpByExprs) { // 0 is function name for (int i = 1; i < value.getChildCount(); i++) { ASTNode parameter = (ASTNode) value.getChild(i); @@ -4089,8 +4094,7 @@ } if (hasCommonElement(colNamesGroupByExprs, colNamesGroupByDistinctExprs)) { - throw - new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_EXPRESSION_INVALID.getMsg()); + throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_EXPRESSION_INVALID.getMsg()); } } } @@ -4108,8 +4112,7 @@ } if (hasCommonElement(colNamesGroupByExprs, colNamesAggregateParameters)) { - throw - new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_EXPRESSION_INVALID.getMsg()); + throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_EXPRESSION_INVALID.getMsg()); } } } @@ -4138,7 +4141,7 @@ QBParseInfo parseInfo = qb.getParseInfo(); ObjectPair, List> grpByExprsGroupingSets = - getGroupByGroupingSetsForClause(parseInfo, dest); + getGroupByGroupingSetsForClause(parseInfo, dest); List grpByExprs = grpByExprsGroupingSets.getFirst(); List groupingSets = grpByExprsGroupingSets.getSecond(); @@ -4146,27 +4149,27 @@ if (groupingSetsPresent) { checkExpressionsForGroupingSet(grpByExprs, - parseInfo.getDistinctFuncExprsForClause(dest), - parseInfo.getAggregationExprsForClause(dest), - opParseCtx.get(inputOperatorInfo).getRowResolver()); + parseInfo.getDistinctFuncExprsForClause(dest), + parseInfo.getAggregationExprsForClause(dest), + opParseCtx.get(inputOperatorInfo).getRowResolver()); } // ////// Generate GroupbyOperator for a map-side partial aggregation Map genericUDAFEvaluators = - new LinkedHashMap(); + new LinkedHashMap(); GroupByOperator groupByOperatorInfo = - (GroupByOperator) genGroupByPlanMapGroupByOperator( - qb, - dest, - grpByExprs, - inputOperatorInfo, - GroupByDesc.Mode.HASH, - genericUDAFEvaluators, - groupingSets, - groupingSetsPresent); + (GroupByOperator) genGroupByPlanMapGroupByOperator( + qb, + dest, + grpByExprs, + inputOperatorInfo, + GroupByDesc.Mode.HASH, + genericUDAFEvaluators, + groupingSets, + groupingSetsPresent); 
groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get( - inputOperatorInfo).getRowResolver().getTableNames()); + inputOperatorInfo).getRowResolver().getTableNames()); int numReducers = -1; // Optimize the scenario when there are no grouping keys - only 1 reducer is @@ -4177,15 +4180,15 @@ // ////// Generate ReduceSink Operator Operator reduceSinkOperatorInfo = - genGroupByPlanReduceSinkOperator(qb, - dest, - groupByOperatorInfo, - grpByExprs, - grpByExprs.size(), - true, - numReducers, - true, - groupingSetsPresent); + genGroupByPlanReduceSinkOperator(qb, + dest, + groupByOperatorInfo, + grpByExprs, + grpByExprs.size(), + true, + numReducers, + true, + groupingSetsPresent); // This is a 1-stage map-reduce processing of the groupby. Tha map-side // aggregates was just used to @@ -4195,8 +4198,8 @@ // used, and merge is invoked // on the reducer. return genGroupByPlanGroupByOperator1(parseInfo, dest, - reduceSinkOperatorInfo, GroupByDesc.Mode.MERGEPARTIAL, - genericUDAFEvaluators, false, groupingSetsPresent); + reduceSinkOperatorInfo, GroupByDesc.Mode.MERGEPARTIAL, + genericUDAFEvaluators, false, groupingSetsPresent); } /** @@ -4237,7 +4240,7 @@ QBParseInfo parseInfo = qb.getParseInfo(); ObjectPair, List> grpByExprsGroupingSets = - getGroupByGroupingSetsForClause(parseInfo, dest); + getGroupByGroupingSetsForClause(parseInfo, dest); List grpByExprs = grpByExprsGroupingSets.getFirst(); List groupingSets = grpByExprsGroupingSets.getSecond(); @@ -4245,18 +4248,18 @@ if (groupingSetsPresent) { checkExpressionsForGroupingSet(grpByExprs, - parseInfo.getDistinctFuncExprsForClause(dest), - parseInfo.getAggregationExprsForClause(dest), - opParseCtx.get(inputOperatorInfo).getRowResolver()); + parseInfo.getDistinctFuncExprsForClause(dest), + parseInfo.getAggregationExprsForClause(dest), + opParseCtx.get(inputOperatorInfo).getRowResolver()); } // ////// Generate GroupbyOperator for a map-side partial aggregation Map genericUDAFEvaluators = - new LinkedHashMap(); + new LinkedHashMap(); GroupByOperator groupByOperatorInfo = - (GroupByOperator) genGroupByPlanMapGroupByOperator( - qb, dest, grpByExprs, inputOperatorInfo, GroupByDesc.Mode.HASH, - genericUDAFEvaluators, groupingSets, groupingSetsPresent); + (GroupByOperator) genGroupByPlanMapGroupByOperator( + qb, dest, grpByExprs, inputOperatorInfo, GroupByDesc.Mode.HASH, + genericUDAFEvaluators, groupingSets, groupingSetsPresent); groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get( inputOperatorInfo).getRowResolver().getTableNames()); @@ -4268,20 +4271,20 @@ // ////// Generate ReduceSink Operator Operator reduceSinkOperatorInfo = - genGroupByPlanReduceSinkOperator(qb, - dest, - groupByOperatorInfo, - grpByExprs, - distinctFuncExprs.isEmpty() ? -1 : Integer.MAX_VALUE, - false, - -1, - true, - groupingSetsPresent); + genGroupByPlanReduceSinkOperator(qb, + dest, + groupByOperatorInfo, + grpByExprs, + distinctFuncExprs.isEmpty() ? 
-1 : Integer.MAX_VALUE, + false, + -1, + true, + groupingSetsPresent); // ////// Generate GroupbyOperator for a partial aggregation Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo, - dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIALS, - genericUDAFEvaluators, false, groupingSetsPresent); + dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIALS, + genericUDAFEvaluators, false, groupingSetsPresent); int numReducers = -1; if (grpByExprs.isEmpty()) { @@ -4290,31 +4293,31 @@ // ////// Generate ReduceSinkOperator2 Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR( - parseInfo, dest, groupByOperatorInfo2, grpByExprs.size(), numReducers, - groupingSetsPresent); + parseInfo, dest, groupByOperatorInfo2, grpByExprs.size(), numReducers, + groupingSetsPresent); // ////// Generate GroupbyOperator3 return genGroupByPlanGroupByOperator2MR(parseInfo, dest, - reduceSinkOperatorInfo2, GroupByDesc.Mode.FINAL, - genericUDAFEvaluators, groupingSetsPresent); + reduceSinkOperatorInfo2, GroupByDesc.Mode.FINAL, + genericUDAFEvaluators, groupingSetsPresent); } else { // If there are no grouping keys, grouping sets cannot be present assert !groupingSetsPresent; // ////// Generate ReduceSink Operator Operator reduceSinkOperatorInfo = - genGroupByPlanReduceSinkOperator(qb, - dest, - groupByOperatorInfo, - grpByExprs, - grpByExprs.size(), - false, - 1, - true, - groupingSetsPresent); + genGroupByPlanReduceSinkOperator(qb, + dest, + groupByOperatorInfo, + grpByExprs, + grpByExprs.size(), + false, + 1, + true, + groupingSetsPresent); return genGroupByPlanGroupByOperator2MR(parseInfo, dest, - reduceSinkOperatorInfo, GroupByDesc.Mode.FINAL, genericUDAFEvaluators, false); + reduceSinkOperatorInfo, GroupByDesc.Mode.FINAL, genericUDAFEvaluators, false); } } @@ -4341,10 +4344,10 @@ } private int getReducersBucketing(int totalFiles, int maxReducers) { - int numFiles = totalFiles/maxReducers; + int numFiles = totalFiles / maxReducers; while (true) { - if (totalFiles%numFiles == 0) { - return totalFiles/numFiles; + if (totalFiles % numFiles == 0) { + return totalFiles / numFiles; } numFiles++; } @@ -4353,8 +4356,8 @@ private static class SortBucketRSCtx { ArrayList partnCols; boolean multiFileSpray; - int numFiles; - int totalFiles; + int numFiles; + int totalFiles; public SortBucketRSCtx() { partnCols = null; @@ -4371,7 +4374,8 @@ } /** - * @param partnCols the partnCols to set + * @param partnCols + * the partnCols to set */ public void setPartnCols(ArrayList partnCols) { this.partnCols = partnCols; @@ -4385,7 +4389,8 @@ } /** - * @param multiFileSpray the multiFileSpray to set + * @param multiFileSpray + * the multiFileSpray to set */ public void setMultiFileSpray(boolean multiFileSpray) { this.multiFileSpray = multiFileSpray; @@ -4399,7 +4404,8 @@ } /** - * @param numFiles the numFiles to set + * @param numFiles + * the numFiles to set */ public void setNumFiles(int numFiles) { this.numFiles = numFiles; @@ -4413,7 +4419,8 @@ } /** - * @param totalFiles the totalFiles to set + * @param totalFiles + * the totalFiles to set */ public void setTotalFiles(int totalFiles) { this.totalFiles = totalFiles; @@ -4421,8 +4428,9 @@ } @SuppressWarnings("nls") - private Operator genBucketingSortingDest(String dest, Operator input, QB qb, TableDesc table_desc, - Table dest_tab, SortBucketRSCtx ctx) + private Operator genBucketingSortingDest(String dest, Operator input, QB qb, + TableDesc table_desc, + Table dest_tab, SortBucketRSCtx ctx) throws SemanticException { // If the table is bucketed, 
and bucketing is enforced, do the following: @@ -4432,20 +4440,21 @@ // spray the data into multiple buckets. That way, we can support a very large // number of buckets without needing a very large number of reducers. boolean enforceBucketing = false; - boolean enforceSorting = false; + boolean enforceSorting = false; ArrayList partnCols = new ArrayList(); ArrayList partnColsNoConvert = new ArrayList(); - ArrayList sortCols = new ArrayList(); + ArrayList sortCols = new ArrayList(); ArrayList sortOrders = new ArrayList(); boolean multiFileSpray = false; - int numFiles = 1; - int totalFiles = 1; + int numFiles = 1; + int totalFiles = 1; if ((dest_tab.getNumBuckets() > 0) && (conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETING))) { enforceBucketing = true; partnCols = getParitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, true); - partnColsNoConvert = getParitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, false); + partnColsNoConvert = getParitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, + false); } if ((dest_tab.getSortCols() != null) && @@ -4465,7 +4474,7 @@ if (conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS) > 0) { maxReducers = conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS); } - int numBuckets = dest_tab.getNumBuckets(); + int numBuckets = dest_tab.getNumBuckets(); if (numBuckets > maxReducers) { multiFileSpray = true; totalFiles = numBuckets; @@ -4475,7 +4484,7 @@ else { // find the number of reducers such that it is a divisor of totalFiles maxReducers = getReducersBucketing(totalFiles, maxReducers); - numFiles = totalFiles/maxReducers; + numFiles = totalFiles / maxReducers; } } else { @@ -4488,7 +4497,7 @@ ctx.setNumFiles(numFiles); ctx.setPartnCols(partnColsNoConvert); ctx.setTotalFiles(totalFiles); - //disable "merge mapfiles" and "merge mapred files". + // disable "merge mapfiles" and "merge mapred files". HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPFILES, false); HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPREDFILES, false); } @@ -4497,6 +4506,7 @@ /** * Check for HOLD_DDLTIME hint. + * * @param qb * @return true if HOLD_DDLTIME is set, false otherwise. 
*/ @@ -4522,7 +4532,7 @@ QBMetaData qbm = qb.getMetaData(); Integer dest_type = qbm.getDestTypeForAlias(dest); - Table dest_tab = null; // destination table if any + Table dest_tab = null; // destination table if any Partition dest_part = null;// destination partition if any String queryTmpdir = null; // the intermediate destination directory Path dest_path = null; // the final destination directory @@ -4543,7 +4553,7 @@ // Is the user trying to insert into a external tables if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && (dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE))) { - throw new SemanticException( + throw new SemanticException( ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName())); } @@ -4553,17 +4563,17 @@ // check for partition List parts = dest_tab.getPartitionKeys(); if (parts != null && parts.size() > 0) { // table is partitioned - if (partSpec== null || partSpec.size() == 0) { // user did NOT specify partition + if (partSpec == null || partSpec.size() == 0) { // user did NOT specify partition throw new SemanticException(generateErrorMessage( - qb.getParseInfo().getDestForClause(dest), - ErrorMsg.NEED_PARTITION_ERROR.getMsg())); + qb.getParseInfo().getDestForClause(dest), + ErrorMsg.NEED_PARTITION_ERROR.getMsg())); } // the HOLD_DDLTIIME hint should not be used with dynamic partition since the // newly generated partitions should always update their DDLTIME if (holdDDLTime) { throw new SemanticException(generateErrorMessage( - qb.getParseInfo().getDestForClause(dest), - ErrorMsg.HOLD_DDLTIME_ON_NONEXIST_PARTITIONS.getMsg())); + qb.getParseInfo().getDestForClause(dest), + ErrorMsg.HOLD_DDLTIME_ON_NONEXIST_PARTITIONS.getMsg())); } dpCtx = qbm.getDPCtx(dest); if (dpCtx == null) { @@ -4577,8 +4587,8 @@ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) { // allow DP if (dpCtx.getNumDPCols() > 0 && (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPFILES) || - HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPREDFILES)) && - Utilities.supportCombineFileInputFormat() == false) { + HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPREDFILES)) && + Utilities.supportCombineFileInputFormat() == false) { // Do not support merge for Hadoop versions (pre-0.20) that do not // support CombineHiveInputFormat HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEMERGEMAPFILES, false); @@ -4589,8 +4599,8 @@ } else { // QBMetaData.DEST_PARTITION capture the all-SP case throw new SemanticException(generateErrorMessage( - qb.getParseInfo().getDestForClause(dest), - ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg())); + qb.getParseInfo().getDestForClause(dest), + ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg())); } if (dpCtx.getSPPath() != null) { dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath()); @@ -4660,7 +4670,7 @@ else { try { String ppath = dpCtx.getSPPath(); - ppath = ppath.substring(0, ppath.length()-1); + ppath = ppath.substring(0, ppath.length() - 1); DummyPartition p = new DummyPartition(dest_tab, dest_tab.getDbName() + "@" + dest_tab.getTableName() + "@" + ppath, @@ -4680,15 +4690,15 @@ dest_tab = dest_part.getTable(); if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE)) { - throw new SemanticException( + throw new SemanticException( ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName())); } Path tabPath = dest_tab.getPath(); Path partPath = dest_part.getPartitionPath(); - // if the table is in a different dfs than the 
partition, - // replace the partition's dfs with the table's dfs. + // if the table is in a different dfs than the partition, + // replace the partition's dfs with the table's dfs. dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri() .getAuthority(), partPath.toUri().getPath()); @@ -4716,8 +4726,8 @@ Partition part = db.getPartition(dest_tab, dest_part.getSpec(), false); if (part == null) { throw new SemanticException(generateErrorMessage( - qb.getParseInfo().getDestForClause(dest), - ErrorMsg.HOLD_DDLTIME_ON_NONEXIST_PARTITIONS.getMsg())); + qb.getParseInfo().getDestForClause(dest), + ErrorMsg.HOLD_DDLTIME_ON_NONEXIST_PARTITIONS.getMsg())); } } catch (HiveException e) { throw new SemanticException(e); @@ -4864,7 +4874,7 @@ for (int i = 0; i < fields.size(); i++) { vecCol.add(new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils .getTypeInfoFromObjectInspector(fields.get(i) - .getFieldObjectInspector()), "", false)); + .getFieldObjectInspector()), "", false)); } } catch (Exception e) { throw new SemanticException(e.getMessage(), e); @@ -4873,15 +4883,15 @@ RowSchema fsRS = new RowSchema(vecCol); FileSinkDesc fileSinkDesc = new FileSinkDesc( - queryTmpdir, - table_desc, - conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), - currentTableId, - rsCtx.isMultiFileSpray(), - rsCtx.getNumFiles(), - rsCtx.getTotalFiles(), - rsCtx.getPartnCols(), - dpCtx); + queryTmpdir, + table_desc, + conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), + currentTableId, + rsCtx.isMultiFileSpray(), + rsCtx.getNumFiles(), + rsCtx.getTotalFiles(), + rsCtx.getPartnCols(), + dpCtx); /* Set List Bucketing context. */ if (lbCtx != null) { @@ -4892,7 +4902,7 @@ // set the stats publishing/aggregating key prefix // the same as directory name. The directory name - // can be changed in the optimizer but the key should not be changed + // can be changed in the optimizer but the key should not be changed // it should be the same as the MoveWork's sourceDir. fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName()); @@ -4908,16 +4918,16 @@ } Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc, - fsRS, input), inputRR); + fsRS, input), inputRR); if (ltd != null && SessionState.get() != null) { SessionState.get().getLineageState() - .mapDirToFop(ltd.getSourceDir(), (FileSinkOperator)output); + .mapDirToFop(ltd.getSourceDir(), (FileSinkOperator) output); } if (LOG.isDebugEnabled()) { LOG.debug("Created FileSink Plan for clause: " + dest + "dest_path: " - + dest_path + " row schema: " + inputRR.toString()); + + dest_path + " row schema: " + inputRR.toString()); } return output; @@ -4947,7 +4957,7 @@ int inColumnCnt = rowFields.size(); int outColumnCnt = tableFields.size(); if (dynPart && dpCtx != null) { - outColumnCnt += dpCtx.getNumDPCols(); + outColumnCnt += dpCtx.getNumDPCols(); } if (inColumnCnt != outColumnCnt) { @@ -4955,7 +4965,7 @@ + " columns, but query has " + inColumnCnt + " columns."; throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg( qb.getParseInfo().getDestForClause(dest), reason)); - } else if (dynPart && dpCtx != null){ + } else if (dynPart && dpCtx != null) { // create the mapping from input ExprNode to dest table DP column dpCtx.mapInputToDP(rowFields.subList(tableFields.size(), rowFields.size())); } @@ -4986,8 +4996,8 @@ // JSON-format. 
if (!tableFieldTypeInfo.equals(rowFieldTypeInfo) && !(isLazySimpleSerDe - && tableFieldTypeInfo.getCategory().equals(Category.PRIMITIVE) && tableFieldTypeInfo - .equals(TypeInfoFactory.stringTypeInfo))) { + && tableFieldTypeInfo.getCategory().equals(Category.PRIMITIVE) && tableFieldTypeInfo + .equals(TypeInfoFactory.stringTypeInfo))) { // need to do some conversions here converted = true; if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) { @@ -5012,7 +5022,7 @@ // deal with dynamic partition columns: convert ExprNodeDesc type to String?? if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) { // DP columns starts with tableFields.size() - for (int i = tableFields.size(); i < rowFields.size(); ++i ) { + for (int i = tableFields.size(); i < rowFields.size(); ++i) { TypeInfo rowFieldTypeInfo = rowFields.get(i).getType(); ExprNodeDesc column = new ExprNodeColumnDesc( rowFieldTypeInfo, rowFields.get(i).getInternalName(), "", false); @@ -5033,7 +5043,7 @@ } Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( new SelectDesc(expressions, colName), new RowSchema(rowResolver - .getColumnInfos()), input), rowResolver); + .getColumnInfos()), input), rowResolver); return output; } else { @@ -5063,7 +5073,7 @@ if (LOG.isDebugEnabled()) { LOG.debug("Created LimitOperator Plan for clause: " + dest - + " row schema: " + inputRR.toString()); + + " row schema: " + inputRR.toString()); } return limitMap; @@ -5093,7 +5103,7 @@ if (LOG.isDebugEnabled()) { LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " - + colAliases); + + colAliases); } // Use the RowResolver from the input operator to generate a input @@ -5120,7 +5130,7 @@ if (numUdtfCols != numSuppliedAliases) { throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH .getMsg("expected " + numUdtfCols + " aliases " + "but got " - + numSuppliedAliases)); + + numSuppliedAliases)); } // Generate the output column info's / row resolver using internal names. 
@@ -5176,10 +5186,10 @@ } private ArrayList getParitionColsFromBucketCols(String dest, QB qb, Table tab, - TableDesc table_desc, Operator input, boolean convert) - throws SemanticException { + TableDesc table_desc, Operator input, boolean convert) + throws SemanticException { List tabBucketCols = tab.getBucketCols(); - List tabCols = tab.getCols(); + List tabCols = tab.getCols(); // Partition by the bucketing column List posns = new ArrayList(); @@ -5198,8 +5208,9 @@ return genConvertCol(dest, qb, tab, table_desc, input, posns, convert); } - private ArrayList genConvertCol(String dest, QB qb, Table tab, TableDesc table_desc, Operator input, - List posns, boolean convert) throws SemanticException { + private ArrayList genConvertCol(String dest, QB qb, Table tab, + TableDesc table_desc, Operator input, + List posns, boolean convert) throws SemanticException { StructObjectInspector oi = null; try { Deserializer deserializer = table_desc.getDeserializerClass() @@ -5217,12 +5228,13 @@ // Check column type int columnNumber = posns.size(); ArrayList expressions = new ArrayList(columnNumber); - for (Integer posn: posns) { + for (Integer posn : posns) { ObjectInspector tableFieldOI = tableFields.get(posn).getFieldObjectInspector(); TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI); TypeInfo rowFieldTypeInfo = rowFields.get(posn).getType(); - ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(posn).getInternalName(), - rowFields.get(posn).getTabAlias(), rowFields.get(posn).getIsVirtualCol()); + ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(posn) + .getInternalName(), + rowFields.get(posn).getTabAlias(), rowFields.get(posn).getIsVirtualCol()); if (convert && !tableFieldTypeInfo.equals(rowFieldTypeInfo)) { // need to do some conversions here @@ -5231,14 +5243,14 @@ column = null; } else { column = TypeCheckProcFactory.DefaultExprProcessor - .getFuncExprNodeDesc(tableFieldTypeInfo.getTypeName(), - column); + .getFuncExprNodeDesc(tableFieldTypeInfo.getTypeName(), + column); } if (column == null) { String reason = "Cannot convert column " + posn + " from " - + rowFieldTypeInfo + " to " + tableFieldTypeInfo + "."; + + rowFieldTypeInfo + " to " + tableFieldTypeInfo + "."; throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH - .getMsg(qb.getParseInfo().getDestForClause(dest), reason)); + .getMsg(qb.getParseInfo().getDestForClause(dest), reason)); } } expressions.add(column); @@ -5247,11 +5259,12 @@ return expressions; } - private ArrayList getSortCols(String dest, QB qb, Table tab, TableDesc table_desc, Operator input, boolean convert) - throws SemanticException { + private ArrayList getSortCols(String dest, QB qb, Table tab, TableDesc table_desc, + Operator input, boolean convert) + throws SemanticException { RowResolver inputRR = opParseCtx.get(input).getRowResolver(); List tabSortCols = tab.getSortCols(); - List tabCols = tab.getCols(); + List tabCols = tab.getCols(); // Partition by the bucketing column List posns = new ArrayList(); @@ -5271,10 +5284,10 @@ } private ArrayList getSortOrders(String dest, QB qb, Table tab, Operator input) - throws SemanticException { + throws SemanticException { RowResolver inputRR = opParseCtx.get(input).getRowResolver(); List tabSortCols = tab.getSortCols(); - List tabCols = tab.getCols(); + List tabCols = tab.getCols(); ArrayList orders = new ArrayList(); for (Order sortCol : tabSortCols) { @@ -5290,11 +5303,11 @@ @SuppressWarnings("nls") private Operator 
genReduceSinkPlanForSortingBucketing(Table tab, Operator input, - ArrayList sortCols, - List sortOrders, - ArrayList partitionCols, - int numReducers) - throws SemanticException { + ArrayList sortCols, + List sortOrders, + ArrayList partitionCols, + int numReducers) + throws SemanticException { RowResolver inputRR = opParseCtx.get(input).getRowResolver(); // For the generation of the values expression just get the inputs @@ -5316,12 +5329,12 @@ StringBuilder order = new StringBuilder(); for (int sortOrder : sortOrders) { - order.append(sortOrder == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC ? '+' :'-'); + order.append(sortOrder == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC ? '+' : '-'); } Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils .getReduceSinkDesc(sortCols, valueCols, outputColumns, false, -1, - partitionCols, order.toString(), numReducers), + partitionCols, order.toString(), numReducers), new RowSchema(inputRR.getColumnInfos()), input), inputRR); interim.setColumnExprMap(colExprMap); @@ -5339,12 +5352,12 @@ Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, - Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema( - out_rwsch.getColumnInfos()), interim), out_rwsch); + Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema( + out_rwsch.getColumnInfos()), interim), out_rwsch); if (LOG.isDebugEnabled()) { LOG.debug("Created ReduceSink Plan for table: " + tab.getTableName() + - " row schema: " + out_rwsch.toString()); + " row schema: " + out_rwsch.toString()); } return output; @@ -5387,7 +5400,7 @@ "strict") && limit == null) { throw new SemanticException(generateErrorMessage(sortExprs, - ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg())); + ErrorMsg.NO_LIMIT_WITH_ORDERBY.getMsg())); } } } @@ -5434,7 +5447,7 @@ } Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils .getReduceSinkDesc(sortCols, valueCols, outputColumns, false, -1, - partitionCols, order.toString(), numReducers), + partitionCols, order.toString(), numReducers), new RowSchema(inputRR.getColumnInfos()), input), inputRR); interim.setColumnExprMap(colExprMap); @@ -5452,12 +5465,12 @@ Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, - Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema( - out_rwsch.getColumnInfos()), interim), out_rwsch); + Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema( + out_rwsch.getColumnInfos()), interim), out_rwsch); if (LOG.isDebugEnabled()) { LOG.debug("Created ReduceSink Plan for clause: " + dest + " row schema: " - + out_rwsch.toString()); + + out_rwsch.toString()); } return output; } @@ -5476,7 +5489,7 @@ Map colExprMap = new HashMap(); HashMap> posToAliasMap = new HashMap>(); HashMap> filterMap = - new HashMap>(); + new HashMap>(); for (int pos = 0; pos < right.length; ++pos) { @@ -5617,9 +5630,9 @@ ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, - reduceValues, outputColumns, false, joinTree.getNextTag(), - reduceKeys.size(), numReds), new RowSchema(outputRS - .getColumnInfos()), child), outputRS); + reduceValues, outputColumns, false, joinTree.getNextTag(), + reduceKeys.size(), numReds), new RowSchema(outputRS + .getColumnInfos()), child), outputRS); rsOp.setColumnExprMap(colExprMap); return rsOp; } @@ -5713,7 +5726,7 @@ // create selection 
operator Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( new SelectDesc(colList, columnNames, false), new RowSchema(inputRR - .getColumnInfos()), input), inputRR); + .getColumnInfos()), input), inputRR); output.setColumnExprMap(input.getColumnExprMap()); return output; @@ -5762,12 +5775,13 @@ // Generate group-by operator float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); - float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); + float memoryThreshold = HiveConf + .getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( - new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, - false,groupByMemoryUsage,memoryThreshold, null, false, 0), - new RowSchema(groupByOutputRowResolver.getColumnInfos()), - inputOperatorInfo), groupByOutputRowResolver); + new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, + false, groupByMemoryUsage, memoryThreshold, null, false, 0), + new RowSchema(groupByOutputRowResolver.getColumnInfos()), + inputOperatorInfo), groupByOutputRowResolver); op.setColumnExprMap(colExprMap); return op; @@ -5799,7 +5813,7 @@ if (commonType == null) { throw new SemanticException( "Cannot do equality join on different types: " + a.getTypeName() - + " and " + b.getTypeName()); + + " and " + b.getTypeName()); } } // Add implicit type conversion if necessary @@ -5808,7 +5822,7 @@ keys.get(i).set( k, TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc( - commonType.getTypeName(), keys.get(i).get(k))); + commonType.getTypeName(), keys.get(i).get(k))); } } } @@ -5907,7 +5921,7 @@ String alias = child.getChildCount() == 1 ? tableName : unescapeIdentifier(child.getChild(child.getChildCount() - 1) - .getText().toLowerCase()); + .getText().toLowerCase()); if (i == 0) { leftAliases.add(alias); @@ -6008,10 +6022,10 @@ if ((left.getToken().getType() == HiveParser.TOK_TABREF) || (left.getToken().getType() == HiveParser.TOK_SUBQUERY)) { String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0)) - .toLowerCase(); + .toLowerCase(); String alias = left.getChildCount() == 1 ? tableName : unescapeIdentifier(left.getChild(left.getChildCount() - 1) - .getText().toLowerCase()); + .getText().toLowerCase()); joinTree.setLeftAlias(alias); String[] leftAliases = new String[1]; leftAliases[0] = alias; @@ -6036,10 +6050,10 @@ if ((right.getToken().getType() == HiveParser.TOK_TABREF) || (right.getToken().getType() == HiveParser.TOK_SUBQUERY)) { String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0)) - .toLowerCase(); + .toLowerCase(); String alias = right.getChildCount() == 1 ? 
tableName : unescapeIdentifier(right.getChild(right.getChildCount() - 1) - .getText().toLowerCase()); + .getText().toLowerCase()); String[] rightAliases = new String[1]; rightAliases[0] = alias; joinTree.setRightAliases(rightAliases); @@ -6072,7 +6086,7 @@ joinTree.setFilterMap(new int[2][]); ArrayList> filtersForPushing = - new ArrayList>(); + new ArrayList>(); filtersForPushing.add(new ArrayList()); filtersForPushing.add(new ArrayList()); joinTree.setFiltersForPushing(filtersForPushing); @@ -6174,7 +6188,7 @@ ArrayList nns = node.getNullSafes(); ArrayList tns = target.getNullSafes(); for (int i = 0; i < tns.size(); i++) { - tns.set(i, tns.get(i) & nns.get(i)); // any of condition contains non-NS, non-NS + tns.set(i, tns.get(i) & nns.get(i)); // any of condition contains non-NS, non-NS } ArrayList> filters = target.getFilters(); @@ -6193,7 +6207,7 @@ for (int[] mapping : nmap) { if (mapping != null) { - for (int i = 0; i < mapping.length; i+=2) { + for (int i = 0; i < mapping.length; i += 2) { if (pos > 0 || mapping[i] > 0) { mapping[i] += trgtRightAliases.length; } @@ -6379,7 +6393,7 @@ } Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( new SelectDesc(colList, columnNames, true), new RowSchema(inputRR - .getColumnInfos()), input), inputRR); + .getColumnInfos()), input), inputRR); output.setColumnExprMap(columnExprMap); return output; } @@ -6431,7 +6445,7 @@ } List currASTList = new ArrayList(); - for (ASTNode value: list) { + for (ASTNode value : list) { // 0 is function name for (int i = 1; i < value.getChildCount(); i++) { ASTNode parameter = (ASTNode) value.getChild(i); @@ -6533,8 +6547,8 @@ ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, - reduceValues, outputColumnNames, true, -1, reduceKeys.size(), -1), - new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input), + reduceValues, outputColumnNames, true, -1, reduceKeys.size(), -1), + new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input), reduceSinkOutputRowResolver); rsOp.setColumnExprMap(colExprMap); @@ -6542,7 +6556,7 @@ } // Groups the clause names into lists so that any two clauses in the same list has the same - // group by and distinct keys and no clause appears in more than one list. Returns a list of the + // group by and distinct keys and no clause appears in more than one list. Returns a list of the // lists of clauses. 
private List> getCommonGroupByDestGroups(QB qb, Operator input) throws SemanticException { @@ -6557,7 +6571,7 @@ // If this is a trivial query block return if (ks.size() <= 1) { - List oneList = new ArrayList(1); + List oneList = new ArrayList(1); if (ks.size() == 1) { oneList.add(ks.first()); } @@ -6576,7 +6590,7 @@ // Add the group by expressions List grpByExprs = getGroupByForClause(qbp, dest); - for (ASTNode grpByExpr: grpByExprs) { + for (ASTNode grpByExpr : grpByExprs) { ExprNodeDesc.ExprNodeDescEqualityWrapper grpByExprWrapper = new ExprNodeDesc.ExprNodeDescEqualityWrapper(genExprNodeDesc(grpByExpr, inputRR)); if (!sprayKeys.contains(grpByExprWrapper)) { @@ -6636,7 +6650,7 @@ List distinctExprs = new ArrayList(); - for (ASTNode distinctAggExpr: distinctAggExprs) { + for (ASTNode distinctAggExpr : distinctAggExprs) { // 0 is function name for (int i = 1; i < distinctAggExpr.getChildCount(); i++) { ASTNode parameter = (ASTNode) distinctAggExpr.getChild(i); @@ -6734,7 +6748,7 @@ // Constructs a standard group by plan if: // There is no other subquery with the same group by/distinct keys or // (There are no aggregations in a representative query for the group and - // There is no group by in that representative query) or + // There is no group by in that representative query) or // The data is skewed or // The conf variable used to control combining group bys into a signle reducer is false if (commonGroupByDestGroup.size() == 1 || @@ -6753,9 +6767,9 @@ if (qbp.getAggregationExprsForClause(dest).size() != 0 || getGroupByForClause(qbp, dest).size() > 0) { - //multiple distincts is not supported with skew in data + // multiple distincts is not supported with skew in data if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) && - qbp.getDistinctFuncExprsForClause(dest).size() > 1) { + qbp.getDistinctFuncExprsForClause(dest).size() > 1) { throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS. getMsg()); } @@ -6899,7 +6913,7 @@ HashMap rightmap = rightRR.getFieldMap(rightalias); // make sure the schemas of both sides are the same ASTNode tabref = qb.getAliases().isEmpty() ? null : - qb.getParseInfo().getSrcForAlias(qb.getAliases().get(0)); + qb.getParseInfo().getSrcForAlias(qb.getAliases().get(0)); if (leftmap.size() != rightmap.size()) { throw new SemanticException("Schema of both sides of union should match."); } @@ -6910,31 +6924,31 @@ if (rInfo == null) { throw new SemanticException(generateErrorMessage(tabref, "Schema of both sides of union should match. " + rightalias - + " does not have the field " + field)); + + " does not have the field " + field)); } if (lInfo == null) { throw new SemanticException(generateErrorMessage(tabref, "Schema of both sides of union should match. " + leftalias - + " does not have the field " + field)); + + " does not have the field " + field)); } if (!lInfo.getInternalName().equals(rInfo.getInternalName())) { throw new SemanticException(generateErrorMessage(tabref, "Schema of both sides of union should match: field " + field + ":" - + " appears on the left side of the UNION at column position: " + - getPositionFromInternalName(lInfo.getInternalName()) - + ", and on the right side of the UNION at column position: " + - getPositionFromInternalName(rInfo.getInternalName()) - + ". 
Column positions should match for a UNION")); + + " appears on the left side of the UNION at column position: " + + getPositionFromInternalName(lInfo.getInternalName()) + + ", and on the right side of the UNION at column position: " + + getPositionFromInternalName(rInfo.getInternalName()) + + ". Column positions should match for a UNION")); } - //try widening coversion, otherwise fail union + // try widening coversion, otherwise fail union TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), rInfo.getType()); if (commonTypeInfo == null) { throw new SemanticException(generateErrorMessage(tabref, "Schema of both sides of union should match: Column " + field - + " is of type " + lInfo.getType().getTypeName() - + " on first table and type " + rInfo.getType().getTypeName() - + " on second table")); + + " is of type " + lInfo.getType().getTypeName() + + " on first table and type " + rInfo.getType().getTypeName() + + " on second table")); } } @@ -6946,7 +6960,7 @@ ColumnInfo rInfo = rightmap.get(field); ColumnInfo unionColInfo = new ColumnInfo(lInfo); unionColInfo.setType(FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), - rInfo.getType())); + rInfo.getType())); unionoutRR.put(unionalias, field, unionColInfo); } @@ -6964,7 +6978,7 @@ if (leftOp instanceof UnionOperator) { // make left a child of right List> child = - new ArrayList>(); + new ArrayList>(); child.add(leftOp); rightOp.setChildOperators(child); @@ -6978,7 +6992,7 @@ } else { // make right a child of left List> child = - new ArrayList>(); + new ArrayList>(); child.add(rightOp); leftOp.setChildOperators(child); @@ -6995,11 +7009,11 @@ // Create a new union operator Operator unionforward = OperatorFactory .getAndMakeChild(new UnionDesc(), new RowSchema(unionoutRR - .getColumnInfos())); + .getColumnInfos())); // set union operator as child of each of leftOp and rightOp List> child = - new ArrayList>(); + new ArrayList>(); child.add(unionforward); rightOp.setChildOperators(child); @@ -7008,7 +7022,7 @@ leftOp.setChildOperators(child); List> parent = - new ArrayList>(); + new ArrayList>(); parent.add(leftOp); parent.add(rightOp); unionforward.setParentOperators(parent); @@ -7019,9 +7033,9 @@ /** * Generates a select operator which can go between the original input operator and the union - * operator. This select casts columns to match the type of the associated column in the union, - * other columns pass through unchanged. The new operator's only parent is the original input - * operator to the union, and it's only child is the union. If the input does not need to be + * operator. This select casts columns to match the type of the associated column in the union, + * other columns pass through unchanged. The new operator's only parent is the original input + * operator to the union, and it's only child is the union. If the input does not need to be * cast, the original operator is returned, and no new select operator is added. 
* * @param origInputOp @@ -7040,15 +7054,16 @@ private Operator genInputSelectForUnion( Operator origInputOp, Map origInputFieldMap, String origInputAlias, RowResolver unionoutRR, String unionalias) - throws UDFArgumentException { + throws UDFArgumentException { List columns = new ArrayList(); boolean needsCast = false; - for (Map.Entry unionEntry: unionoutRR.getFieldMap(unionalias).entrySet()) { + for (Map.Entry unionEntry : unionoutRR.getFieldMap(unionalias).entrySet()) { String field = unionEntry.getKey(); ColumnInfo lInfo = origInputFieldMap.get(field); ExprNodeDesc column = new ExprNodeColumnDesc(lInfo.getType(), lInfo.getInternalName(), - lInfo.getTabAlias(), lInfo.getIsVirtualCol(), lInfo.isSkewedCol());; + lInfo.getTabAlias(), lInfo.getIsVirtualCol(), lInfo.isSkewedCol()); + ; if (!lInfo.getType().equals(unionEntry.getValue().getType())) { needsCast = true; column = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc( @@ -7192,10 +7207,10 @@ * if the column is a skewed column, use ColumnInfo accordingly */ ColumnInfo colInfo = new ColumnInfo(fields.get(i).getFieldName(), - TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i) - .getFieldObjectInspector()), alias, false); + TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i) + .getFieldObjectInspector()), alias, false); colInfo.setSkewedCol((isSkewedCol(alias, qb, fields.get(i) - .getFieldName())) ? true : false); + .getFieldName())) ? true : false); rwsch.put(alias, fields.get(i).getFieldName(), colInfo); } } catch (SerDeException e) { @@ -7211,9 +7226,9 @@ TypeInfoFactory.stringTypeInfo, alias, true)); } - //put all virutal columns in RowResolver. + // put all virutal columns in RowResolver. Iterator vcs = VirtualColumn.getRegistry(conf).iterator(); - //use a list for easy cumtomize + // use a list for easy cumtomize List vcList = new ArrayList(); while (vcs.hasNext()) { VirtualColumn vc = vcs.next(); @@ -7233,7 +7248,7 @@ } top = putOpInsertMap(OperatorFactory.get(tsDesc, - new RowSchema(rwsch.getColumnInfos())), rwsch); + new RowSchema(rwsch.getColumnInfos())), rwsch); // Add this to the list of top operators - we always start from a table // scan @@ -7267,7 +7282,7 @@ if (num > den) { throw new SemanticException( ErrorMsg.BUCKETED_NUMERATOR_BIGGER_DENOMINATOR.getMsg() + " " - + tab.getTableName()); + + tab.getTableName()); } // check if a predicate is needed @@ -7310,7 +7325,7 @@ colsEqual, alias, rwsch, qb.getMetaData(), null); tableOp = OperatorFactory.getAndMakeChild(new FilterDesc( samplePredicate, true, new sampleDesc(ts.getNumerator(), ts - .getDenominator(), tabBucketCols, true)), + .getDenominator(), tabBucketCols, true)), new RowSchema(rwsch.getColumnInfos()), top); } else { // need to add filter @@ -7350,9 +7365,9 @@ .getBucketCols(), true, alias, rwsch, qb.getMetaData(), null); tableOp = OperatorFactory .getAndMakeChild(new FilterDesc(samplePred, true, - new sampleDesc(tsSample.getNumerator(), tsSample - .getDenominator(), tab.getBucketCols(), true)), - new RowSchema(rwsch.getColumnInfos()), top); + new sampleDesc(tsSample.getNumerator(), tsSample + .getDenominator(), tab.getBucketCols(), true)), + new RowSchema(rwsch.getColumnInfos()), top); LOG.info("No need for sample filter"); } else { // The table is not bucketed, add a dummy filter :: rand() @@ -7363,7 +7378,7 @@ LOG.info("Need sample filter"); ExprNodeDesc randFunc = TypeCheckProcFactory.DefaultExprProcessor .getFuncExprNodeDesc("rand", new ExprNodeConstantDesc(Integer - .valueOf(460476415))); + .valueOf(460476415))); ExprNodeDesc 
samplePred = genSamplePredicate(tsSample, null, false, alias, rwsch, qb.getMetaData(), randFunc); tableOp = OperatorFactory.getAndMakeChild(new FilterDesc( @@ -7386,7 +7401,7 @@ private boolean isSkewedCol(String alias, QB qb, String colName) { boolean isSkewedCol = false; List skewedCols = qb.getSkewedColumnNames(alias); - for (String skewedCol:skewedCols) { + for (String skewedCol : skewedCols) { if (skewedCol.equalsIgnoreCase(colName)) { isSkewedCol = true; } @@ -7394,7 +7409,8 @@ return isSkewedCol; } - private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias, RowResolver rwsch) + private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String alias, + RowResolver rwsch) throws SemanticException { if (!qbp.isAnalyzeCommand()) { @@ -7429,7 +7445,7 @@ // Theoretically the key prefix could be any unique string shared // between TableScanOperator (when publishing) and StatsTask (when aggregating). // Here we use - // table_name + partitionSec + // table_name + partitionSec // as the prefix for easy of read during explain and debugging. // Currently, partition spec can only be static partition. String k = tblName + Path.SEPARATOR; @@ -7560,7 +7576,7 @@ RowResolver lvForwardRR = new RowResolver(); RowResolver source = opParseCtx.get(op).getRowResolver(); for (ColumnInfo col : source.getColumnInfos()) { - if(col.getIsVirtualCol() && col.isHiddenVirtualCol()) { + if (col.getIsVirtualCol() && col.isHiddenVirtualCol()) { continue; } String[] tabCol = source.reverseLookup(col.getInternalName()); @@ -7577,10 +7593,10 @@ // Get the all path by making a select(*). RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver(); - //Operator allPath = op; + // Operator allPath = op; Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild( - new SelectDesc(true), new RowSchema(allPathRR.getColumnInfos()), - lvForward), allPathRR); + new SelectDesc(true), new RowSchema(allPathRR.getColumnInfos()), + lvForward), allPathRR); // Get the UDTF Path QB blankQb = new QB(null, null, false); Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree @@ -7608,7 +7624,7 @@ // LVmerge.. in the above order Map colExprMap = new HashMap(); - int i=0; + int i = 0; for (ColumnInfo c : allPathRR.getColumnInfos()) { String internalName = getColumnInternalName(i); i++; @@ -7661,7 +7677,7 @@ /** * A helper function to generate a column stats task on top of map-red task. The column stats * task fetches from the output of the map-red task, constructs the column stats object and - * persists it to the metastore. + * persists it to the metastore. * * This method generates a plan with a column stats task on top of map-red task and sets up the * appropriate metadata to be used during execution. @@ -7689,7 +7705,7 @@ resultTab, qb.getParseInfo().getOuterQueryLimit()); ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, partName, - colName, colType, isTblLevel); + colName, colType, isTblLevel); cStatsWork = new ColumnStatsWork(fetch, cStatsDesc); cStatsTask = (ColumnStatsTask) TaskFactory.get(cStatsWork, conf); rootTasks.add(cStatsTask); @@ -7708,7 +7724,8 @@ initParseCtx(pCtx); List> mvTask = new ArrayList>(); - /* In case of a select, use a fetch task instead of a move task. + /* + * In case of a select, use a fetch task instead of a move task. * If the select is from analyze table column rewrite, don't create a fetch task. Instead create * a column stats task later. 
*/ @@ -7746,12 +7763,14 @@ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) { IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, getInputs(), conf); try { - List> indexUpdateTasks = indexUpdater.generateUpdateTasks(); + List> indexUpdateTasks = indexUpdater + .generateUpdateTasks(); for (Task updateTask : indexUpdateTasks) { tsk.addDependentTask(updateTask); } } catch (HiveException e) { - console.printInfo("WARNING: could not auto-update stale indexes, indexes are not in of sync"); + console + .printInfo("WARNING: couldn't auto-update stale indexes, they are not in sync"); } } } @@ -7808,41 +7827,26 @@ // The dispatcher generates the plan from the operator tree Map opRules = new LinkedHashMap(); opRules.put(new RuleRegExp(new String("R1"), - TableScanOperator.getOperatorName() + "%"), - new GenMRTableScan1()); + TableScanOperator.getOperatorName() + "%"), + new GenMRTableScan1()); opRules.put(new RuleRegExp(new String("R2"), - TableScanOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), - new GenMRRedSink1()); + TableScanOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), + new GenMRRedSink1()); opRules.put(new RuleRegExp(new String("R3"), - ReduceSinkOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), - new GenMRRedSink2()); + ReduceSinkOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), + new GenMRRedSink2()); opRules.put(new RuleRegExp(new String("R4"), - FileSinkOperator.getOperatorName() + "%"), - new GenMRFileSink1()); + FileSinkOperator.getOperatorName() + "%"), + new GenMRFileSink1()); opRules.put(new RuleRegExp(new String("R5"), - UnionOperator.getOperatorName() + "%"), - new GenMRUnion1()); + UnionOperator.getOperatorName() + "%"), + new GenMRUnion1()); opRules.put(new RuleRegExp(new String("R6"), - UnionOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), - new GenMRRedSink3()); - opRules.put(new RuleRegExp(new String("R6"), - MapJoinOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), - new GenMRRedSink4()); + UnionOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), + new GenMRRedSink3()); opRules.put(new RuleRegExp(new String("R7"), - TableScanOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"), - MapJoinFactory.getTableScanMapJoin()); - opRules.put(new RuleRegExp(new String("R8"), - ReduceSinkOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"), - MapJoinFactory.getReduceSinkMapJoin()); - opRules.put(new RuleRegExp(new String("R9"), - UnionOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"), - MapJoinFactory.getUnionMapJoin()); - opRules.put(new RuleRegExp(new String("R10"), - MapJoinOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"), - MapJoinFactory.getMapJoinMapJoin()); - opRules.put(new RuleRegExp(new String("R11"), - MapJoinOperator.getOperatorName() + "%" + SelectOperator.getOperatorName() + "%"), - MapJoinFactory.getMapJoin()); + MapJoinOperator.getOperatorName() + "%"), + MapJoinFactory.getTableScanMapJoin()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along @@ -7854,7 +7858,8 @@ topNodes.addAll(topOps.values()); ogw.startWalking(topNodes, null); - /* If the query was the result of analyze table column compute statistics rewrite, create + /* + * If the query was the 
result of analyze table column compute statistics rewrite, create * a column stats task instead of a fetch task to persist stats to the metastore. */ if (isCStats) { @@ -7915,12 +7920,12 @@ getLeafTasks(rootTasks, leaves); assert (leaves.size() > 0); for (Task task : leaves) { - if (task instanceof StatsTask){ - //StatsTask require table to already exist - for (Task parentOfStatsTask : task.getParentTasks()){ + if (task instanceof StatsTask) { + // StatsTask require table to already exist + for (Task parentOfStatsTask : task.getParentTasks()) { parentOfStatsTask.addDependentTask(crtTblTask); } - for (Task parentOfCrtTblTask : crtTblTask.getParentTasks()){ + for (Task parentOfCrtTblTask : crtTblTask.getParentTasks()) { parentOfCrtTblTask.removeDependentTask(task); } crtTblTask.addDependentTask(task); @@ -7932,8 +7937,8 @@ if (globalLimitCtx.isEnable() && fetchTask != null) { int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH); - LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit()); - fetchTask.getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit()); + LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit()); + fetchTask.getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit()); } if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) { @@ -8175,7 +8180,7 @@ } viewSelect = child; // prevent view from referencing itself - viewsExpanded.add(db.getCurrentDatabase()+"."+createVwDesc.getViewName()); + viewsExpanded.add(db.getCurrentDatabase() + "." + createVwDesc.getViewName()); } // continue analyzing from the child ASTNode. @@ -8190,7 +8195,7 @@ LOG.info("Completed getting MetaData in Semantic Analysis"); // Save the result schema derived from the sink operator produced - // by genPlan. This has the correct column names, which clients + // by genPlan. This has the correct column names, which clients // such as JDBC would prefer instead of the c0, c1 we'll end // up with later. Operator sinkOp = genPlan(qb); @@ -8255,8 +8260,8 @@ int derivedColCount = derivedSchema.size(); if (explicitColCount != derivedColCount) { throw new SemanticException(generateErrorMessage( - viewSelect, - ErrorMsg.VIEW_COL_MISMATCH.getMsg())); + viewSelect, + ErrorMsg.VIEW_COL_MISMATCH.getMsg())); } } @@ -8304,19 +8309,19 @@ if (createVwDesc.getPartColNames() != null) { // Make sure all partitioning columns referenced actually // exist and are in the correct order at the end - // of the list of columns produced by the view. Also move the field + // of the list of columns produced by the view. Also move the field // schema descriptors from derivedSchema to the partitioning key // descriptor. List partColNames = createVwDesc.getPartColNames(); if (partColNames.size() > derivedSchema.size()) { - throw new SemanticException( + throw new SemanticException( ErrorMsg.VIEW_PARTITION_MISMATCH.getMsg()); } // Get the partition columns from the end of derivedSchema. List partitionColumns = derivedSchema.subList( - derivedSchema.size() - partColNames.size(), - derivedSchema.size()); + derivedSchema.size() - partColNames.size(), + derivedSchema.size()); // Verify that the names match the PARTITIONED ON clause. 
Iterator colNameIter = partColNames.iterator(); @@ -8326,20 +8331,20 @@ FieldSchema fieldSchema = schemaIter.next(); if (!fieldSchema.getName().equals(colName)) { throw new SemanticException( - ErrorMsg.VIEW_PARTITION_MISMATCH.getMsg()); + ErrorMsg.VIEW_PARTITION_MISMATCH.getMsg()); } } - // Boundary case: require at least one non-partitioned column + // Boundary case: require at least one non-partitioned column // for consistency with tables. if (partColNames.size() == derivedSchema.size()) { - throw new SemanticException( + throw new SemanticException( ErrorMsg.VIEW_PARTITION_TOTAL.getMsg()); } // Now make a copy. createVwDesc.setPartCols( - new ArrayList(partitionColumns)); + new ArrayList(partitionColumns)); // Finally, remove the partition columns from the end of derivedSchema. // (Clearing the subList writes through to the underlying @@ -8400,7 +8405,7 @@ */ @SuppressWarnings("nls") public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, - TypeCheckCtx tcCtx) throws SemanticException { + TypeCheckCtx tcCtx) throws SemanticException { // We recursively create the exprNodeDesc. Base cases: when we encounter // a column ref, we convert that into an exprNodeColumnDesc; when we // encounter @@ -8420,15 +8425,15 @@ .getIsVirtualCol(), colInfo.isSkewedCol()); } - // Create the walker and the rules dispatcher. + // Create the walker and the rules dispatcher. tcCtx.setUnparseTranslator(unparseTranslator); HashMap nodeOutputs = - TypeCheckProcFactory.genExprNode(expr, tcCtx); + TypeCheckProcFactory.genExprNode(expr, tcCtx); ExprNodeDesc desc = (ExprNodeDesc) nodeOutputs.get(expr); if (desc == null) { String errMsg = tcCtx.getError(); - if ( errMsg == null) { + if (errMsg == null) { errMsg = "Error in parsing "; } throw new SemanticException(errMsg); @@ -8469,7 +8474,7 @@ public void validate() throws SemanticException { LOG.debug("validation start"); // Validate inputs and outputs have right protectmode to execute the query - for (ReadEntity readEntity: getInputs()) { + for (ReadEntity readEntity : getInputs()) { ReadEntity.Type type = readEntity.getType(); if (type != ReadEntity.Type.TABLE && @@ -8486,22 +8491,22 @@ if (tbl.isOffline()) { throw new SemanticException( ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg( - "Table " + tbl.getTableName())); + "Table " + tbl.getTableName())); } if (type == ReadEntity.Type.PARTITION && p != null && p.isOffline()) { throw new SemanticException( ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg( - "Table " + tbl.getTableName() + - " Partition " + p.getName())); + "Table " + tbl.getTableName() + + " Partition " + p.getName())); } } - for (WriteEntity writeEntity: getOutputs()) { + for (WriteEntity writeEntity : getOutputs()) { WriteEntity.Type type = writeEntity.getType(); - if(type == WriteEntity.Type.PARTITION || type == WriteEntity.Type.DUMMYPARTITION) { + if (type == WriteEntity.Type.PARTITION || type == WriteEntity.Type.DUMMYPARTITION) { String conflictingArchive; try { Partition usedp = writeEntity.getPartition(); @@ -8514,7 +8519,7 @@ } catch (HiveException e) { throw new SemanticException(e); } - if(conflictingArchive != null) { + if (conflictingArchive != null) { String message = String.format("Insert conflict with existing archive: %s", conflictingArchive); throw new SemanticException(message); @@ -8549,11 +8554,11 @@ throw new SemanticException(e); } - if (type == WriteEntity.Type.PARTITION && p!=null && p.isOffline()) { + if (type == WriteEntity.Type.PARTITION && p != null && p.isOffline()) { throw new SemanticException( 
ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg( - " Table " + tbl.getTableName() + - " Partition " + p.getName())); + " Table " + tbl.getTableName() + + " Partition " + p.getName())); } } @@ -8565,11 +8570,12 @@ if (tbl.isOffline()) { throw new SemanticException( ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg( - "Table " + tbl.getTableName())); + "Table " + tbl.getTableName())); } } - boolean reworkMapredWork = HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_REWORK_MAPREDWORK); + boolean reworkMapredWork = HiveConf.getBoolVar(this.conf, + HiveConf.ConfVars.HIVE_REWORK_MAPREDWORK); // validate all tasks for (Task rootTask : rootTasks) { @@ -8599,7 +8605,9 @@ /** * Add default properties for table property. If a default parameter exists * in the tblProp, the value in tblProp will be kept. - * @param table property map + * + * @param table + * property map * @return Modified table property map */ private Map addDefaultProperties(Map tblProp) { @@ -8611,7 +8619,7 @@ } String paraString = HiveConf.getVar(conf, ConfVars.NEWTABLEDEFAULTPARA); if (paraString != null && !paraString.isEmpty()) { - for (String keyValuePair: paraString.split(",")) { + for (String keyValuePair : paraString.split(",")) { String[] keyValue = keyValuePair.split("=", 2); if (keyValue.length != 2) { continue; @@ -8634,7 +8642,7 @@ */ private ASTNode analyzeCreateTable(ASTNode ast, QB qb) throws SemanticException { - String tableName = getUnescapedName((ASTNode)ast.getChild(0)); + String tableName = getUnescapedName((ASTNode) ast.getChild(0)); String likeTableName = null; List cols = new ArrayList(); List partCols = new ArrayList(); @@ -8684,7 +8692,7 @@ break; case HiveParser.TOK_LIKETABLE: if (child.getChildCount() > 0) { - likeTableName = getUnescapedName((ASTNode)child.getChild(0)); + likeTableName = getUnescapedName((ASTNode) child.getChild(0)); if (likeTableName != null) { if (command_type == CTAS) { throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE @@ -8808,8 +8816,10 @@ tblProps = addDefaultProperties(tblProps); crtTblDesc = new CreateTableDesc(tableName, isExt, cols, partCols, - bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, - rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, + bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, + rowFormatParams.fieldEscape, + rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, + comment, storageFormat.inputFormat, storageFormat.outputFormat, location, shared.serde, storageFormat.storageHandler, shared.serdeProps, tblProps, ifNotExists, skewedColNames, skewedValues); @@ -8839,7 +8849,7 @@ try { Table dumpTable = db.newTable(tableName); databaseName = dumpTable.getDbName(); - if (null == db.getDatabase(dumpTable.getDbName()) ) { + if (null == db.getDatabase(dumpTable.getDbName())) { throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(dumpTable.getDbName())); } if (null != db.getTable(dumpTable.getDbName(), dumpTable.getTableName(), false)) { @@ -8852,9 +8862,12 @@ tblProps = addDefaultProperties(tblProps); crtTblDesc = new CreateTableDesc(databaseName, tableName, isExt, cols, partCols, - bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, - rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, storageFormat.inputFormat, - storageFormat.outputFormat, location, shared.serde, storageFormat.storageHandler, shared.serdeProps, + bucketCols, sortCols, 
numBuckets, rowFormatParams.fieldDelim, + rowFormatParams.fieldEscape, + rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, + comment, storageFormat.inputFormat, + storageFormat.outputFormat, location, shared.serde, storageFormat.storageHandler, + shared.serdeProps, tblProps, ifNotExists, skewedColNames, skewedValues); crtTblDesc.setStoredAsSubDirectories(storedAsDirs); qb.setTableDesc(crtTblDesc); @@ -8870,7 +8883,7 @@ private ASTNode analyzeCreateView(ASTNode ast, QB qb) throws SemanticException { - String tableName = getUnescapedName((ASTNode)ast.getChild(0)); + String tableName = getUnescapedName((ASTNode) ast.getChild(0)); List cols = null; boolean ifNotExists = false; boolean orReplace = false; @@ -8911,12 +8924,12 @@ } } - if (ifNotExists && orReplace){ + if (ifNotExists && orReplace) { throw new SemanticException("Can't combine IF NOT EXISTS and OR REPLACE."); } createVwDesc = new CreateViewDesc( - tableName, cols, comment, tblProps, partColNames, ifNotExists, orReplace); + tableName, cols, comment, tblProps, partColNames, ifNotExists, orReplace); unparseTranslator.enable(); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), createVwDesc), conf)); @@ -8926,7 +8939,7 @@ private void decideExecMode(List> rootTasks, Context ctx, GlobalLimitCtx globalLimitCtx) - throws SemanticException { + throws SemanticException { // bypass for explain queries for now if (ctx.getExplain()) { @@ -8940,20 +8953,20 @@ } final Context lCtx = ctx; - PathFilter p = new PathFilter () { - public boolean accept(Path file) { - return !lCtx.isMRTmpFileURI(file.toUri().getPath()); - } - }; + PathFilter p = new PathFilter() { + public boolean accept(Path file) { + return !lCtx.isMRTmpFileURI(file.toUri().getPath()); + } + }; List mrtasks = Utilities.getMRTasks(rootTasks); // map-reduce jobs will be run locally based on data size // first find out if any of the jobs needs to run non-locally boolean hasNonLocalJob = false; - for (ExecDriver mrtask: mrtasks) { + for (ExecDriver mrtask : mrtasks) { try { ContentSummary inputSummary = Utilities.getInputSummary - (ctx, (MapredWork)mrtask.getWork(), p); + (ctx, (MapredWork) mrtask.getWork(), p); int numReducers = getNumberOfReducers(mrtask.getWork(), conf); long estimatedInput; @@ -8976,8 +8989,8 @@ if (LOG.isDebugEnabled()) { LOG.debug("Task: " + mrtask.getId() + ", Summary: " + - inputSummary.getLength() + "," + inputSummary.getFileCount() + "," - + numReducers + ", estimated Input: " + estimatedInput); + inputSummary.getLength() + "," + inputSummary.getFileCount() + "," + + numReducers + ", estimated Input: " + estimatedInput); } if (MapRedTask.isEligibleForLocalMode(conf, numReducers, @@ -8988,15 +9001,15 @@ mrtask.setLocalMode(true); } } catch (IOException e) { - throw new SemanticException (e); + throw new SemanticException(e); } } - if(!hasNonLocalJob) { + if (!hasNonLocalJob) { // Entire query can be run locally. // Save the current tracker value and restore it when done. 
ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(conf)); - ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf,"local"); + ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local"); console.printInfo("Automatically selecting local only mode for query"); // If all the tasks can be run locally, we can use local disk for Index: ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (revision 1421079) +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (working copy) @@ -277,6 +277,12 @@ "Grouping sets aggregations (with rollups or cubes) are not allowed if aggregation function " + "parameters overlap with the aggregation functions columns"), + OPERATOR_NOT_ALLOWED_BEFORE_MAPJOIN(10212, + "Not all operators are allowed before the mapjoin hint. Remove the mapjoin hint."), + + OPERATOR_NOT_ALLOWED_AFTER_MAPJOIN(10213, + "Not all operators are allowed after the mapjoin hint. Remove the mapjoin hint."), + HIVE_GROUPING_SETS_AGGR_NOFUNC(10211, "Grouping sets aggregations are not allowed if no aggregation function is presented"),