Index: ql/src/test/results/clientpositive/smb_mapjoin_19.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_19.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_mapjoin_19.q.out (working copy) @@ -0,0 +1,406 @@ +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1@ds=1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1@ds=1 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. 
(TOK_TABLE_OR_COL a) ds) '1')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + GatherStats: false + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=1/ +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.test_table2 + partition_columns ds + serialization.ddl struct test_table2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + bucket_count 16 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows 500 + partition_columns ds + rawDataSize 5312 + serialization.ddl struct test_table1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numPartitions 1 + numRows 500 + partition_columns ds + rawDataSize 5312 + serialization.ddl struct test_table1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table1 + name: default.test_table1 + Truncated Path -> Alias: + /test_table1/ds=1 [a] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.test_table2 + partition_columns ds + serialization.ddl struct test_table2 { i32 key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 +#### A masked pattern was here #### + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Output: default@test_table2@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Output: default@test_table2@ds=1 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table1 where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +500 +PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +36 +PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' 
and hash(key) % 16 = 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +40 +PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 12 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +29 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 16) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 16) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +36 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 6 out of 16) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 6 out of 16) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 
PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +40 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 13 out of 16) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 13 out of 16) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +29 +PREHOOK: query: select count(*) from test_table2 where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +500 +PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +36 +PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +40 +PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 12 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +29 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 16) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 16) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +36 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 6 out of 16) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 6 out of 16) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +40 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 13 out of 16) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: 
Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 13 out of 16) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +29 Index: ql/src/test/results/clientpositive/smb_mapjoin_18.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_18.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_mapjoin_18.q.out (working copy) @@ -0,0 +1,346 @@ +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1@ds=1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1@ds=1 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT 
(TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + GatherStats: false + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=1/ +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.test_table2 + partition_columns ds + serialization.ddl struct test_table2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 2 + numRows 500 + partition_columns ds + rawDataSize 5312 + serialization.ddl struct test_table1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 2 + numPartitions 1 + numRows 500 + partition_columns ds + rawDataSize 5312 + serialization.ddl struct test_table1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table1 + name: default.test_table1 + Truncated Path -> Alias: + /test_table1/ds=1 [a] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked 
pattern was here #### + name default.test_table2 + partition_columns ds + serialization.ddl struct test_table2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 +#### A masked pattern was here #### + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Output: default@test_table2@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Output: default@test_table2@ds=1 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table1 where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +500 +PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +247 +PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +253 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +247 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +253 +PREHOOK: query: select count(*) from test_table2 where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 
PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +500 +PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +247 +PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +253 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +247 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +253 Index: ql/src/test/queries/clientpositive/smb_mapjoin_18.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_18.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_18.q (working copy) @@ -0,0 +1,38 @@ +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.reducers.max = 1; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; + +FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +select count(*) from test_table1 where ds = '1'; +select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 0; +select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 1; +select count(*) from test_table1 tablesample (bucket 1 out of 2) s where ds = '1'; +select count(*) from test_table1 tablesample (bucket 2 out of 2) s where ds = '1'; + +select count(*) from test_table2 where ds = '1'; +select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0; +select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1; +select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'; +select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'; Index: ql/src/test/queries/clientpositive/smb_mapjoin_19.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_19.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_19.q (working copy) @@ -0,0 +1,42 @@ +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.reducers.max = 1; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS; +CREATE TABLE test_table2 
(key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS; + +FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +select count(*) from test_table1 where ds = '1'; +select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 0; +select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 5; +select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 12; +select count(*) from test_table1 tablesample (bucket 1 out of 16) s where ds = '1'; +select count(*) from test_table1 tablesample (bucket 6 out of 16) s where ds = '1'; +select count(*) from test_table1 tablesample (bucket 13 out of 16) s where ds = '1'; + +select count(*) from test_table2 where ds = '1'; +select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 0; +select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 5; +select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 12; +select count(*) from test_table2 tablesample (bucket 1 out of 16) s where ds = '1'; +select count(*) from test_table2 tablesample (bucket 6 out of 16) s where ds = '1'; +select count(*) from test_table2 tablesample (bucket 13 out of 16) s where ds = '1'; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java (working copy) @@ -0,0 +1,305 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+
+/**
+ * This transformation does optimization for enforcing bucketing and sorting.
+ * For a query of the form:
+ * insert overwrite table T1 select * from T2;
+ * where T1 and T2 are bucketized/sorted on the same keys, we don't need a reducer to
+ * enforce bucketing and sorting.
+ */
+public class BucketingSortingReduceSinkOptimizer implements Transform {
+
+  private static final Log LOG = LogFactory.getLog(BucketingSortingReduceSinkOptimizer.class
+      .getName());
+
+  public BucketingSortingReduceSinkOptimizer() {
+  }
+
+  @Override
+  public ParseContext transform(ParseContext pctx) throws SemanticException {
+
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    HiveConf conf = pctx.getConf();
+
+    // process the ReduceSink-Extract-FileSink pattern added to enforce bucketing/sorting
+    opRules.put(new RuleRegExp("R1",
+        ReduceSinkOperator.getOperatorName() + "%" +
+        ExtractOperator.getOperatorName() + "%" +
+        FileSinkOperator.getOperatorName() + "%"),
+        getBucketSortReduceSinkProc(pctx));
+
+    // The dispatcher fires the processor corresponding to the closest matching rule
+    Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, null);
+    GraphWalker ogw = new DefaultGraphWalker(disp);
+
+    // Create a list of topop nodes
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+
+    return pctx;
+  }
+
+  private NodeProcessor getDefaultProc() {
+    return new NodeProcessor() {
+      @Override
+      public Object process(Node nd, Stack<Node> stack,
+          NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
+        return null;
+      }
+    };
+  }
+
+  private NodeProcessor getBucketSortReduceSinkProc(ParseContext pctx) {
+    return new BucketSortReduceSinkProcessor(pctx);
+  }
+
+  /**
+   * BucketSortReduceSinkProcessor.
+   *
+   */
+  public class BucketSortReduceSinkProcessor implements NodeProcessor {
+
+    protected ParseContext pGraphContext;
+
+    public BucketSortReduceSinkProcessor(ParseContext pGraphContext) {
+      this.pGraphContext = pGraphContext;
+    }
+
+    private List<Integer> getBucketPositions(List<String> tabBucketCols,
+        List<FieldSchema> tabCols) {
+      List<Integer> posns = new ArrayList<Integer>();
+      for (String bucketCol : tabBucketCols) {
+        int pos = 0;
+        for (FieldSchema tabCol : tabCols) {
+          if (bucketCol.equals(tabCol.getName())) {
+            posns.add(pos);
+            break;
+          }
+          pos++;
+        }
+      }
+      return posns;
+    }
+
+    private List<Integer> getBucketPositions(FileSinkOperator fsOp) {
+      Table destTable = pGraphContext.getFsopToTable().get(fsOp);
+      if (destTable == null) {
+        return null;
+      }
+
+      return getBucketPositions(destTable.getBucketCols(),
+          destTable.getAllCols());
+    }
+
+    private boolean checkPartition(Partition partition,
+        List<Integer> bucketPositions) {
+      // The bucketing and sorting positions should exactly match
+      int numBuckets = partition.getBucketCount();
+      if (bucketPositions.size() != numBuckets) {
+        return false;
+      }
+
+      List<Integer> partnBucketPositions = getBucketPositions(partition.getBucketCols(),
+          partition.getTable().getAllCols());
+
+      return bucketPositions.equals(partnBucketPositions);
+    }
+
+    private boolean checkTable(Table table,
+        List<Integer> bucketPositions) {
+      // The bucketing and sorting positions should exactly match
+      int numBuckets = table.getNumBuckets();
+      if (bucketPositions.size() != numBuckets) {
+        return false;
+      }
+
+      List<Integer> tableBucketPositions = getBucketPositions(table.getBucketCols(),
+          table.getAllCols());
+
+      return bucketPositions.equals(tableBucketPositions);
+    }
+
+    private void removeReduceSink(ReduceSinkOperator rsOp,
+        TableScanOperator tsOp,
+        FileSinkOperator fsOp,
+        Table table) {
+      removeReduceSink(rsOp, tsOp, fsOp);
+      FileStatus[] srcs = table.getBucketPaths();
+      Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>();
+      for (int pos = 0; pos < srcs.length; pos++) {
+        bucketFileNameMapping.put(srcs[pos].getPath().getName(), pos);
+      }
+      tsOp.getConf().setBucketFileNameMapping(bucketFileNameMapping);
+    }
+
+    private void removeReduceSink(ReduceSinkOperator rsOp,
+        TableScanOperator tsOp,
+        FileSinkOperator fsOp,
+        Partition partition) {
+      removeReduceSink(rsOp, tsOp, fsOp);
+      FileStatus[] srcs = partition.getBucketPaths();
+      Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>();
+      for (int pos = 0; pos < srcs.length; pos++) {
+        bucketFileNameMapping.put(srcs[pos].getPath().getName(), pos);
+      }
+      tsOp.getConf().setBucketFileNameMapping(bucketFileNameMapping);
+    }
+
+    private void removeReduceSink(ReduceSinkOperator rsOp,
+        TableScanOperator tsOp,
+        FileSinkOperator fsOp) {
+      Operator<? extends OperatorDesc> parRSOp = rsOp.getParentOperators().get(0);
+      parRSOp.getChildOperators().set(0, fsOp);
+      fsOp.getParentOperators().set(0, parRSOp);
+      fsOp.getConf().setMultiFileSpray(false);
+      fsOp.getConf().setTotalFiles(1);
+      fsOp.getConf().setNumFiles(1);
+      tsOp.getConf().setNoteCurrentFileName(true);
+    }
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      // If the reduce sink has not been introduced due to bucketing/sorting, ignore it
+      FileSinkOperator fsOp = (FileSinkOperator) nd;
+      ExtractOperator exOp = (ExtractOperator) fsOp.getParentOperators().get(0);
+      ReduceSinkOperator rsOp = (ReduceSinkOperator) exOp.getParentOperators().get(0);
+
+      List<ReduceSinkOperator> rsOps = pGraphContext
+          .getReduceSinkOperatorsAddedByEnforceBucketingSorting();
+      // nothing to do
+      if ((rsOps != null) && (!rsOps.contains(rsOp))) {
+        return null;
+      }
+
+      // Support for dynamic partitions can be added later
+      if (fsOp.getConf().getDynPartCtx() != null) {
+        return null;
+      }
+
+      // Get the positions for sorted and bucketed columns
+      List<Integer> bucketPositions = getBucketPositions(fsOp);
+
+      // Only selects and filters are allowed
+      Operator<? extends OperatorDesc> op = rsOp;
+      while (true) {
+        if (op.getParentOperators().size() > 1) {
+          return null;
+        }
+
+        op = op.getParentOperators().get(0);
+        if (!(op instanceof TableScanOperator) &&
+            !(op instanceof FilterOperator) &&
+            !(op instanceof SelectOperator)) {
+          return null;
+        }
+
+        // nothing to be done for filters - the output schema does not change.
+        if (op instanceof TableScanOperator) {
+          Table destTable = pGraphContext.getTopToTable().get(op);
+
+          if (destTable.isPartitioned()) {
+            PrunedPartitionList prunedParts = pGraphContext.getOpToPartList().get(op);
+            List<Partition> partitions = prunedParts.getNotDeniedPartns();
+
+            // Support for dynamic partitions can be added later
+            if ((partitions == null) || (partitions.isEmpty()) || (partitions.size() > 1)) {
+              return null;
+            }
+            for (Partition partition : partitions) {
+              if (checkPartition(partition, bucketPositions)) {
+                return null;
+              }
+            }
+
+            removeReduceSink(rsOp, (TableScanOperator) op, fsOp, partitions.get(0));
+            return null;
+          }
+          else {
+            if (checkTable(destTable, bucketPositions)) {
+              return null;
+            }
+
+            removeReduceSink(rsOp, (TableScanOperator) op, fsOp, destTable);
+            return null;
+          }
+        }
+        // None of the operators is changing the positions
+        else if (op instanceof SelectOperator) {
+          SelectOperator selectOp = (SelectOperator) op;
+          SelectDesc selectDesc = selectOp.getConf();
+
+          if (selectDesc.isSelStarNoCompute()) {
+            continue;
+          }
+
+          // Only columns can be selected
+          for (int pos : bucketPositions) {
+            ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
+            if (!(selectColList instanceof ExprNodeColumnDesc)) {
+              return null;
+            }
+          }
+        }
+      }
+    }
+  }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (revision 1461939)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (working copy)
@@ -58,6 +58,7 @@
         /* Add list bucketing pruner.
 */
        transformations.add(new ListBucketingPruner());
      }
+      transformations.add(new BucketingSortingReduceSinkOptimizer());
    }
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCP)) {
      transformations.add(new ColumnPruner());
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (revision 1461939)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (working copy)
@@ -388,7 +388,7 @@
    */
   // TODO: add test case and clean it up
   @SuppressWarnings("nls")
-  public Path getBucketPath(int bucketNum) {
+  public FileStatus[] getBucketPaths() {
     try {
       // Previously, this got the filesystem of the Table, which could be
       // different from the filesystem of the partition.
@@ -407,14 +407,26 @@
       if (srcs.length == 0) {
         return null;
       }
-      return srcs[bucketNum].getPath();
+      return srcs;
     } catch (Exception e) {
-      throw new RuntimeException("Cannot get bucket path for bucket "
-          + bucketNum, e);
+      throw new RuntimeException("Cannot get path ", e);
     }
   }
 
+  /**
+   * mapping from bucket number to bucket path
+   */
+  // TODO: add test case and clean it up
   @SuppressWarnings("nls")
+  public Path getBucketPath(int bucketNum) {
+    FileStatus srcs[] = getBucketPaths();
+    if (srcs.length == 0) {
+      return null;
+    }
+    return srcs[bucketNum].getPath();
+  }
+
+  @SuppressWarnings("nls")
   public Path[] getPath(Sample s) throws HiveException {
     if (s == null) {
       return getPath();
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (revision 1461939)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (working copy)
@@ -22,6 +22,7 @@
 import java.io.Serializable;
 import java.net.URI;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
@@ -31,6 +32,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.JavaUtils;
@@ -925,4 +927,30 @@
     Hive hive = Hive.get();
     return hive.getIndexes(getTTable().getDbName(), getTTable().getTableName(), max);
   }
+
+  @SuppressWarnings("nls")
+  public FileStatus[] getBucketPaths() {
+    try {
+      // Previously, this got the filesystem of the Table, which could be
+      // different from the filesystem of the partition.
+      FileSystem fs = FileSystem.get(getPath().toUri(), Hive.get()
+          .getConf());
+      String pathPattern = getPath().toString();
+      if (getNumBuckets() > 0) {
+        pathPattern = pathPattern + "/*";
+      }
+      LOG.info("Path pattern = " + pathPattern);
+      FileStatus srcs[] = fs.globStatus(new Path(pathPattern));
+      Arrays.sort(srcs);
+      for (FileStatus src : srcs) {
+        LOG.info("Got file: " + src.getPath());
+      }
+      if (srcs.length == 0) {
+        return null;
+      }
+      return srcs;
+    } catch (Exception e) {
+      throw new RuntimeException("Cannot get path ", e);
+    }
+  }
 };
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (revision 1461939)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (working copy)
@@ -94,6 +94,11 @@
   @Override
   public void cleanUpInputFileChangedOp() throws HiveException {
     inputFileChanged = true;
+    if (conf.isNoteCurrentFileName()) {
+      String currentInputFile = getExecContext().getCurrentInputFile();
+      getExecContext().setFileId(Integer.toString(conf.getBucketFileNameMapping().get(
+          Utilities.getFileNameFromDirName(currentInputFile))));
+    }
   }
 
   private void gatherStats(Object row) {
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java (revision 1461939)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java (working copy)
@@ -20,6 +20,7 @@
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 
@@ -67,6 +68,10 @@
   public static final String FILTER_TEXT_CONF_STR = "hive.io.filter.text";
 
+  private boolean noteCurrentFileName = false;
+  // input file name (big) to bucket number
+  private Map bucketFileNameMapping;
+
   @SuppressWarnings("nls")
   public TableScanDesc() {
   }
@@ -170,4 +175,20 @@
   public Integer getRowLimitExplain() {
     return rowLimit >= 0 ? rowLimit : null;
   }
+
+  public boolean isNoteCurrentFileName() {
+    return noteCurrentFileName;
+  }
+
+  public void setNoteCurrentFileName(boolean noteCurrentFileName) {
+    this.noteCurrentFileName = noteCurrentFileName;
+  }
+
+  public Map getBucketFileNameMapping() {
+    return bucketFileNameMapping;
+  }
+
+  public void setBucketFileNameMapping(Map bucketFileNameMapping) {
+    this.bucketFileNameMapping = bucketFileNameMapping;
+  }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1461939)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -198,6 +198,8 @@
   private Map joinContext;
   private Map smbMapJoinContext;
   private final HashMap topToTable;
+  private final Map fsopToTable;
+  private final List reduceSinkOperatorsAddedByEnforceBucketingSorting;
   private QB qb;
   private ASTNode ast;
   private int destTableId;
@@ -259,6 +261,8 @@
     joinContext = new HashMap();
     smbMapJoinContext = new HashMap();
     topToTable = new HashMap();
+    fsopToTable = new HashMap();
+    reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList();
     destTableId = 1;
     uCtx = null;
     listMapJoinOpsNoReducer = new ArrayList>();
@@ -317,11 +321,13 @@
   public ParseContext getParseContext() {
     return new ParseContext(conf, qb, ast, opToPartPruner, opToPartList, topOps,
-        topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable, loadTableWork,
+        topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable,
+        fsopToTable, loadTableWork,
         loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
         listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
         opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
-        opToPartToSkewedPruner, viewAliasToInput);
+        opToPartToSkewedPruner, viewAliasToInput,
+        reduceSinkOperatorsAddedByEnforceBucketingSorting);
   }
 
   @SuppressWarnings("nls")
@@ -5180,6 +5186,7 @@
           + dest_path + " row schema: " + inputRR.toString());
     }
 
+    fsopToTable.put((FileSinkOperator) output, dest_tab);
     return output;
   }
 
@@ -5587,6 +5594,7 @@
         partitionCols, order.toString(), numReducers),
         new RowSchema(inputRR.getColumnInfos()), input), inputRR);
     interim.setColumnExprMap(colExprMap);
+    reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator) interim);
 
     // Add the extract operator to get the value fields
     RowResolver out_rwsch = new RowResolver();
@@ -5609,6 +5617,7 @@
       LOG.debug("Created ReduceSink Plan for table: " + tab.getTableName()
           + " row schema: " + out_rwsch.toString());
     }
+
     return output;
   }
 
@@ -8515,11 +8524,12 @@
     ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner,
         opToPartList, topOps, topSelOps, opParseCtx, joinContext, smbMapJoinContext,
-        topToTable,
+        topToTable, fsopToTable,
         loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
         listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
         opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
-        opToPartToSkewedPruner, viewAliasToInput);
+        opToPartToSkewedPruner, viewAliasToInput,
+        reduceSinkOperatorsAddedByEnforceBucketingSorting);
 
     // Generate table access stats if required
     if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS) == true) {
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 1461939)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy)
@@ -30,10 +30,12 @@
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
@@ -74,6 +76,8 @@
   private Map mapJoinContext;
   private Map smbMapJoinContext;
   private HashMap topToTable;
+  private Map fsopToTable;
+  private List reduceSinkOperatorsAddedByEnforceBucketingSorting;
   private HashMap nameToSplitSample;
   private List loadTableWork;
   private List loadFileWork;
@@ -164,6 +168,7 @@
       Map joinContext,
       Map smbMapJoinContext,
       HashMap topToTable,
+      Map fsopToTable,
       List loadTableWork, List loadFileWork,
       Context ctx, HashMap idToTableNameMap, int destTableId,
       UnionProcContext uCtx, List> listMapJoinOpsNoReducer,
@@ -174,7 +179,8 @@
       HashMap nameToSplitSample,
       HashSet semanticInputs, List> rootTasks,
       Map> opToPartToSkewedPruner,
-      Map viewAliasToInput) {
+      Map viewAliasToInput,
+      List reduceSinkOperatorsAddedByEnforceBucketingSorting) {
     this.conf = conf;
     this.qb = qb;
     this.ast = ast;
@@ -183,6 +189,7 @@
     this.joinContext = joinContext;
     this.smbMapJoinContext = smbMapJoinContext;
     this.topToTable = topToTable;
+    this.fsopToTable = fsopToTable;
     this.loadFileWork = loadFileWork;
     this.loadTableWork = loadTableWork;
     this.opParseCtx = opParseCtx;
@@ -203,6 +210,8 @@
     this.rootTasks = rootTasks;
     this.opToPartToSkewedPruner = opToPartToSkewedPruner;
     this.viewAliasToInput = viewAliasToInput;
+    this.reduceSinkOperatorsAddedByEnforceBucketingSorting =
+        reduceSinkOperatorsAddedByEnforceBucketingSorting;
   }
 
   /**
@@ -304,6 +313,24 @@
     this.topToTable = topToTable;
   }
 
+  public Map getFsopToTable() {
+    return fsopToTable;
+  }
+
+  public void setFsopToTable(Map fsopToTable) {
+    this.fsopToTable = fsopToTable;
+  }
+
+  public List getReduceSinkOperatorsAddedByEnforceBucketingSorting() {
+    return reduceSinkOperatorsAddedByEnforceBucketingSorting;
+  }
+
+  public void setReduceSinkOperatorsAddedByEnforceBucketingSorting(
+      List reduceSinkOperatorsAddedByEnforceBucketingSorting) {
+    this.reduceSinkOperatorsAddedByEnforceBucketingSorting =
+        reduceSinkOperatorsAddedByEnforceBucketingSorting;
+  }
+
   /**
    * @return the topOps
    */
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java (revision 1461939)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java (working copy)
@@ -31,7 +31,7 @@
 public class PrunedPartitionList {
 
   // source table
-  private Table source;
+  private final Table source;
 
   // confirmed partitions - satisfy the partition criteria
   private Set confirmedPartns;
@@ -44,7 +44,7 @@
   /**
    * @param confirmedPartns
-   *          confirmed paritions
+   *          confirmed partitions
    * @param unknownPartns
    *          unknown partitions
    */
@@ -62,7 +62,7 @@
   /**
    * get confirmed partitions.
-   * 
+   *
    * @return confirmedPartns confirmed paritions
    */
   public Set getConfirmedPartns() {
@@ -71,7 +71,7 @@
   /**
    * get unknown partitions.
-   * 
+   *
    * @return unknownPartns unknown paritions
    */
   public Set getUnknownPartns() {
@@ -80,7 +80,7 @@
   /**
    * get denied partitions.
-   * 
+   *
    * @return deniedPartns denied paritions
    */
   public Set getDeniedPartns() {
@@ -99,7 +99,7 @@
   /**
    * set confirmed partitions.
-   * 
+   *
    * @param confirmedPartns
    *          confirmed paritions
    */
@@ -109,7 +109,7 @@
   /**
    * set unknown partitions.
-   * 
+   *
    * @param unknownPartns
    *          unknown partitions
    */