Index: conf/hive-default.xml.template
===================================================================
--- conf/hive-default.xml.template	(revision 1462406)
+++ conf/hive-default.xml.template	(working copy)
@@ -933,6 +933,16 @@
+<property>
+  <name>hive.optimize.bucketingsorting</name>
+  <value>true</value>
+  <description>If hive.enforce.bucketing or hive.enforce.sorting is true, don't create a reducer for enforcing
+    bucketing/sorting for queries of the form:
+    insert overwrite table T2 select * from T1;
+    where T1 and T2 are bucketed/sorted by the same keys into the same number of buckets.
+  </description>
+</property>
+
 
 <property>
   <name>hive.enforce.sortmergebucketmapjoin</name>
   <value>false</value>
   <description>If the user asked for sort-merge bucketed map-side join, and it cannot be performed,
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 1462406)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -513,6 +513,7 @@
     HIVEENFORCEBUCKETING("hive.enforce.bucketing", false),
     HIVEENFORCESORTING("hive.enforce.sorting", false),
+    HIVEOPTIMIZEBUCKETINGSORTING("hive.optimize.bucketingsorting", true),
     HIVEPARTITIONER("hive.mapred.partitioner", "org.apache.hadoop.hive.ql.io.DefaultHivePartitioner"),
     HIVEENFORCESORTMERGEBUCKETMAPJOIN("hive.enforce.sortmergebucketmapjoin", false),
     HIVEENFORCEBUCKETMAPJOIN("hive.enforce.bucketmapjoin", false),
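For orientation between the two hunks above and the test outputs below: the new flag is read through HiveConf's standard accessors. A minimal sketch, assuming only the API visible in this patch; the guard logic itself is illustrative and is not the patch's actual optimizer code.

import org.apache.hadoop.hive.conf.HiveConf;

public class BucketingSortingFlagCheck {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Reads hive.optimize.bucketingsorting, default true per the template entry above.
    boolean optimize =
        conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEBUCKETINGSORTING);
    // Removing the enforcing reducer only matters when bucketing/sorting is enforced.
    boolean enforced =
        conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETING)
            || conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCESORTING);
    System.out.println("skip enforcing reducer? " + (optimize && enforced));
  }
}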
Index: common/src/java/org/apache/hadoop/hive/common/ObjectPair.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/common/ObjectPair.java	(revision 1462406)
+++ common/src/java/org/apache/hadoop/hive/common/ObjectPair.java	(working copy)
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.common;
 
+
+
 public class ObjectPair<F, S> {
   private F first;
   private S second;
@@ -44,4 +46,24 @@
   public void setSecond(S second) {
     this.second = second;
   }
+
+  @Override
+  public boolean equals(Object that) {
+    if (that == null) {
+      return false;
+    }
+    if (that instanceof ObjectPair) {
+      return this.equals((ObjectPair<F, S>)that);
+    }
+    return false;
+  }
+
+  public boolean equals(ObjectPair<F, S> that) {
+    if (that == null) {
+      return false;
+    }
+
+    return this.getFirst().equals(that.getFirst()) &&
+        this.getSecond().equals(that.getSecond());
+  }
 }
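A caveat on the hunk above: equals() is overridden without a matching hashCode(), so equal pairs would not behave as equal HashMap/HashSet keys, and the two-argument equals() assumes first and second are non-null. A minimal standalone sketch of the conventional null-safe pairing, using a hypothetical PairSketch class rather than the patched file:

// Hypothetical sketch, not part of the patch: a pair with equals() in the
// style added above plus the hashCode() that the Object contract requires.
public class PairSketch<F, S> {
  private final F first;
  private final S second;

  public PairSketch(F first, S second) {
    this.first = first;
    this.second = second;
  }

  @Override
  public boolean equals(Object that) {
    if (!(that instanceof PairSketch)) {
      return false; // also covers that == null
    }
    PairSketch<?, ?> other = (PairSketch<?, ?>) that;
    return (first == null ? other.first == null : first.equals(other.first))
        && (second == null ? other.second == null : second.equals(other.second));
  }

  @Override
  public int hashCode() {
    // 31 is the conventional multiplier; null-safe to match equals().
    int result = (first == null) ? 0 : first.hashCode();
    return 31 * result + ((second == null) ? 0 : second.hashCode());
  }
}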
Index: ql/src/test/results/clientpositive/smb_mapjoin_19.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_19.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/smb_mapjoin_19.q.out	(working copy)
@@ -0,0 +1,315 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Select Operator
+              expressions:
+                    expr: key
+                    type: int
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.test_table2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 1
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test_table2
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table1 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+36
+PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+40
+PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 12
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 12
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+29
+PREHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 16) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 16) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+36
+PREHOOK: query: select count(*) from test_table1 tablesample (bucket 6 out of 16) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 6 out of 16) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+40
+PREHOOK: query: select count(*) from test_table1 tablesample (bucket 13 out of 16) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 13 out of 16) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+29
+PREHOOK: query: select count(*) from test_table2 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+36
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+40
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 12
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 12
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+29
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 16) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 16) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+36
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 6 out of 16) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 6 out of 16) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+40
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 13 out of 16) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 13 out of 16) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+29
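The counts above line up with the TABLESAMPLE results because bucket b of n (1-based) holds exactly the rows whose key hash is congruent to b-1 modulo n. A small illustrative sketch of that bucket computation, assuming the Hadoop-style non-negative modulo convention; this is not Hive's actual code path:

// Illustrative: rows with hash(key) % 16 == 0 are what
// TABLESAMPLE(BUCKET 1 OUT OF 16) returns, hash % 16 == 5 maps to
// bucket 6, and so on.
public class BucketIndexSketch {
  // Hadoop-style non-negative modulo, as used by hash partitioners.
  static int bucketFor(int hash, int numBuckets) {
    return (hash & Integer.MAX_VALUE) % numBuckets;
  }

  public static void main(String[] args) {
    for (int hash : new int[] {0, 5, 12}) {
      // Prints the 1-based bucket that TABLESAMPLE addresses.
      System.out.println("hash " + hash + " -> bucket " + (bucketFor(hash, 16) + 1));
    }
  }
}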
Index: ql/src/test/results/clientpositive/smb_mapjoin_20.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_20.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/smb_mapjoin_20.q.out	(working copy)
@@ -0,0 +1,454 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key int, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key int, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key STRING, value1 STRING, value2 string) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key STRING, value1 STRING, value2 string) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- with different datatypes. This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- with different datatypes. This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Select Operator
+              expressions:
+                    expr: key
+                    type: int
+                    expr: value
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1, _col2
+              Reduce Output Operator
+                key expressions:
+                      expr: UDFToString(_col0)
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: UDFToString(_col0)
+                      type: string
+                tag: -1
+                value expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                      expr: _col2
+                      type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 1
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.test_table2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 1
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test_table2
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table2 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+242
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+258
+PREHOOK: query: CREATE TABLE test_table3 (key STRING, value1 int, value2 string) PARTITIONED BY (ds STRING)
+CLUSTERED BY (value1) SORTED BY (value1) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table3 (key STRING, value1 int, value2 string) PARTITIONED BY (ds STRING)
+CLUSTERED BY (value1) SORTED BY (value1) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table3
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, although the bucketing positions dont match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, although the bucketing positions dont match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Select Operator
+              expressions:
+                    expr: value
+                    type: string
+                    expr: key
+                    type: int
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1, _col2
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.test_table3
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 1
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test_table3
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table3 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table3 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: select count(*) from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- However, since an expression is being selected, it should involve a reducer
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key+a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- However, since an expression is being selected, it should involve a reducer
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key+a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL a) key))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Select Operator
+              expressions:
+                    expr: (key + key)
+                    type: int
+                    expr: value
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1, _col2
+              Reduce Output Operator
+                key expressions:
+                      expr: UDFToString(_col0)
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: UDFToString(_col0)
+                      type: string
+                tag: -1
+                value expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                      expr: _col2
+                      type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 1
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.test_table2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 2
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test_table2
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+
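Taken together, the three plans above mark the boundary this optimization appears to respect: the enforcing reducer can be dropped only when every bucketing/sorting column of the target is an unmodified source column, while the UDFToString cast and the key+key expression both force the reducer back in. A conceptual sketch of that per-column test, using a hypothetical helper rather than the optimizer's actual implementation:

import java.util.List;

// Hypothetical sketch of the decision the plans above illustrate.
public class MapOnlyCheck {
  // A select expression is "identity" if it forwards a column unchanged;
  // UDFToString(key) or (key + key) would not qualify.
  interface SelectExpr {
    boolean isIdentityColumn();
  }

  static boolean canDropEnforcingReducer(List<SelectExpr> bucketColumnExprs,
                                         int sourceBuckets, int targetBuckets) {
    if (sourceBuckets != targetBuckets) {
      return false; // bucket counts must match for per-bucket file copying
    }
    for (SelectExpr e : bucketColumnExprs) {
      if (!e.isIdentityColumn()) {
        return false; // a cast or computed value changes hash placement
      }
    }
    return true;
  }
}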
Index: ql/src/test/results/clientpositive/smb_mapjoin_18.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_18.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/smb_mapjoin_18.q.out	(working copy)
@@ -0,0 +1,591 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Select Operator
+              expressions:
+                    expr: key
+                    type: int
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.test_table2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 1
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test_table2
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table1 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: select count(*) from test_table2 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, one of the buckets should be empty
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, one of the buckets should be empty
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL a) key) 238)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: (key = 238)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.test_table2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 2
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test_table2
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table2@ds=2
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table2@ds=2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table2 where ds = '2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value
SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +2 +PREHOOK: query: select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +2 +PREHOOK: query: select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +0 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '2' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 
PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +2 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '2' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +0 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3') +SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3') +SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '3')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. 
(TOK_TABLE_OR_COL a) ds) '2')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 3 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=2 +PREHOOK: Output: default@test_table2@ds=2 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=2 +POSTHOOK: Output: default@test_table2@ds=2 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table2 where ds = '3' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, 
comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +0 +PREHOOK: query: select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +0 +PREHOOK: query: select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +0 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '3' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### 
A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +0 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '3' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +0 Index: ql/src/test/results/clientpositive/smb_mapjoin_22.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_22.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_mapjoin_22.q.out (working copy) @@ -0,0 +1,360 @@ +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT 
OVERWRITE TABLE test_table1 SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + test_table1 + TableScan + alias: test_table1 + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +500 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +253 +PREHOOK: query: select count(*) from test_table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +500 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +253 +PREHOOK: query: drop table test_table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table1 +PREHOOK: Output: default@test_table1 +POSTHOOK: query: drop table test_table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table1 +POSTHOOK: Output: default@test_table1 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE 
[(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: drop table test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: drop table test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table1 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE 
test_table2 +SELECT * FROM test_table1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + test_table1 + TableScan + alias: test_table1 + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table1 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@test_table1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +500 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +253 +PREHOOK: query: select count(*) from test_table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] 
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +500 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +253 Index: ql/src/test/results/clientpositive/smb_mapjoin_21.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_21.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_mapjoin_21.q.out (working copy) @@ -0,0 +1,568 @@ +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1@ds=1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1@ds=1 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed 
table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: drop table test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: drop table test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort orders do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort orders do not match +EXPLAIN +INSERT OVERWRITE TABLE
test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: - + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: drop table test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: drop table test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key, value) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key, value) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by 
selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: drop table test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: drop table test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (value) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (value) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE 
test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: drop table test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: drop table test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from 
another bucketed table +-- This should be a map-reduce operation since the number of buckets does not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the number of buckets does not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: drop table test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: drop table test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Stats-Aggr Operator + + Index: ql/src/test/queries/clientpositive/smb_mapjoin_20.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_20.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_20.q (working copy) @@ -0,0 +1,53 @@ +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.reducers.max = 1; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key int, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table2 (key STRING, value1 STRING, value2 string) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; + +FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- with different datatypes. 
This should be a map-reduce operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'; + +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'; + +select count(*) from test_table2 where ds = '1'; +select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0; +select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1; + +CREATE TABLE test_table3 (key STRING, value1 int, value2 string) PARTITIONED BY (ds STRING) +CLUSTERED BY (value1) SORTED BY (value1) INTO 2 BUCKETS; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation, although the bucketing positions don't match +EXPLAIN +INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') +SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') +SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +select count(*) from test_table3 where ds = '1'; +select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 0; +select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 1; +select count(*) from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'; +select count(*) from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- However, since an expression is being selected, it should involve a reducer +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key+a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'; Index: ql/src/test/queries/clientpositive/smb_mapjoin_21.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_21.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_21.q (working copy) @@ -0,0 +1,77 @@ +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.reducers.max = 1; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; + +FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +drop table test_table2; + +CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort orders do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +drop table test_table2; + +CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key, value) INTO 2 BUCKETS; +
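+-- Note that (key) is a prefix of (key, value): the optimizer compares the source
+-- and destination sort specifications for exact equality, so even this prefix
+-- case keeps the reducer.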
+-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +drop table test_table2; + +CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (value) INTO 2 BUCKETS; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +drop table test_table2; + +CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the number of buckets does not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +drop table test_table2; + +CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) INTO 2 BUCKETS; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; Index: ql/src/test/queries/clientpositive/smb_mapjoin_18.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_18.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_18.q (working copy) @@ -0,0 +1,65 @@ +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.reducers.max = 1; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; + +FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +select count(*) from test_table1 where ds = '1'; +select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 0; +select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 1; +select count(*) from test_table1 tablesample (bucket 1 out of 2) s where ds = '1'; +select count(*) from test_table1 tablesample (bucket 2 out of 2) s where ds = '1'; + +select count(*) from test_table2 where ds = '1'; +select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0; +select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1; +select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'; +select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1';
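+
+-- The hash checks and the tablesample checks above pair up: rows with
+-- hash(key) % 2 = 0 belong in bucket 1 and rows with hash(key) % 2 = 1 in
+-- bucket 2, so matching counts confirm the map-only insert produced the same
+-- bucket layout a reducer would have.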
+ +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation, one of the buckets should be empty +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238; + +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238; + +select count(*) from test_table2 where ds = '2'; +select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 0; +select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 1; +select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '2'; +select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '2'; + +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3') +SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2'; + +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3') +SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2'; + +select count(*) from test_table2 where ds = '3'; +select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 0; +select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 1; +select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '3'; +select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '3'; Index: ql/src/test/queries/clientpositive/smb_mapjoin_22.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_22.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_22.q (working copy) @@ -0,0 +1,55 @@ +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.reducers.max = 1; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table2 (key INT, value STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; + +FROM src +INSERT OVERWRITE TABLE test_table1 SELECT *; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1; + +INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1; + +select count(*) from test_table1; +select count(*) from test_table1 tablesample (bucket 2 out of 2) s; + +select count(*) from test_table2; +select count(*) from test_table2 tablesample (bucket 2 out of 2) s; + +drop table test_table1; +drop table test_table2; + +CREATE TABLE test_table1 (key INT, value STRING) +CLUSTERED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table2 (key INT, value STRING) +CLUSTERED BY (key) INTO 2 BUCKETS; + +FROM src +INSERT OVERWRITE TABLE test_table1 SELECT *; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1; + +INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1; + +select count(*) from test_table1; +select count(*) from test_table1 tablesample (bucket 2 out of 2) s; + +select count(*) from test_table2; +select count(*) from test_table2 tablesample (bucket 2 out of 2) s;
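+
+-- Both tables in this second case have no SORTED BY clause, so the (empty) sort
+-- specifications trivially match; matching bucket columns and bucket counts are
+-- enough for the map-only rewrite.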
Index: ql/src/test/queries/clientpositive/smb_mapjoin_19.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_19.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_19.q (working copy) @@ -0,0 +1,41 @@ +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.reducers.max = 1; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS; +CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS; + +FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *; + +-- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'; + +select count(*) from test_table1 where ds = '1'; +select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 0; +select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 5; +select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 12; +select count(*) from test_table1 tablesample (bucket 1 out of 16) s where ds = '1'; +select count(*) from test_table1 tablesample (bucket 6 out of 16) s where ds = '1'; +select count(*) from test_table1 tablesample (bucket 13 out of 16) s where ds = '1'; + +select count(*) from test_table2 where ds = '1'; +select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 0; +select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 5; +select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 12; +select count(*) from test_table2 tablesample (bucket 1 out of 16) s where ds = '1'; +select count(*) from test_table2 tablesample (bucket 6 out of 16) s where ds = '1'; +select count(*) from test_table2 tablesample (bucket 13 out of 16) s where ds = '1';
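All of the positive test cases above share one shape; as a minimal end-to-end sketch of the intended behavior (the table names t1 and t2 are invented for this illustration and do not appear in the patch):

    set hive.enforce.bucketing=true;
    set hive.enforce.sorting=true;
    set hive.optimize.bucketingsorting=true;

    CREATE TABLE t1 (key INT, value STRING)
    CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS;
    CREATE TABLE t2 (key INT, value STRING)
    CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS;

    -- With identical bucket/sort specifications, the plan for this insert should
    -- show no Reduce Operator Tree: the reducer normally added to enforce
    -- bucketing/sorting is removed by the new optimizer.
    EXPLAIN INSERT OVERWRITE TABLE t2 SELECT * FROM t1;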
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java (working copy) @@ -0,0 +1,393 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.hive.common.ObjectPair; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.ql.exec.ExtractOperator; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; + +/** + * This transformation optimizes the enforcement of bucketing and sorting. + * For a query of the form: + * insert overwrite table T1 select * from T2; + * where T1 and T2 are bucketed/sorted on the same keys, we don't need a reducer to + * enforce bucketing and sorting. + */ +public class BucketingSortingReduceSinkOptimizer implements Transform { + + private static final Log LOG = LogFactory.getLog(BucketingSortingReduceSinkOptimizer.class + .getName()); + + public BucketingSortingReduceSinkOptimizer() { + } + + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + + Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); + HiveConf conf = pctx.getConf(); + + // process the ReduceSink-Extract-FileSink pattern added to enforce bucketing/sorting + opRules.put(new RuleRegExp("R1", + ReduceSinkOperator.getOperatorName() + "%" + + ExtractOperator.getOperatorName() + "%" + + FileSinkOperator.getOperatorName() + "%"), + getBucketSortReduceSinkProc(pctx)); + + // The dispatcher fires the processor corresponding to the closest matching rule + Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, null); + GraphWalker ogw = new DefaultGraphWalker(disp); + + // Create a list of topop nodes + ArrayList<Node> topNodes = new ArrayList<Node>(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + + return pctx; + } + + private NodeProcessor getDefaultProc() { + return new NodeProcessor() { + @Override + public Object process(Node nd, Stack<Node> stack, + NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { + return null; + } + }; + }
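+  // The R1 rule above matches the tail of the plan that enforce bucketing/sorting
+  // generates:
+  //   TableScan -> (Select | Filter)* -> ReduceSink -> Extract -> FileSink
+  // Only the ReduceSink-Extract-FileSink suffix is matched by the rule; the
+  // processor below walks up the parent chain to validate the rest.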
+ + private NodeProcessor getBucketSortReduceSinkProc(ParseContext pctx) { + return new BucketSortReduceSinkProcessor(pctx); + } + + /** + * BucketSortReduceSinkProcessor. + * + */ + public class BucketSortReduceSinkProcessor implements NodeProcessor { + + protected ParseContext pGraphContext; + + public BucketSortReduceSinkProcessor(ParseContext pGraphContext) { + this.pGraphContext = pGraphContext; + } + + private List<Integer> getBucketPositions(List<String> tabBucketCols, + List<FieldSchema> tabCols) { + List<Integer> posns = new ArrayList<Integer>(); + for (String bucketCol : tabBucketCols) { + int pos = 0; + for (FieldSchema tabCol : tabCols) { + if (bucketCol.equals(tabCol.getName())) { + posns.add(pos); + break; + } + pos++; + } + } + return posns; + } + + private List<ObjectPair<Integer, Integer>> getSortPositions(List<Order> tabSortCols, + List<FieldSchema> tabCols) { + List<ObjectPair<Integer, Integer>> posns = new ArrayList<ObjectPair<Integer, Integer>>(); + for (Order sortCol : tabSortCols) { + int pos = 0; + for (FieldSchema tabCol : tabCols) { + if (sortCol.getCol().equals(tabCol.getName())) { + posns.add(new ObjectPair<Integer, Integer>(pos, sortCol.getOrder())); + break; + } + pos++; + } + } + return posns; + } + + private boolean checkPartition(Partition partition, + List<Integer> bucketPositionsDest, + List<ObjectPair<Integer, Integer>> sortPositionsDest, + int numBucketsDest) { + // The bucketing and sorting positions should exactly match + int numBuckets = partition.getBucketCount(); + if (numBucketsDest != numBuckets) { + return false; + } + + List<Integer> partnBucketPositions = + getBucketPositions(partition.getBucketCols(), partition.getTable().getCols()); + List<ObjectPair<Integer, Integer>> partnSortPositions = + getSortPositions(partition.getSortCols(), partition.getTable().getCols()); + return bucketPositionsDest.equals(partnBucketPositions) && + sortPositionsDest.equals(partnSortPositions); + } + + private boolean checkTable(Table table, + List<Integer> bucketPositionsDest, + List<ObjectPair<Integer, Integer>> sortPositionsDest, + int numBucketsDest) { + // The bucketing and sorting positions should exactly match + int numBuckets = table.getNumBuckets(); + if (numBucketsDest != numBuckets) { + return false; + } + + List<Integer> tableBucketPositions = + getBucketPositions(table.getBucketCols(), table.getCols()); + List<ObjectPair<Integer, Integer>> tableSortPositions = + getSortPositions(table.getSortCols(), table.getCols()); + return bucketPositionsDest.equals(tableBucketPositions) && + sortPositionsDest.equals(tableSortPositions); + }
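+    // Note: checkPartition/checkTable implement an exact-match policy. The
+    // destination's bucket and sort positions (already remapped to the source
+    // schema by the caller) must equal the source's, including the number of
+    // buckets and the asc/desc sort order; a source sorted by a prefix or a
+    // superset of the destination's sort columns is not accepted.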
+ + private void removeReduceSink(ReduceSinkOperator rsOp, + TableScanOperator tsOp, + FileSinkOperator fsOp, + Table table) { + removeReduceSink(rsOp, tsOp, fsOp); + FileStatus[] srcs = table.getSortedPaths(); + Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>(); + for (int pos = 0; pos < srcs.length; pos++) { + bucketFileNameMapping.put(srcs[pos].getPath().getName(), pos); + } + tsOp.getConf().setBucketFileNameMapping(bucketFileNameMapping); + } + + private void removeReduceSink(ReduceSinkOperator rsOp, + TableScanOperator tsOp, + FileSinkOperator fsOp, + Partition partition) { + removeReduceSink(rsOp, tsOp, fsOp); + FileStatus[] srcs = partition.getSortedPaths(); + Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>(); + for (int pos = 0; pos < srcs.length; pos++) { + bucketFileNameMapping.put(srcs[pos].getPath().getName(), pos); + } + tsOp.getConf().setBucketFileNameMapping(bucketFileNameMapping); + } + + private void removeReduceSink(ReduceSinkOperator rsOp, + TableScanOperator tsOp, + FileSinkOperator fsOp) { + Operator<? extends OperatorDesc> parRSOp = rsOp.getParentOperators().get(0); + parRSOp.getChildOperators().set(0, fsOp); + fsOp.getParentOperators().set(0, parRSOp); + fsOp.getConf().setMultiFileSpray(false); + fsOp.getConf().setTotalFiles(1); + fsOp.getConf().setNumFiles(1); + tsOp.setUseBucketizedHiveInputFormat(true); + } + + private int findColumnPosition(List<FieldSchema> cols, String colName) { + int pos = 0; + for (FieldSchema col : cols) { + if (colName.equals(col.getName())) { + return pos; + } + pos++; + } + return -1; + } + + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + // If the reduce sink has not been introduced due to bucketing/sorting, ignore it + FileSinkOperator fsOp = (FileSinkOperator) nd; + ExtractOperator exOp = (ExtractOperator) fsOp.getParentOperators().get(0); + ReduceSinkOperator rsOp = (ReduceSinkOperator) exOp.getParentOperators().get(0); + + List<ReduceSinkOperator> rsOps = pGraphContext + .getReduceSinkOperatorsAddedByEnforceBucketingSorting(); + // nothing to do + if ((rsOps != null) && (!rsOps.contains(rsOp))) { + return null; + } + + // Support for dynamic partitions can be added later + if (fsOp.getConf().getDynPartCtx() != null) { + return null; + } + + // No conversion is possible for the reduce keys + for (ExprNodeDesc keyCol : rsOp.getConf().getKeyCols()) { + if (!(keyCol instanceof ExprNodeColumnDesc)) { + return null; + } + }
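+      // A reduce key that is an expression rather than a bare column (for example
+      // the "select a.key+a.key, ..." case in smb_mapjoin_20.q) may distribute rows
+      // differently than the source buckets, so the ReduceSink has to stay.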
+ + Table destTable = pGraphContext.getFsopToTable().get(fsOp); + if (destTable == null) { + return null; + } + + // Get the positions for sorted and bucketed columns + List<Integer> bucketPositions = + getBucketPositions(destTable.getBucketCols(), destTable.getCols()); + List<ObjectPair<Integer, Integer>> sortPositions = + getSortPositions(destTable.getSortCols(), destTable.getCols()); + + // Only selects and filters are allowed + Operator<? extends OperatorDesc> op = rsOp; + // TableScan will also be followed by a Select Operator. Find the expressions for the + // bucketed columns for the destination table + List<ExprNodeColumnDesc> sourceTableBucketCols = new ArrayList<ExprNodeColumnDesc>(); + List<ExprNodeColumnDesc> sourceTableSortCols = new ArrayList<ExprNodeColumnDesc>(); + + while (true) { + if (op.getParentOperators().size() > 1) { + return null; + } + + op = op.getParentOperators().get(0); + if (!(op instanceof TableScanOperator) && + !(op instanceof FilterOperator) && + !(op instanceof SelectOperator)) { + return null; + } + + // nothing to be done for filters - the output schema does not change. + if (op instanceof TableScanOperator) { + Table srcTable = pGraphContext.getTopToTable().get(op); + + // Find the positions of the columns in the table corresponding to the select list. + List<Integer> newBucketPositions = new ArrayList<Integer>(); + for (int pos = 0; pos < bucketPositions.size(); pos++) { + ExprNodeColumnDesc col = sourceTableBucketCols.get(pos); + String colName = col.getColumn(); + int bucketPos = findColumnPosition(srcTable.getCols(), colName); + if (bucketPos < 0) { + return null; + } + newBucketPositions.add(bucketPos); + } + + // Find the positions of the columns in the table corresponding to the select list. + List<ObjectPair<Integer, Integer>> newSortPositions = + new ArrayList<ObjectPair<Integer, Integer>>(); + for (int pos = 0; pos < sortPositions.size(); pos++) { + ExprNodeColumnDesc col = sourceTableSortCols.get(pos); + String colName = col.getColumn(); + int sortPos = findColumnPosition(srcTable.getCols(), colName); + if (sortPos < 0) { + return null; + } + newSortPositions.add( + new ObjectPair<Integer, Integer>(sortPos, sortPositions.get(pos).getSecond())); + } + + + if (srcTable.isPartitioned()) { + PrunedPartitionList prunedParts = pGraphContext.getOpToPartList().get(op); + List<Partition> partitions = prunedParts.getNotDeniedPartns(); + + // Support for dynamic partitions can be added later + // Don't optimize the case: + // insert overwrite table T1(ds='1', hr) select key, value, hr from T2 where ds = '1'; + // where T1 and T2 are bucketed by the same keys and partitioned by ds, hr + if ((partitions == null) || (partitions.isEmpty()) || (partitions.size() > 1)) { + return null; + } + for (Partition partition : partitions) { + if (!checkPartition(partition, newBucketPositions, newSortPositions, + pGraphContext.getFsopToTable().get(fsOp).getNumBuckets())) { + return null; + } + } + + removeReduceSink(rsOp, (TableScanOperator) op, fsOp, + partitions.get(0)); + return null; + } + else { + if (!checkTable(srcTable, newBucketPositions, newSortPositions, + pGraphContext.getFsopToTable().get(fsOp).getNumBuckets())) { + return null; + } + + removeReduceSink(rsOp, (TableScanOperator) op, fsOp, srcTable); + return null; + } + } + // None of the operators is changing the positions + else if (op instanceof SelectOperator) { + SelectOperator selectOp = (SelectOperator) op; + SelectDesc selectDesc = selectOp.getConf(); + + sourceTableBucketCols.clear(); + sourceTableSortCols.clear(); + + // Only columns can be selected for both sorted and bucketed positions + for (int pos : bucketPositions) { + ExprNodeDesc selectColList = selectDesc.getColList().get(pos); + if (!(selectColList instanceof ExprNodeColumnDesc)) { + return null; + } + sourceTableBucketCols.add((ExprNodeColumnDesc) selectColList); + } + + for (ObjectPair<Integer, Integer> pos : sortPositions) { + ExprNodeDesc selectColList = selectDesc.getColList().get(pos.getFirst()); + if (!(selectColList instanceof ExprNodeColumnDesc)) { + return null; + } + sourceTableSortCols.add((ExprNodeColumnDesc) selectColList); + } + } + } + } + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (revision 1462406) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (working copy) @@ -58,6 +58,9 @@ /* Add list bucketing pruner. 
*/ transformations.add(new ListBucketingPruner()); } + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEBUCKETINGSORTING)) { + transformations.add(new BucketingSortingReduceSinkOptimizer()); + } } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCP)) { transformations.add(new ColumnPruner()); Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (revision 1462406) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (working copy) @@ -384,11 +384,10 @@ } /** - * mapping from bucket number to bucket path + * get all paths for this partition in a sorted manner */ - // TODO: add test case and clean it up @SuppressWarnings("nls") - public Path getBucketPath(int bucketNum) { + public FileStatus[] getSortedPaths() { try { // Previously, this got the filesystem of the Table, which could be // different from the filesystem of the partition. @@ -407,14 +406,26 @@ if (srcs.length == 0) { return null; } - return srcs[bucketNum].getPath(); + return srcs; } catch (Exception e) { - throw new RuntimeException("Cannot get bucket path for bucket " - + bucketNum, e); + throw new RuntimeException("Cannot get path ", e); } } + /** + * mapping from bucket number to bucket path + */ + // TODO: add test case and clean it up @SuppressWarnings("nls") + public Path getBucketPath(int bucketNum) { + FileStatus srcs[] = getSortedPaths(); + if (srcs.length == 0) { + return null; + } + return srcs[bucketNum].getPath(); + } + + @SuppressWarnings("nls") public Path[] getPath(Sample s) throws HiveException { if (s == null) { return getPath(); Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (revision 1462406) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (working copy) @@ -22,6 +22,7 @@ import java.io.Serializable; import java.net.URI; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; @@ -31,6 +32,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.JavaUtils; @@ -925,4 +927,30 @@ Hive hive = Hive.get(); return hive.getIndexes(getTTable().getDbName(), getTTable().getTableName(), max); } + + @SuppressWarnings("nls") + public FileStatus[] getSortedPaths() { + try { + // Previously, this got the filesystem of the Table, which could be + // different from the filesystem of the partition. 
+ FileSystem fs = FileSystem.get(getPath().toUri(), Hive.get() + .getConf()); + String pathPattern = getPath().toString(); + if (getNumBuckets() > 0) { + pathPattern = pathPattern + "/*"; + } + LOG.info("Path pattern = " + pathPattern); + FileStatus srcs[] = fs.globStatus(new Path(pathPattern)); + Arrays.sort(srcs); + for (FileStatus src : srcs) { + LOG.info("Got file: " + src.getPath()); + } + if (srcs.length == 0) { + return null; + } + return srcs; + } catch (Exception e) { + throw new RuntimeException("Cannot get path ", e); + } + } }; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (revision 1462406) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (working copy) @@ -94,6 +94,19 @@ @Override public void cleanUpInputFileChangedOp() throws HiveException { inputFileChanged = true; + // If the file name to bucket number mapping is maintained, store the bucket number + // in the execution context. This is needed for the following scenario: + // insert overwrite table T1 select * from T2; + // where T1 and T2 are sorted/bucketed by the same keys into the same number of buckets. + // Although one mapper per file is used (BucketizedHiveInputFormat), it is possible that + // any mapper can pick up any file (depending on the size of the files). The bucket number + // corresponding to the input file is stored to name the output bucket file appropriately. + Map<String, Integer> bucketNameMapping = conf.getBucketFileNameMapping(); + if ((bucketNameMapping != null) && (!bucketNameMapping.isEmpty())) { + String currentInputFile = getExecContext().getCurrentInputFile(); + getExecContext().setFileId(Integer.toString(bucketNameMapping.get( + Utilities.getFileNameFromDirName(currentInputFile)))); + } } private void gatherStats(Object row) {
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java (revision 1462406) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java (working copy) @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Map; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; @@ -46,7 +47,7 @@ /** * Used for split sampling (row count per split) * For example, - * select count(1) from ss_src2 tablesample(10 ROWS); + * select count(1) from ss_src2 tablesample (10 ROWS) s; * provides first 10 rows from all input splits */ private int rowLimit = -1; @@ -67,6 +68,9 @@ public static final String FILTER_TEXT_CONF_STR = "hive.io.filter.text"; + // input file name (big) to bucket number + private Map<String, Integer> bucketFileNameMapping; + @SuppressWarnings("nls") public TableScanDesc() { } @@ -170,4 +174,12 @@ public Integer getRowLimitExplain() { return rowLimit >= 0 ? rowLimit : null; } + + public Map<String, Integer> getBucketFileNameMapping() { + return bucketFileNameMapping; + } + + public void setBucketFileNameMapping(Map<String, Integer> bucketFileNameMapping) { + this.bucketFileNameMapping = bucketFileNameMapping; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1462406) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -198,6 +198,8 @@ private Map<JoinOperator, QBJoinTree> joinContext; private Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext; private final HashMap<TableScanOperator, Table> topToTable; + private final Map<FileSinkOperator, Table> fsopToTable; + private final List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting; private QB qb; private ASTNode ast; private int destTableId; @@ -259,6 +261,8 @@ joinContext = new HashMap<JoinOperator, QBJoinTree>(); smbMapJoinContext = new HashMap<SMBMapJoinOperator, QBJoinTree>(); topToTable = new HashMap<TableScanOperator, Table>(); + fsopToTable = new HashMap<FileSinkOperator, Table>(); + reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList<ReduceSinkOperator>(); destTableId = 1; uCtx = null; listMapJoinOpsNoReducer = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>(); @@ -317,11 +321,13 @@ public ParseContext getParseContext() { return new ParseContext(conf, qb, ast, opToPartPruner, opToPartList, topOps, - topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable, loadTableWork, + topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable, + fsopToTable, loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, - opToPartToSkewedPruner, viewAliasToInput); + opToPartToSkewedPruner, viewAliasToInput, + reduceSinkOperatorsAddedByEnforceBucketingSorting); } @SuppressWarnings("nls") @@ -5180,6 +5186,7 @@ + dest_path + " row schema: " + inputRR.toString()); } + fsopToTable.put((FileSinkOperator) output, dest_tab); return output; } @@ -5587,6 +5594,7 @@ partitionCols, order.toString(), numReducers), new RowSchema(inputRR.getColumnInfos()), input), inputRR); interim.setColumnExprMap(colExprMap); + reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator) interim); // Add the extract operator to get the value fields RowResolver out_rwsch = new RowResolver(); @@ -5609,6 +5617,7 @@ LOG.debug("Created ReduceSink Plan for table: " + tab.getTableName() + " row schema: " + out_rwsch.toString()); } + return output; } @@ -8515,11 +8524,12 @@ ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner, opToPartList, topOps, topSelOps, opParseCtx, joinContext, smbMapJoinContext, - topToTable, + topToTable, fsopToTable, loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, - opToPartToSkewedPruner, viewAliasToInput); + opToPartToSkewedPruner, viewAliasToInput, + reduceSinkOperatorsAddedByEnforceBucketingSorting); // Generate table access stats if required if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS) == true) {
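The list populated above is what lets the optimizer distinguish a ReduceSink that merely enforces bucketing/sorting from one that a join, group-by, or order-by depends on; only the former may be removed. A sketch of the guard, mirroring the check in BucketingSortingReduceSinkOptimizer:

    // Only ReduceSinks recorded by SemanticAnalyzer as "added to enforce
    // bucketing/sorting" are candidates; removing any other ReduceSink would
    // change query semantics.
    List<ReduceSinkOperator> enforced =
        pctx.getReduceSinkOperatorsAddedByEnforceBucketingSorting();
    if (enforced != null && !enforced.contains(rsOp)) {
      return null;  // not introduced by enforce bucketing/sorting - leave it alone
    }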
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 1462406) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy) @@ -30,10 +30,12 @@ import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; @@ -74,6 +76,8 @@ private Map<MapJoinOperator, QBJoinTree> mapJoinContext; private Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext; private HashMap<TableScanOperator, Table> topToTable; + private Map<FileSinkOperator, Table> fsopToTable; + private List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting; private HashMap<String, SplitSample> nameToSplitSample; private List<LoadTableDesc> loadTableWork; private List<LoadFileDesc> loadFileWork; @@ -164,6 +168,7 @@ Map<JoinOperator, QBJoinTree> joinContext, Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext, HashMap<TableScanOperator, Table> topToTable, + Map<FileSinkOperator, Table> fsopToTable, List<LoadTableDesc> loadTableWork, List<LoadFileDesc> loadFileWork, Context ctx, HashMap<String, String> idToTableNameMap, int destTableId, UnionProcContext uCtx, List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer, @@ -174,7 +179,8 @@ HashMap<String, SplitSample> nameToSplitSample, HashSet<ReadEntity> semanticInputs, List<Task<? extends Serializable>> rootTasks, Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner, - Map<String, ReadEntity> viewAliasToInput) { + Map<String, ReadEntity> viewAliasToInput, + List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting) { this.conf = conf; this.qb = qb; this.ast = ast; @@ -183,6 +189,7 @@ this.joinContext = joinContext; this.smbMapJoinContext = smbMapJoinContext; this.topToTable = topToTable; + this.fsopToTable = fsopToTable; this.loadFileWork = loadFileWork; this.loadTableWork = loadTableWork; this.opParseCtx = opParseCtx; @@ -203,6 +210,8 @@ this.rootTasks = rootTasks; this.opToPartToSkewedPruner = opToPartToSkewedPruner; this.viewAliasToInput = viewAliasToInput; + this.reduceSinkOperatorsAddedByEnforceBucketingSorting = + reduceSinkOperatorsAddedByEnforceBucketingSorting; } /** @@ -304,6 +313,24 @@ this.topToTable = topToTable; } + public Map<FileSinkOperator, Table> getFsopToTable() { + return fsopToTable; + } + + public void setFsopToTable(Map<FileSinkOperator, Table> fsopToTable) { + this.fsopToTable = fsopToTable; + } + + public List<ReduceSinkOperator> getReduceSinkOperatorsAddedByEnforceBucketingSorting() { + return reduceSinkOperatorsAddedByEnforceBucketingSorting; + } + + public void setReduceSinkOperatorsAddedByEnforceBucketingSorting( + List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting) { + this.reduceSinkOperatorsAddedByEnforceBucketingSorting = + reduceSinkOperatorsAddedByEnforceBucketingSorting; + } + /** * @return the topOps */
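fsopToTable is filled in at plan-generation time (the fsopToTable.put(...) call in SemanticAnalyzer above) and read back during optimization; it is the only link from the matched FileSinkOperator to the destination table's bucketing metadata. Illustrative use, as in the optimizer's process() method:

    // Recover the destination table of the insert from the FileSink that the
    // rule matched; bail out if the sink does not write into a table.
    Table destTable = pctx.getFsopToTable().get(fsOp);
    if (destTable == null) {
      return null;
    }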
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java (revision 1462406) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java (working copy) @@ -31,7 +31,7 @@ public class PrunedPartitionList { // source table - private Table source; + private final Table source; // confirmed partitions - satisfy the partition criteria private Set<Partition> confirmedPartns; @@ -44,7 +44,7 @@ /** * @param confirmedPartns - * confirmed paritions + * confirmed partitions * @param unknownPartns * unknown partitions */ @@ -62,7 +62,7 @@ /** * get confirmed partitions. - * + * * @return confirmedPartns confirmed paritions */ public Set<Partition> getConfirmedPartns() { @@ -71,7 +71,7 @@ /** * get unknown partitions. - * + * * @return unknownPartns unknown paritions */ public Set<Partition> getUnknownPartns() { @@ -80,7 +80,7 @@ /** * get denied partitions. - * + * * @return deniedPartns denied paritions */ public Set<Partition> getDeniedPartns() { @@ -99,7 +99,7 @@ /** * set confirmed partitions. - * + * * @param confirmedPartns * confirmed paritions */ @@ -109,7 +109,7 @@ /** * set unknown partitions. - * + * * @param unknownPartns * unknown partitions */