Index: ql/src/test/results/clientnegative/alter_numbuckets_partitioned_table.q.out
===================================================================
--- ql/src/test/results/clientnegative/alter_numbuckets_partitioned_table.q.out	(revision 1382095)
+++ ql/src/test/results/clientnegative/alter_numbuckets_partitioned_table.q.out	(working copy)
@@ -1,24 +0,0 @@
-PREHOOK: query: create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets
-PREHOOK: type: CREATETABLE
-POSTHOOK: query: create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: default@tst1
-PREHOOK: query: alter table tst1 clustered by (key) into 8 buckets
-PREHOOK: type: ALTERTABLE_CLUSTER_SORT
-PREHOOK: Input: default@tst1
-PREHOOK: Output: default@tst1
-POSTHOOK: query: alter table tst1 clustered by (key) into 8 buckets
-POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
-POSTHOOK: Input: default@tst1
-POSTHOOK: Output: default@tst1
-PREHOOK: query: insert overwrite table tst1 partition (ds='1') select key, value from src
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@tst1@ds=1
-POSTHOOK: query: insert overwrite table tst1 partition (ds='1') select key, value from src
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@tst1@ds=1
-POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-FAILED: SemanticException [Error 10130]: Changing the number of buckets for a partitioned table is not allowed. It may lead to wrong results for older partitions
Index: ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out
===================================================================
--- ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out	(revision 0)
@@ -0,0 +1,337 @@
+PREHOOK: query: create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tst1
+PREHOOK: query: alter table tst1 clustered by (key) into 8 buckets
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@tst1
+PREHOOK: Output: default@tst1
+POSTHOOK: query: alter table tst1 clustered by (key) into 8 buckets
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@tst1
+POSTHOOK: Output: default@tst1
+PREHOOK: query: describe formatted tst1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1
+POSTHOOK: type: DESCTABLE
+# col_name	data_type	comment
+
+key	string	None
+value	string	None
+
+# Partition Information
+# col_name	data_type	comment
+
+ds	string	None
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Protect Mode:	None
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	8
+Bucket Columns:	[key]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: insert overwrite table tst1 partition (ds='1') select key, value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tst1@ds=1
+POSTHOOK: query: insert overwrite table tst1 partition (ds='1') select key, value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tst1@ds=1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Test changing bucket number
+
+alter table tst1 clustered by (key) into 12 buckets
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@tst1
+PREHOOK: Output: default@tst1
+POSTHOOK: query: -- Test changing bucket number
+
+alter table tst1 clustered by (key) into 12 buckets
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@tst1
+POSTHOOK: Output: default@tst1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted tst1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name	data_type	comment
+
+key	string	None
+value	string	None
+
+# Partition Information
+# col_name	data_type	comment
+
+ds	string	None
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Protect Mode:	None
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+	numFiles	1
+	numPartitions	1
+	numRows	500
+	rawDataSize	5312
+	totalSize	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	12
+Bucket Columns:	[key]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: -- Test adding sort order
+
+alter table tst1 clustered by (key) sorted by (key asc) into 12 buckets
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@tst1
+PREHOOK: Output: default@tst1
+POSTHOOK: query: -- Test adding sort order
+
+alter table tst1 clustered by (key) sorted by (key asc) into 12 buckets
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@tst1
+POSTHOOK: Output: default@tst1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted tst1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name	data_type	comment
+
+key	string	None
+value	string	None
+
+# Partition Information
+# col_name	data_type	comment
+
+ds	string	None
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Protect Mode:	None
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+	numFiles	1
+	numPartitions	1
+	numRows	500
+	rawDataSize	5312
+	totalSize	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	12
+Bucket Columns:	[key]
+Sort Columns:	[Order(col:key, order:1)]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: -- Test changing sort order
+
+alter table tst1 clustered by (key) sorted by (value desc) into 12 buckets
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@tst1
+PREHOOK: Output: default@tst1
+POSTHOOK: query: -- Test changing sort order
+
+alter table tst1 clustered by (key) sorted by (value desc) into 12 buckets
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@tst1
+POSTHOOK: Output: default@tst1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted tst1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name	data_type	comment
+
+key	string	None
+value	string	None
+
+# Partition Information
+# col_name	data_type	comment
+
+ds	string	None
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Protect Mode:	None
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+	numFiles	1
+	numPartitions	1
+	numRows	500
+	rawDataSize	5312
+	totalSize	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	12
+Bucket Columns:	[key]
+Sort Columns:	[Order(col:value, order:0)]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: -- Test removing sort order
+
+alter table tst1 clustered by (value) into 12 buckets
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@tst1
+PREHOOK: Output: default@tst1
+POSTHOOK: query: -- Test removing sort order
+
+alter table tst1 clustered by (value) into 12 buckets
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@tst1
+POSTHOOK: Output: default@tst1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted tst1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name	data_type	comment
+
+key	string	None
+value	string	None
+
+# Partition Information
+# col_name	data_type	comment
+
+ds	string	None
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Protect Mode:	None
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+	numFiles	1
+	numPartitions	1
+	numRows	500
+	rawDataSize	5312
+	totalSize	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	12
+Bucket Columns:	[value]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: -- Test removing buckets
+
+alter table tst1 not clustered
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@tst1
+PREHOOK: Output: default@tst1
+POSTHOOK: query: -- Test removing buckets
+
+alter table tst1 not clustered
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@tst1
+POSTHOOK: Output: default@tst1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted tst1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name	data_type	comment
+
+key	string	None
+value	string	None
+
+# Partition Information
+# col_name	data_type	comment
+
+ds	string	None
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Protect Mode:	None
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+	numFiles	1
+	numPartitions	1
+	numRows	500
+	rawDataSize	5312
+	totalSize	5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:	org.apache.hadoop.mapred.TextInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
Index: ql/src/test/results/clientpositive/bucketmapjoin10.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketmapjoin10.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/bucketmapjoin10.q.out	(revision 0)
@@ -0,0 +1,324 @@
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 3 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_1
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 3 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_2
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_2
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: -- The table bucketing metadata matches but the partition metadata does not, bucket map join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The table bucketing metadata matches but the partition metadata does not, bucket map join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-1 depends on stages: Stage-3
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        b
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        b
+          TableScan
+            alias: b
+            GatherStats: false
+            HashTable Sink Operator
+              condition expressions:
+                0 {key}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              Position of Big Table: 0
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a
+          TableScan
+            alias: a
+            GatherStats: false
+            Map Join Operator
+              condition map:
+                Inner Join 0 to 1
+              condition expressions:
+                0 {key}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col6
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col6
+                  type: string
+                outputColumnNames: _col0, _col6
+                Select Operator
+                  expressions:
+                    expr: _col0
+                    type: int
+                    expr: _col6
+                    type: string
+                  outputColumnNames: _col0, _col1
+                  Limit
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                      table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0,_col1
+                          columns.types int:string
+                          escape.delim \
+                          serialization.format 1
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false
+      Local Work:
+        Map Reduce Local Work
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+        Partition
+          base file name: part=1
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          partition values:
+            part 1
+          properties:
+            bucket_count 2
+            bucket_field_name key
+            columns key,value
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.srcbucket_mapjoin_part_1
+            numFiles 2
+            numPartitions 2
+            numRows 0
+            partition_columns part
+            rawDataSize 0
+            serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 2750
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          properties:
+            bucket_count 3
+            bucket_field_name key
+            columns key,value
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.srcbucket_mapjoin_part_1
+            numFiles 5
+            numPartitions 2
+            numRows 0
+            partition_columns part
+            rawDataSize 0
+            serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 6950
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.srcbucket_mapjoin_part_1
+          name: default.srcbucket_mapjoin_part_1
+#### A masked pattern was here ####
+        Partition
+          base file name: part=2
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          partition values:
+            part 2
+          properties:
+            bucket_count 3
+            bucket_field_name key
+            columns key,value
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.srcbucket_mapjoin_part_1
+            numFiles 3
+            numPartitions 2
+            numRows 0
+            partition_columns part
+            rawDataSize 0
+            serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 4200
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          properties:
+            bucket_count 3
+            bucket_field_name key
+            columns key,value
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.srcbucket_mapjoin_part_1
+            numFiles 5
+            numPartitions 2
+            numRows 0
+            partition_columns part
+            rawDataSize 0
+            serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 6950
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.srcbucket_mapjoin_part_1
+          name: default.srcbucket_mapjoin_part_1
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+
+
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=2
+#### A masked pattern was here ####
+165	val_165
Index: ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out
===================================================================
--- ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out	(revision 0)
@@ -0,0 +1,206 @@
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (value DESC) INTO 1 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_2
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (value DESC) INTO 1 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- The partition sorting metadata matches but the table metadata does not, sorted merge join should still be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The partition sorting metadata matches but the table metadata does not, sorted merge join should still be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a
+          TableScan
+            alias: a
+            GatherStats: false
+            Sorted Merge Bucket Map Join Operator
+              condition map:
+                Inner Join 0 to 1
+              condition expressions:
+                0 {key}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col6
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col6
+                  type: string
+                outputColumnNames: _col0, _col6
+                Select Operator
+                  expressions:
+                    expr: _col0
+                    type: int
+                    expr: _col6
+                    type: string
+                  outputColumnNames: _col0, _col1
+                  Limit
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                      table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0,_col1
+                          columns.types int:string
+                          escape.delim \
+                          serialization.format 1
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+        Partition
+          base file name: part=1
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          partition values:
+            part 1
+          properties:
+            SORTBUCKETCOLSPREFIX TRUE
+            bucket_count 1
+            bucket_field_name key
+            columns key,value
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.srcbucket_mapjoin_part_1
+            numFiles 1
+            numPartitions 1
+            numRows 500
+            partition_columns part
+            rawDataSize 5312
+            serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 5812
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          properties:
+            SORTBUCKETCOLSPREFIX TRUE
+            bucket_count 1
+            bucket_field_name key
+            columns key,value
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.srcbucket_mapjoin_part_1
+            numFiles 1
+            numPartitions 1
+            numRows 500
+            partition_columns part
+            rawDataSize 5312
+            serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 5812
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.srcbucket_mapjoin_part_1
+          name: default.srcbucket_mapjoin_part_1
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+
+
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+498	val_498
Index: ql/src/test/results/clientpositive/bucketmapjoin11.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketmapjoin11.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/bucketmapjoin11.q.out	(revision 0)
@@ -0,0 +1,337 @@
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 4 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_1
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 4 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_2
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2
+PREHOOK: query: -- The table and partition bucketing metadata doesn't match but the bucket numbers of all partitions are
+-- powers of 2 and the bucketing columns match so bucket map join should be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The table and partition bucketing metadata doesn't match but the bucket numbers of all partitions are
+-- powers of 2 and the bucketing columns match so bucket map join should be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-1 depends on stages: Stage-3
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        b
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        b
+          TableScan
+            alias: b
+            GatherStats: false
+            HashTable Sink Operator
+              condition expressions:
+                0 {key}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              Position of Big Table: 0
+              Bucket Mapjoin Context:
+                Alias Bucket Base File Name Mapping:
+                  b {part=1/srcbucket20.txt=[part=1/srcbucket20.txt, part=1/srcbucket22.txt, part=2/srcbucket20.txt], part=1/srcbucket21.txt=[part=1/srcbucket21.txt, part=1/srcbucket23.txt, part=2/srcbucket21.txt], part=2/srcbucket20.txt=[part=1/srcbucket20.txt, part=2/srcbucket20.txt], part=2/srcbucket21.txt=[part=1/srcbucket21.txt, part=2/srcbucket21.txt], part=2/srcbucket22.txt=[part=1/srcbucket22.txt, part=2/srcbucket20.txt], part=2/srcbucket23.txt=[part=1/srcbucket23.txt, part=2/srcbucket21.txt]}
+                Alias Bucket File Name Mapping:
+#### A masked pattern was here ####
+                Alias Bucket Output File Name Mapping:
+#### A masked pattern was here ####
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a
+          TableScan
+            alias: a
+            GatherStats: false
+            Map Join Operator
+              condition map:
+                Inner Join 0 to 1
+              condition expressions:
+                0 {key}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col6
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col6
+                  type: string
+                outputColumnNames: _col0, _col6
+                Select Operator
+                  expressions:
+                    expr: _col0
+                    type: int
+                    expr: _col6
+                    type: string
+                  outputColumnNames: _col0, _col1
+                  Limit
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                      table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0,_col1
+                          columns.types int:string
+                          escape.delim \
+                          serialization.format 1
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false
+      Local Work:
+        Map Reduce Local Work
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+        Partition
+          base file name: part=1
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          partition values:
+            part 1
+          properties:
+            bucket_count 2
+            bucket_field_name key
+            columns key,value
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.srcbucket_mapjoin_part_1
+            numFiles 2
+            numPartitions 2
+            numRows 0
+            partition_columns part
+            rawDataSize 0
+            serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 2750
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          properties:
+            bucket_count 4
+            bucket_field_name key
+            columns key,value
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.srcbucket_mapjoin_part_1
+            numFiles 6
+            numPartitions 2
+            numRows 0
+            partition_columns part
+            rawDataSize 0
+            serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 8562
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.srcbucket_mapjoin_part_1
+          name: default.srcbucket_mapjoin_part_1
+#### A masked pattern was here ####
+        Partition
+          base file name: part=2
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          partition values:
+            part 2
+          properties:
+            bucket_count 4
+            bucket_field_name key
+            columns key,value
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.srcbucket_mapjoin_part_1
+            numFiles 4
+            numPartitions 2
+            numRows 0
+            partition_columns part
+            rawDataSize 0
+            serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 5812
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          properties:
+            bucket_count 4
+            bucket_field_name key
+            columns key,value
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.srcbucket_mapjoin_part_1
+            numFiles 6
+            numPartitions 2
+            numRows 0
+            partition_columns part
+            rawDataSize 0
+            serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 8562
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.srcbucket_mapjoin_part_1
+          name: default.srcbucket_mapjoin_part_1
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+
+
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=2
+#### A masked pattern was here ####
+165	val_165
Index: ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out
===================================================================
--- ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out	(revision 0)
@@ -0,0 +1,237 @@
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) SORTED BY (value DESC) INTO 1 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) SORTED BY (value DESC) INTO 1 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_2
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ a.key, b.value
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-1 depends on stages: Stage-3
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        b
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        b
+          TableScan
+            alias: b
+            GatherStats: false
+            HashTable Sink Operator
+              condition expressions:
+                0 {key}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              Position of Big Table: 0
+              Bucket Mapjoin Context:
+                Alias Bucket Base File Name Mapping:
+                  b {part=1/000000_0=[part=1/000000_0]}
+                Alias Bucket File Name Mapping:
+#### A masked pattern was here ####
+                Alias Bucket Output File Name Mapping:
+#### A masked pattern was here ####
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a
+          TableScan
+            alias: a
+            GatherStats: false
+            Map Join Operator
+              condition map:
+                Inner Join 0 to 1
+              condition expressions:
+                0 {key}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col6
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col6
+                  type: string
+                outputColumnNames: _col0, _col6
+                Select Operator
+                  expressions:
+                    expr: _col0
+                    type: int
+                    expr: _col6
+                    type: string
+                  outputColumnNames: _col0, _col1
+                  Limit
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+                      table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0,_col1
+                          columns.types int:string
+                          escape.delim \
+                          serialization.format 1
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false
+      Local Work:
+        Map Reduce Local Work
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+        Partition
+          base file name: part=1
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          partition values:
+            part 1
+          properties:
+            SORTBUCKETCOLSPREFIX TRUE
+            bucket_count 1
+            bucket_field_name key
+            columns key,value
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.srcbucket_mapjoin_part_1
+            numFiles 1
+            numPartitions 1
+            numRows 500
+            partition_columns part
+            rawDataSize 5312
+            serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 5812
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          properties:
+            SORTBUCKETCOLSPREFIX TRUE
+            bucket_count 1
+            bucket_field_name key
+            columns key,value
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.srcbucket_mapjoin_part_1
+            numFiles 1
+            numPartitions 1
+            numRows 500
+            partition_columns part
+            rawDataSize 5312
+            serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 5812
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: 
default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + + Stage: Stage-0 + Fetch Operator + limit: 1 + + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +498 val_498 Index: ql/src/test/results/clientpositive/bucketmapjoin8.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin8.q.out (revision 0) +++ ql/src/test/results/clientpositive/bucketmapjoin8.q.out (revision 0) @@ -0,0 +1,406 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE 
srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: -- The partition bucketing metadata matches but the table metadata does not, bucket map join should still be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- The partition bucketing metadata matches but the table metadata does not, bucket map join should still be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' LIMIT 1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) value))) (TOK_LIMIT 1))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + GatherStats: false + HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {part=1/srcbucket20.txt=[part=1/srcbucket20.txt], part=1/srcbucket21.txt=[part=1/srcbucket21.txt]} + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + GatherStats: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col6 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col6 + type: string + outputColumnNames: _col0, _col6 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col6 + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numPartitions 1 + numRows 0 + partition_columns part + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numPartitions 1 + numRows 0 + partition_columns part + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + + Stage: Stage-0 + Fetch Operator + limit: 1 + + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +165 val_165 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) value))) (TOK_LIMIT 1))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + GatherStats: false + HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {part=1/srcbucket20.txt=[part=1/srcbucket20.txt], part=1/srcbucket21.txt=[part=1/srcbucket21.txt]} + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + GatherStats: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col6 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col6 + type: string + outputColumnNames: _col0, _col6 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col6 + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numPartitions 1 + numRows 0 + partition_columns part + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numPartitions 1 + numRows 0 + partition_columns part + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + + Stage: Stage-0 + Fetch Operator + limit: 1 + + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +165 val_165 Index: ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out =================================================================== --- ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out (revision 0) +++ ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out (revision 0) @@ -0,0 +1,348 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key, value) SORTED BY (key DESC) INTO 1 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key, value) SORTED BY (key DESC) INTO 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 1 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 1 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: 
default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 1 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (key DESC) INTO 1 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (key DESC) INTO 1 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=2 +POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 1 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 1 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) value))) (TOK_LIMIT 1))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + GatherStats: false + HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + GatherStats: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col6 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col6 + type: string + outputColumnNames: _col0, _col6 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col6 + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + bucket_count 1 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 1 + numPartitions 2 + numRows 500 + partition_columns part + rawDataSize 5312 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 1 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numPartitions 2 + numRows 1000 + partition_columns part + rawDataSize 10624 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11624 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 +#### A masked pattern was here #### + Partition + base file name: part=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + 
bucket_count 1 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 1 + numPartitions 2 + numRows 500 + partition_columns part + rawDataSize 5312 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 1 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numPartitions 2 + numRows 1000 + partition_columns part + rawDataSize 10624 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11624 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + + Stage: Stage-0 + Fetch Operator + limit: 1 + + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=2 +#### A masked pattern was here #### +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +498 val_498 Index: ql/src/test/results/clientpositive/bucketmapjoin9.q.out 
=================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin9.q.out (revision 0) +++ ql/src/test/results/clientpositive/bucketmapjoin9.q.out (revision 0) @@ -0,0 +1,427 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE 
srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: -- The table bucketing metadata matches but the partition metadata does not, bucket map join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- The table bucketing metadata matches but the partition metadata does not, bucket map join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' LIMIT 1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_LIMIT 1))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + GatherStats: false + HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + GatherStats: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col6 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col6 + type: string + outputColumnNames: _col0, _col6 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col6 + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numPartitions 1 + 
numRows 0 + partition_columns part + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numPartitions 1 + numRows 0 + partition_columns part + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + + Stage: Stage-0 + Fetch Operator + limit: 1 + + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +165 val_165 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 DROP PARTITION (part='1') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 DROP PARTITION (part='1') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO 
TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_LIMIT 1))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + GatherStats: false + HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + GatherStats: false + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col6 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col6 + type: string + outputColumnNames: _col0, _col6 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col6 + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + bucket_count 2 + bucket_field_name 
key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numPartitions 1 + numRows 0 + partition_columns part + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numPartitions 1 + numRows 0 + partition_columns part + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + + Stage: Stage-0 + Fetch Operator + limit: 1 + + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +165 val_165 Index: ql/src/test/queries/clientnegative/alter_numbuckets_partitioned_table.q =================================================================== --- ql/src/test/queries/clientnegative/alter_numbuckets_partitioned_table.q (revision 1382095) +++ ql/src/test/queries/clientnegative/alter_numbuckets_partitioned_table.q (working copy) @@ -1,9 +0,0 @@ - -create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets; - -alter table tst1 clustered by (key) into 8 buckets; - -set hive.enforce.bucketing=true; -insert overwrite table tst1 partition (ds='1') select key, value from src; - -alter table tst1 clustered by (key) into 12 buckets; Index: ql/src/test/queries/clientpositive/bucketmapjoin8.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin8.q (revision 0) +++ ql/src/test/queries/clientpositive/bucketmapjoin8.q (revision 0) @@ -0,0 +1,37 @@ +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; + +CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); + +CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); + +ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS; + +set hive.optimize.bucketmapjoin=true; + +-- The partition bucketing metadata matches but the table metadata does not, bucket map join should still be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' LIMIT 1; + +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' LIMIT 1; + +ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS; + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1; + +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1; Index: ql/src/test/queries/clientpositive/bucketmapjoin9.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin9.q (revision 0) +++ ql/src/test/queries/clientpositive/bucketmapjoin9.q (revision 0) @@ -0,0 +1,43 @@ +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; + +CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); + +CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); + +ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS; + +set hive.optimize.bucketmapjoin=true; + +-- The table bucketing metadata matches but the partition metadata does not, bucket map join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' LIMIT 1; + +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' LIMIT 1; + +ALTER TABLE srcbucket_mapjoin_part_2 DROP PARTITION (part='1'); +ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS; +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); + +ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS;
+ +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1; + +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1; Index: ql/src/test/queries/clientpositive/sort_merge_join_desc_5.q =================================================================== --- ql/src/test/queries/clientpositive/sort_merge_join_desc_5.q (revision 0) +++ ql/src/test/queries/clientpositive/sort_merge_join_desc_5.q (revision 0) @@ -0,0 +1,26 @@ +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; + +CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS; +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src; + +CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS; +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src; + +ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (value DESC) INTO 1 BUCKETS; + +set hive.optimize.bucketmapjoin=true; +set hive.optimize.bucketmapjoin.sortedmerge = true; + +-- The partition sorting metadata matches but the table metadata does not, sorted merge join should still be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1; + +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1; Index: ql/src/test/queries/clientpositive/sort_merge_join_desc_6.q =================================================================== --- ql/src/test/queries/clientpositive/sort_merge_join_desc_6.q (revision 0) +++ ql/src/test/queries/clientpositive/sort_merge_join_desc_6.q (revision 0) @@ -0,0 +1,26 @@ +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; + +CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS; +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src; + +CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (value DESC) INTO 1 BUCKETS; +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src; + +ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS; + +set hive.optimize.bucketmapjoin=true; +set hive.optimize.bucketmapjoin.sortedmerge = true; + +-- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1; + +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' LIMIT 1; Index: ql/src/test/queries/clientpositive/sort_merge_join_desc_7.q =================================================================== --- 
ql/src/test/queries/clientpositive/sort_merge_join_desc_7.q (revision 0) +++ ql/src/test/queries/clientpositive/sort_merge_join_desc_7.q (revision 0) @@ -0,0 +1,32 @@ +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; + +CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key, value) SORTED BY (key DESC) INTO 1 BUCKETS; +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src; + +ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 1 BUCKETS; +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') SELECT * FROM src; + +CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 1 BUCKETS; +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src; + +ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (key DESC) INTO 1 BUCKETS; +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') SELECT * FROM src; + +ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 1 BUCKETS; + +set hive.optimize.bucketmapjoin=true; +set hive.optimize.bucketmapjoin.sortedmerge = true; + +-- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1; + +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1; Index: ql/src/test/queries/clientpositive/bucketmapjoin10.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin10.q (revision 0) +++ ql/src/test/queries/clientpositive/bucketmapjoin10.q (revision 0) @@ -0,0 +1,36 @@ +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; + +CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); + +ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 3 BUCKETS; +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); + +CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); + +ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS; +LOAD DATA LOCAL INPATH 
'../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); + +ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS; + +set hive.optimize.bucketmapjoin=true; + +-- The table bucketing metadata matches but the partition metadata does not, bucket map join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1; + +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1; Index: ql/src/test/queries/clientpositive/bucketmapjoin11.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin11.q (revision 0) +++ ql/src/test/queries/clientpositive/bucketmapjoin11.q (revision 0) @@ -0,0 +1,38 @@ +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; + +CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1'); + +ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 4 BUCKETS; +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2'); + +CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1'); + +ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS; +LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); +LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); + + +set hive.optimize.bucketmapjoin=true; + +-- The table and partition bucketing metadata don't match but the bucket numbers of all partitions are +-- powers of 2 and the bucketing columns match, so bucket map join should be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL LIMIT 1; + +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part
IS NOT NULL AND b.part IS NOT NULL LIMIT 1; Index: ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q =================================================================== --- ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q (revision 0) +++ ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q (revision 0) @@ -0,0 +1,39 @@ + +create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets; + +alter table tst1 clustered by (key) into 8 buckets; + +describe formatted tst1; + +set hive.enforce.bucketing=true; +insert overwrite table tst1 partition (ds='1') select key, value from src; + +-- Test changing bucket number + +alter table tst1 clustered by (key) into 12 buckets; + +describe formatted tst1; + +-- Test adding sort order + +alter table tst1 clustered by (key) sorted by (key asc) into 12 buckets; + +describe formatted tst1; + +-- Test changing sort order + +alter table tst1 clustered by (key) sorted by (value desc) into 12 buckets; + +describe formatted tst1; + +-- Test removing sort order + +alter table tst1 clustered by (value) into 12 buckets; + +describe formatted tst1; + +-- Test removing buckets + +alter table tst1 not clustered; + +describe formatted tst1; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (revision 1382095) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (working copy) @@ -1324,24 +1324,6 @@ throw new SemanticException(ErrorMsg.INVALID_BUCKET_NUMBER.getMsg()); } - // If the table is partitioned, the number of buckets cannot be changed - // (unless the table is empty). - // The hive code uses bucket information from the table, and changing the - // number of buckets can lead to wrong results for bucketed join/sampling - // etc. This should be fixed as part of HIVE-3283. - // Once the above jira is fixed, this error check/message should be removed - if (tab.isPartitioned()) { - try { - List<String> partitionNames = db.getPartitionNames(tableName, (short)1); - if ((partitionNames != null) && (!partitionNames.isEmpty())) { - throw new - SemanticException(ErrorMsg.NUM_BUCKETS_CHANGE_NOT_ALLOWED.getMsg()); - } - } catch (HiveException e) { - throw new SemanticException(e.getMessage()); - } - } - AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, numBuckets, bucketCols, sortCols); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (revision 1382095) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (working copy) @@ -344,12 +344,8 @@ return outputFormatClass; } - /** - * The number of buckets is a property of the partition. However - internally - * we are just storing it as a property of the table as a short term measure.
- */ public int getBucketCount() { - return table.getNumBuckets(); + return tPartition.getSd().getNumBuckets(); /* * TODO: Keeping this code around for later use when we will support * sampling on tables which are not created with CLUSTERED INTO clause Index: ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (revision 1382095) +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (working copy) @@ -216,9 +216,6 @@ UDAF_INVALID_LOCATION(10128, "Not yet supported place for UDAF"), DROP_PARTITION_NON_STRING_PARTCOLS_NONEQUALITY(10129, "Drop partitions for a non string partition columns is not allowed using non-equality"), - NUM_BUCKETS_CHANGE_NOT_ALLOWED(10130, "Changing the number of buckets for a " + - "partitioned table is not allowed. It may lead to wrong results for " + - "older partitions"), ALTER_COMMAND_FOR_VIEWS(10131, "To alter a view you need to use the ALTER VIEW command."), ALTER_COMMAND_FOR_TABLES(10132, "To alter a base table you need to use the ALTER TABLE command."), ALTER_VIEW_DISALLOWED_OP(10133, "Cannot use this form of ALTER on a view"),
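Taken together, the DDLSemanticAnalyzer.java, Partition.java, and ErrorMsg.java hunks above drop the old 10130 guard and make the bucket count a per-partition property read from the partition's own StorageDescriptor. A minimal HiveQL sketch of the behavior this enables; the table name tst2 is hypothetical, and src is the same source table the tests above use:

create table tst2(key string, value string) partitioned by (ds string) clustered by (key) into 4 buckets;
set hive.enforce.bucketing=true;
insert overwrite table tst2 partition (ds='1') select key, value from src;

-- before this patch the next statement failed with error 10130 once any partition existed;
-- it is now allowed, and only partitions written afterwards pick up the new count
alter table tst2 clustered by (key) into 8 buckets;
insert overwrite table tst2 partition (ds='2') select key, value from src;

-- ds='1' should still report Num Buckets: 4 while ds='2' reports 8, since
-- Partition.getBucketCount() now returns tPartition.getSd().getNumBuckets()
-- instead of the table-level value
describe formatted tst2 partition (ds='1');
describe formatted tst2 partition (ds='2');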
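The bucketmapjoin8-11 tests above encode the join-side consequence: the optimizer now compares bucket metadata for the partitions actually being joined rather than trusting the table-level value. A sketch of the eligible case called out in the bucketmapjoin11 comment, using hypothetical tables big and small, both clustered by key, whose joined partitions hold 2 and 4 buckets respectively:

set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
set hive.optimize.bucketmapjoin=true;

-- with per-partition bucket counts that are powers of 2 on the same key,
-- each bucket of the big table corresponds to a fixed subset of the small
-- table's buckets, so a mapper loads only that subset and the bucket map
-- join remains valid even though the table-level metadata disagrees
explain extended
select /*+ mapjoin(s) */ b.key, s.value
from big b join small s
on b.key = s.key and b.part = '1' and s.part = '1' limit 1;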