diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index 209349b..0ed5cac 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -751,7 +751,7 @@ public static Properties getPartitionMetadata( org.apache.hadoop.hive.metastore.api.Partition partition, org.apache.hadoop.hive.metastore.api.Table table) { return MetaStoreUtils - .getSchema(partition.getSd(), partition.getSd(), partition + .getSchema(partition.getSd(), table.getSd(), partition .getParameters(), table.getDbName(), table.getTableName(), table.getPartitionKeys()); } diff --git ql/src/test/queries/clientpositive/partition_data_after_schema_update.q ql/src/test/queries/clientpositive/partition_data_after_schema_update.q new file mode 100644 index 0000000..bd6ef16 --- /dev/null +++ ql/src/test/queries/clientpositive/partition_data_after_schema_update.q @@ -0,0 +1,28 @@ +-- After columns are added to table, partition data retrieval should honor +-- new schema, even if partition was created with old schema. Data added +-- to old partition matching new schema should show up correctly. + +CREATE TABLE IF NOT EXISTS test_part_schema_update (a STRING) +PARTITIONED BY (dt STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '\t' +STORED AS TEXTFILE; + +-- table with one column, insert value for that column +INSERT OVERWRITE TABLE test_part_schema_update PARTITION (dt=20131211) +SELECT "1576" FROM src1 LIMIT 1; + +-- this query returns correct values +SELECT * FROM test_part_schema_update WHERE dt=20131211; + +-- add column and append new row with new schema +ALTER TABLE test_part_schema_update ADD COLUMNS (b string); +INSERT INTO TABLE test_part_schema_update PARTITION (dt=20131211) +select "1576", "1577" from src1 LIMIT 1; + +-- this query should return 2 rows. First row has null for new column, but +-- second row should have right value +SELECT * FROM test_part_schema_update WHERE dt=20131211; + +-- cleanup +DROP TABLE test_part_schema_update; diff --git ql/src/test/results/clientpositive/partition_data_after_schema_update.q.out ql/src/test/results/clientpositive/partition_data_after_schema_update.q.out new file mode 100644 index 0000000..1a08b75 --- /dev/null +++ ql/src/test/results/clientpositive/partition_data_after_schema_update.q.out @@ -0,0 +1,106 @@ +PREHOOK: query: -- After columns are added to table, partition data retrieval should honor +-- new schema, even if partition was created with old schema. Data added +-- to old partition matching new schema should show up correctly. + +CREATE TABLE IF NOT EXISTS test_part_schema_update (a STRING) +PARTITIONED BY (dt STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '\t' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- After columns are added to table, partition data retrieval should honor +-- new schema, even if partition was created with old schema. Data added +-- to old partition matching new schema should show up correctly. + +CREATE TABLE IF NOT EXISTS test_part_schema_update (a STRING) +PARTITIONED BY (dt STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '\t' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_part_schema_update +PREHOOK: query: -- table with one column, insert value for that column +INSERT OVERWRITE TABLE test_part_schema_update PARTITION (dt=20131211) +SELECT "1576" FROM src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@test_part_schema_update@dt=20131211 +POSTHOOK: query: -- table with one column, insert value for that column +INSERT OVERWRITE TABLE test_part_schema_update PARTITION (dt=20131211) +SELECT "1576" FROM src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@test_part_schema_update@dt=20131211 +POSTHOOK: Lineage: test_part_schema_update PARTITION(dt=20131211).a SIMPLE [] +PREHOOK: query: -- this query returns correct values +SELECT * FROM test_part_schema_update WHERE dt=20131211 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_part_schema_update +PREHOOK: Input: default@test_part_schema_update@dt=20131211 +#### A masked pattern was here #### +POSTHOOK: query: -- this query returns correct values +SELECT * FROM test_part_schema_update WHERE dt=20131211 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_part_schema_update +POSTHOOK: Input: default@test_part_schema_update@dt=20131211 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_part_schema_update PARTITION(dt=20131211).a SIMPLE [] +1576 20131211 +PREHOOK: query: -- add column and append new row with new schema +ALTER TABLE test_part_schema_update ADD COLUMNS (b string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@test_part_schema_update +PREHOOK: Output: default@test_part_schema_update +POSTHOOK: query: -- add column and append new row with new schema +ALTER TABLE test_part_schema_update ADD COLUMNS (b string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@test_part_schema_update +POSTHOOK: Output: default@test_part_schema_update +POSTHOOK: Lineage: test_part_schema_update PARTITION(dt=20131211).a SIMPLE [] +PREHOOK: query: INSERT INTO TABLE test_part_schema_update PARTITION (dt=20131211) +select "1576", "1577" from src1 LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@test_part_schema_update@dt=20131211 +POSTHOOK: query: INSERT INTO TABLE test_part_schema_update PARTITION (dt=20131211) +select "1576", "1577" from src1 LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@test_part_schema_update@dt=20131211 +POSTHOOK: Lineage: test_part_schema_update PARTITION(dt=20131211).a SIMPLE [] +POSTHOOK: Lineage: test_part_schema_update PARTITION(dt=20131211).a SIMPLE [] +POSTHOOK: Lineage: test_part_schema_update PARTITION(dt=20131211).b SIMPLE [] +PREHOOK: query: -- this query should return 2 rows. First row has null for new column, but +-- second row should have right value +SELECT * FROM test_part_schema_update WHERE dt=20131211 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_part_schema_update +PREHOOK: Input: default@test_part_schema_update@dt=20131211 +#### A masked pattern was here #### +POSTHOOK: query: -- this query should return 2 rows. First row has null for new column, but +-- second row should have right value +SELECT * FROM test_part_schema_update WHERE dt=20131211 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_part_schema_update +POSTHOOK: Input: default@test_part_schema_update@dt=20131211 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_part_schema_update PARTITION(dt=20131211).a SIMPLE [] +POSTHOOK: Lineage: test_part_schema_update PARTITION(dt=20131211).a SIMPLE [] +POSTHOOK: Lineage: test_part_schema_update PARTITION(dt=20131211).b SIMPLE [] +1576 NULL 20131211 +1576 1577 20131211 +PREHOOK: query: -- cleanup +DROP TABLE test_part_schema_update +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_part_schema_update +PREHOOK: Output: default@test_part_schema_update +POSTHOOK: query: -- cleanup +DROP TABLE test_part_schema_update +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_part_schema_update +POSTHOOK: Output: default@test_part_schema_update +POSTHOOK: Lineage: test_part_schema_update PARTITION(dt=20131211).a SIMPLE [] +POSTHOOK: Lineage: test_part_schema_update PARTITION(dt=20131211).a SIMPLE [] +POSTHOOK: Lineage: test_part_schema_update PARTITION(dt=20131211).b SIMPLE []