diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 88381aa..5aedd10 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -427,6 +427,7 @@ minitez.query.files=bucket_map_join_tez1.q,\ mrr.q,\ orc_ppd_basic.q,\ orc_merge_diff_fs.q,\ + partition_column_names_with_leading_and_trailing_spaces.q,\ stats_filemetadata.q,\ tez_aggr_part_stats.q,\ tez_bmj_schema_evolution.q,\ diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 5c49be9..550db83 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -1805,7 +1805,8 @@ public Partition getPartition(String dbName, String tableName, private MPartition getMPartition(String dbName, String tableName, List part_vals) throws MetaException { - MPartition mpart = null; + List mparts = null; + MPartition ret = null; boolean commited = false; Query query = null; try { @@ -1825,10 +1826,27 @@ private MPartition getMPartition(String dbName, String tableName, List p pm.newQuery(MPartition.class, "table.tableName == t1 && table.database.name == t2 && partitionName == t3"); query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - query.setUnique(true); - mpart = (MPartition) query.execute(tableName, dbName, name); - pm.retrieve(mpart); + mparts = (List) query.execute(tableName, dbName, name); + pm.retrieveAll(mparts); commited = commitTransaction(); + // We need to compare partition name with requested name since some DBs + // (like MySQL, Derby) considers 'a' = 'a ' whereas others like (Postgres, + // Oracle) doesn't exhibit this problem. + if (mparts != null && mparts.size() > 0) { + if (mparts.size() > 1) { + throw new MetaException( + "Expecting only one partition but more than one partitions are found."); + } else { + MPartition mpart = mparts.get(0); + if (name.equals(mpart.getPartitionName())) { + ret = mpart; + } else { + throw new MetaException("Expecting a partition with name " + name + + ", but metastore is returning a partition with name " + mpart.getPartitionName() + + "."); + } + } + } } finally { if (!commited) { rollbackTransaction(); @@ -1837,7 +1855,7 @@ private MPartition getMPartition(String dbName, String tableName, List p query.closeAll(); } } - return mpart; + return ret; } /** @@ -7641,37 +7659,10 @@ public void setMetaStoreSchemaVersion(String schemaVersion, String comment) thro @Override public boolean doesPartitionExist(String dbName, String tableName, List partVals) throws MetaException { - boolean success = false; - Query query = null; try { - openTransaction(); - dbName = HiveStringUtils.normalizeIdentifier(dbName); - tableName = HiveStringUtils.normalizeIdentifier(tableName); - - // TODO: this could also be passed from upper layer; or this method should filter the list. - MTable mtbl = getMTable(dbName, tableName); - if (mtbl == null) { - success = commitTransaction(); - return false; - } - query = - pm.newQuery("select partitionName from org.apache.hadoop.hive.metastore.model.MPartition " - + "where table.tableName == t1 && table.database.name == t2 && partitionName == t3"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - query.setUnique(true); - query.setResult("partitionName"); - String name = - Warehouse.makePartName(convertToFieldSchemas(mtbl.getPartitionKeys()), partVals); - String result = (String) query.execute(tableName, dbName, name); - success = commitTransaction(); - return result != null; - } finally { - if (!success) { - rollbackTransaction(); - } - if (query != null) { - query.closeAll(); - } + return this.getPartition(dbName, tableName, partVals) != null; + } catch (NoSuchObjectException e) { + return false; } } diff --git a/ql/src/test/queries/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q b/ql/src/test/queries/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q new file mode 100644 index 0000000..f087130 --- /dev/null +++ b/ql/src/test/queries/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q @@ -0,0 +1,15 @@ +set hive.mapred.mode=nonstrict; + +create table foo (d string); + +create table foo_p (d string) partitioned by (p string); + +insert into foo values ("1"); + +insert into foo_p partition (p="a ") select foo.d from foo; + +insert into foo_p partition (p="a") select foo.d from foo; + +select * from foo_p where p="a "; + +select * from foo_p where p="a"; diff --git a/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q b/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q new file mode 100644 index 0000000..f087130 --- /dev/null +++ b/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q @@ -0,0 +1,15 @@ +set hive.mapred.mode=nonstrict; + +create table foo (d string); + +create table foo_p (d string) partitioned by (p string); + +insert into foo values ("1"); + +insert into foo_p partition (p="a ") select foo.d from foo; + +insert into foo_p partition (p="a") select foo.d from foo; + +select * from foo_p where p="a "; + +select * from foo_p where p="a"; diff --git a/ql/src/test/results/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q.out new file mode 100644 index 0000000..4dcee8c --- /dev/null +++ b/ql/src/test/results/clientnegative/partition_column_names_with_leading_and_trailing_spaces.q.out @@ -0,0 +1,35 @@ +PREHOOK: query: create table foo (d string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo +POSTHOOK: query: create table foo (d string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: create table foo_p (d string) partitioned by (p string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo_p +POSTHOOK: query: create table foo_p (d string) partitioned by (p string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo_p +PREHOOK: query: insert into foo values ("1") +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@foo +POSTHOOK: query: insert into foo values ("1") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@foo +POSTHOOK: Lineage: foo.d SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into foo_p partition (p="a ") select foo.d from foo +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +PREHOOK: Output: default@foo_p@p=a +POSTHOOK: query: insert into foo_p partition (p="a ") select foo.d from foo +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +POSTHOOK: Output: default@foo_p@p=a +POSTHOOK: Lineage: foo_p PARTITION(p=a ).d SIMPLE [(foo)foo.FieldSchema(name:d, type:string, comment:null), ] +FAILED: SemanticException [Error 10006]: Line 3:29 Partition not found '"a"' diff --git a/ql/src/test/results/clientpositive/tez/partition_column_names_with_leading_and_trailing_spaces.q.out b/ql/src/test/results/clientpositive/tez/partition_column_names_with_leading_and_trailing_spaces.q.out new file mode 100644 index 0000000..2a35c84 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/partition_column_names_with_leading_and_trailing_spaces.q.out @@ -0,0 +1,65 @@ +PREHOOK: query: create table foo (d string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo +POSTHOOK: query: create table foo (d string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: create table foo_p (d string) partitioned by (p string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo_p +POSTHOOK: query: create table foo_p (d string) partitioned by (p string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo_p +PREHOOK: query: insert into foo values ("1") +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@foo +POSTHOOK: query: insert into foo values ("1") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@foo +POSTHOOK: Lineage: foo.d SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into foo_p partition (p="a ") select foo.d from foo +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +PREHOOK: Output: default@foo_p@p=a +POSTHOOK: query: insert into foo_p partition (p="a ") select foo.d from foo +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +POSTHOOK: Output: default@foo_p@p=a +POSTHOOK: Lineage: foo_p PARTITION(p=a ).d SIMPLE [(foo)foo.FieldSchema(name:d, type:string, comment:null), ] +PREHOOK: query: insert into foo_p partition (p="a") select foo.d from foo +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +PREHOOK: Output: default@foo_p@p=a +POSTHOOK: query: insert into foo_p partition (p="a") select foo.d from foo +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +POSTHOOK: Output: default@foo_p@p=a +POSTHOOK: Lineage: foo_p PARTITION(p=a).d SIMPLE [(foo)foo.FieldSchema(name:d, type:string, comment:null), ] +PREHOOK: query: select * from foo_p where p="a " +PREHOOK: type: QUERY +PREHOOK: Input: default@foo_p +PREHOOK: Input: default@foo_p@p=a +#### A masked pattern was here #### +POSTHOOK: query: select * from foo_p where p="a " +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo_p +POSTHOOK: Input: default@foo_p@p=a +#### A masked pattern was here #### +1 a +PREHOOK: query: select * from foo_p where p="a" +PREHOOK: type: QUERY +PREHOOK: Input: default@foo_p +PREHOOK: Input: default@foo_p@p=a +#### A masked pattern was here #### +POSTHOOK: query: select * from foo_p where p="a" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo_p +POSTHOOK: Input: default@foo_p@p=a +#### A masked pattern was here #### +1 a