diff --git metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
index 3e74675..78c0eb9 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
@@ -18,6 +18,10 @@
 package org.apache.hadoop.hive.metastore;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hive.metastore.api.*;
 import org.apache.hadoop.hive.metastore.events.PreAlterTableEvent;
 import org.apache.hadoop.hive.metastore.events.PreCreateTableEvent;
@@ -25,13 +29,19 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
+import java.util.regex.Pattern;
 
 final class TransactionalValidationListener extends MetaStorePreEventListener {
   public static final Logger LOG = LoggerFactory.getLogger(TransactionalValidationListener.class);
 
+  /** Matches file names of the form digits_digits_copy_digits, e.g. 000000_0_copy_1. */
+  public static final Pattern COPY_N_PATTERN =
+      Pattern.compile("[0-9]+_[0-9]+" + "_copy_" + "[0-9]+");
+
   TransactionalValidationListener(Configuration conf) {
     super(conf);
   }
@@ -89,7 +99,12 @@ private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throw
 
       if (newTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) {
         throw new MetaException(newTable.getDbName() + "." + newTable.getTableName() +
-                " cannot be declared transactional because it's an external table");
+            " cannot be declared transactional because it's an external table");
+      }
+
+      if (containsCopyNFiles(context.getHandler().getMS(), newTable)) {
+        throw new MetaException(newTable.getDbName() + "." + newTable.getTableName() +
+            " cannot be declared transactional because it has _COPY_N files.");
       }
 
       return;
@@ -187,4 +202,53 @@ private boolean conformToAcid(Table table) throws MetaException {
 
     return true;
   }
-}
\ No newline at end of file
+
+  /**
+   * Checks whether the table's directory tree contains any file named like *_copy_N
+   * (see {@link #COPY_N_PATTERN}). The table can't be converted to ACID if it does.
+   * See HIVE-16177 for details.
+   *
+   * @param ms metastore used to look up the table's database when the table's storage
+   *           descriptor has no location set
+   * @param table table whose files are inspected
+   * @return True if table contains files named *_copy_N. False otherwise.
+   * @throws MetaException if the table location cannot be resolved or its files cannot be listed
+   */
+  boolean containsCopyNFiles(RawStore ms, Table table) throws MetaException {
+    Warehouse wh = new Warehouse(getConf());
+
+    try {
+      Path tablePath;
+      if (table.getSd().getLocation() == null
+          || table.getSd().getLocation().isEmpty()) {
+        // No explicit location: ask the warehouse for the default path of this db/table.
+        tablePath = wh.getDefaultTablePath(
+            ms.getDatabase(table.getDbName()), table.getTableName());
+      } else {
+        tablePath = wh.getDnsPath(new Path(table.getSd().getLocation()));
+      }
+      FileSystem fs = wh.getFs(tablePath);
+      // Recursive listing, so files in nested (e.g. partition) directories are checked too.
+      RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(tablePath, true);
+      while (iterator.hasNext()) {
+        LocatedFileStatus fileStatus = iterator.next();
+        if (COPY_N_PATTERN.matcher(fileStatus.getPath().getName()).matches()) {
+          return true;
+        }
+      }
+    } catch (IOException e) {
+      MetaException me = new MetaException("Unable to list files for " + table.getDbName() + "."+
+          table.getTableName());
+      // Keep the underlying failure attached instead of discarding it.
+      me.initCause(e);
+      throw me;
+    } catch (NoSuchObjectException e) {
+      MetaException me = new MetaException("Unable to get location for " + table.getDbName() + "."+
+          table.getTableName());
+      me.initCause(e);
+      throw me;
+    }
+
+    return false;
+  }
+}
diff --git ql/src/test/queries/clientnegative/acid_copy_n1.q ql/src/test/queries/clientnegative/acid_copy_n1.q
new file mode 100644
index 0000000..2ae5380
--- /dev/null
+++ ql/src/test/queries/clientnegative/acid_copy_n1.q
@@ -0,0 +1,8 @@
+create table nocopyfiles(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false');
+insert into nocopyfiles(a,b) values(1,2);
+alter table nocopyfiles SET TBLPROPERTIES ('transactional'='true');
+
+create table 
withcopyfiles(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false'); +insert into withcopyfiles(a,b) values(1,2); +insert into withcopyfiles(a,b) values(1,3); +alter table withcopyfiles SET TBLPROPERTIES ('transactional'='true'); diff --git ql/src/test/queries/clientnegative/acid_copy_n2.q ql/src/test/queries/clientnegative/acid_copy_n2.q new file mode 100644 index 0000000..d72eb4f --- /dev/null +++ ql/src/test/queries/clientnegative/acid_copy_n2.q @@ -0,0 +1,11 @@ +create database acidtestdb; +use acidtestdb; + +create table nocopyfiles(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false'); +insert into nocopyfiles(a,b) values(1,2); +alter table nocopyfiles SET TBLPROPERTIES ('transactional'='true'); + +create table withcopyfiles(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false'); +insert into withcopyfiles(a,b) values(1,2); +insert into withcopyfiles(a,b) values(1,3); +alter table withcopyfiles SET TBLPROPERTIES ('transactional'='true'); diff --git ql/src/test/queries/clientnegative/acid_copy_n3.q ql/src/test/queries/clientnegative/acid_copy_n3.q new file mode 100644 index 0000000..674dc57 --- /dev/null +++ ql/src/test/queries/clientnegative/acid_copy_n3.q @@ -0,0 +1,12 @@ +create database acidtestdb; +use acidtestdb; + +create table nocopyfiles(a int, b int) partitioned by (c int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false'); +insert into table nocopyfiles partition (c=1) values(1, 1); +alter table nocopyfiles SET TBLPROPERTIES ('transactional'='true'); + +create table withcopyfiles(a int, b int) partitioned by (c int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false'); +insert into table withcopyfiles partition (c=1) values (1, 1); +insert into table withcopyfiles partition (c=1) values (1, 1); +insert into table withcopyfiles partition (c=2) values (2, 
2); +alter table withcopyfiles SET TBLPROPERTIES ('transactional'='true'); diff --git ql/src/test/results/clientnegative/acid_copy_n1.q.out ql/src/test/results/clientnegative/acid_copy_n1.q.out new file mode 100644 index 0000000..1ff1090 --- /dev/null +++ ql/src/test/results/clientnegative/acid_copy_n1.q.out @@ -0,0 +1,59 @@ +PREHOOK: query: create table nocopyfiles(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nocopyfiles +POSTHOOK: query: create table nocopyfiles(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nocopyfiles +PREHOOK: query: insert into nocopyfiles(a,b) values(1,2) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@nocopyfiles +POSTHOOK: query: insert into nocopyfiles(a,b) values(1,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@nocopyfiles +POSTHOOK: Lineage: nocopyfiles.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: nocopyfiles.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: alter table nocopyfiles SET TBLPROPERTIES ('transactional'='true') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@nocopyfiles +PREHOOK: Output: default@nocopyfiles +POSTHOOK: query: alter table nocopyfiles SET TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@nocopyfiles +POSTHOOK: Output: default@nocopyfiles +PREHOOK: query: create table withcopyfiles(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false') 
+PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@withcopyfiles +POSTHOOK: query: create table withcopyfiles(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@withcopyfiles +PREHOOK: query: insert into withcopyfiles(a,b) values(1,2) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@withcopyfiles +POSTHOOK: query: insert into withcopyfiles(a,b) values(1,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@withcopyfiles +POSTHOOK: Lineage: withcopyfiles.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: withcopyfiles.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into withcopyfiles(a,b) values(1,3) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@withcopyfiles +POSTHOOK: query: insert into withcopyfiles(a,b) values(1,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@withcopyfiles +POSTHOOK: Lineage: withcopyfiles.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: withcopyfiles.b EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: alter table withcopyfiles SET TBLPROPERTIES ('transactional'='true') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@withcopyfiles +PREHOOK: Output: default@withcopyfiles +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Unable to alter table. 
default.withcopyfiles cannot be declared transactional because it has _COPY_N files. diff --git ql/src/test/results/clientnegative/acid_copy_n2.q.out ql/src/test/results/clientnegative/acid_copy_n2.q.out new file mode 100644 index 0000000..aa610ec --- /dev/null +++ ql/src/test/results/clientnegative/acid_copy_n2.q.out @@ -0,0 +1,71 @@ +PREHOOK: query: create database acidtestdb +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:acidtestdb +POSTHOOK: query: create database acidtestdb +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:acidtestdb +PREHOOK: query: use acidtestdb +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:acidtestdb +POSTHOOK: query: use acidtestdb +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:acidtestdb +PREHOOK: query: create table nocopyfiles(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false') +PREHOOK: type: CREATETABLE +PREHOOK: Output: acidtestdb@nocopyfiles +PREHOOK: Output: database:acidtestdb +POSTHOOK: query: create table nocopyfiles(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: acidtestdb@nocopyfiles +POSTHOOK: Output: database:acidtestdb +PREHOOK: query: insert into nocopyfiles(a,b) values(1,2) +PREHOOK: type: QUERY +PREHOOK: Input: acidtestdb@values__tmp__table__1 +PREHOOK: Output: acidtestdb@nocopyfiles +POSTHOOK: query: insert into nocopyfiles(a,b) values(1,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: acidtestdb@values__tmp__table__1 +POSTHOOK: Output: acidtestdb@nocopyfiles +POSTHOOK: Lineage: nocopyfiles.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: nocopyfiles.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: alter table nocopyfiles SET TBLPROPERTIES 
('transactional'='true') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: acidtestdb@nocopyfiles +PREHOOK: Output: acidtestdb@nocopyfiles +POSTHOOK: query: alter table nocopyfiles SET TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: acidtestdb@nocopyfiles +POSTHOOK: Output: acidtestdb@nocopyfiles +PREHOOK: query: create table withcopyfiles(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false') +PREHOOK: type: CREATETABLE +PREHOOK: Output: acidtestdb@withcopyfiles +PREHOOK: Output: database:acidtestdb +POSTHOOK: query: create table withcopyfiles(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: acidtestdb@withcopyfiles +POSTHOOK: Output: database:acidtestdb +PREHOOK: query: insert into withcopyfiles(a,b) values(1,2) +PREHOOK: type: QUERY +PREHOOK: Input: acidtestdb@values__tmp__table__2 +PREHOOK: Output: acidtestdb@withcopyfiles +POSTHOOK: query: insert into withcopyfiles(a,b) values(1,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: acidtestdb@values__tmp__table__2 +POSTHOOK: Output: acidtestdb@withcopyfiles +POSTHOOK: Lineage: withcopyfiles.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: withcopyfiles.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into withcopyfiles(a,b) values(1,3) +PREHOOK: type: QUERY +PREHOOK: Input: acidtestdb@values__tmp__table__3 +PREHOOK: Output: acidtestdb@withcopyfiles +POSTHOOK: query: insert into withcopyfiles(a,b) values(1,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: acidtestdb@values__tmp__table__3 +POSTHOOK: Output: acidtestdb@withcopyfiles +POSTHOOK: Lineage: withcopyfiles.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ] +POSTHOOK: Lineage: withcopyfiles.b EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: alter table withcopyfiles SET TBLPROPERTIES ('transactional'='true') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: acidtestdb@withcopyfiles +PREHOOK: Output: acidtestdb@withcopyfiles +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Unable to alter table. acidtestdb.withcopyfiles cannot be declared transactional because it has _COPY_N files. diff --git ql/src/test/results/clientnegative/acid_copy_n3.q.out ql/src/test/results/clientnegative/acid_copy_n3.q.out new file mode 100644 index 0000000..6878013 --- /dev/null +++ ql/src/test/results/clientnegative/acid_copy_n3.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: create database acidtestdb +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:acidtestdb +POSTHOOK: query: create database acidtestdb +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:acidtestdb +PREHOOK: query: use acidtestdb +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:acidtestdb +POSTHOOK: query: use acidtestdb +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:acidtestdb +PREHOOK: query: create table nocopyfiles(a int, b int) partitioned by (c int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false') +PREHOOK: type: CREATETABLE +PREHOOK: Output: acidtestdb@nocopyfiles +PREHOOK: Output: database:acidtestdb +POSTHOOK: query: create table nocopyfiles(a int, b int) partitioned by (c int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: acidtestdb@nocopyfiles +POSTHOOK: Output: database:acidtestdb +PREHOOK: query: insert into table nocopyfiles partition (c=1) values(1, 1) +PREHOOK: type: QUERY +PREHOOK: Input: acidtestdb@values__tmp__table__1 +PREHOOK: Output: 
acidtestdb@nocopyfiles@c=1 +POSTHOOK: query: insert into table nocopyfiles partition (c=1) values(1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: acidtestdb@values__tmp__table__1 +POSTHOOK: Output: acidtestdb@nocopyfiles@c=1 +POSTHOOK: Lineage: nocopyfiles PARTITION(c=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: nocopyfiles PARTITION(c=1).b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: alter table nocopyfiles SET TBLPROPERTIES ('transactional'='true') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: acidtestdb@nocopyfiles +PREHOOK: Output: acidtestdb@nocopyfiles +POSTHOOK: query: alter table nocopyfiles SET TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: acidtestdb@nocopyfiles +POSTHOOK: Output: acidtestdb@nocopyfiles +PREHOOK: query: create table withcopyfiles(a int, b int) partitioned by (c int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false') +PREHOOK: type: CREATETABLE +PREHOOK: Output: acidtestdb@withcopyfiles +PREHOOK: Output: database:acidtestdb +POSTHOOK: query: create table withcopyfiles(a int, b int) partitioned by (c int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES('transactional'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: acidtestdb@withcopyfiles +POSTHOOK: Output: database:acidtestdb +PREHOOK: query: insert into table withcopyfiles partition (c=1) values (1, 1) +PREHOOK: type: QUERY +PREHOOK: Input: acidtestdb@values__tmp__table__2 +PREHOOK: Output: acidtestdb@withcopyfiles@c=1 +POSTHOOK: query: insert into table withcopyfiles partition (c=1) values (1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: acidtestdb@values__tmp__table__2 +POSTHOOK: Output: acidtestdb@withcopyfiles@c=1 +POSTHOOK: Lineage: withcopyfiles PARTITION(c=1).a EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: withcopyfiles PARTITION(c=1).b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into table withcopyfiles partition (c=1) values (1, 1) +PREHOOK: type: QUERY +PREHOOK: Input: acidtestdb@values__tmp__table__3 +PREHOOK: Output: acidtestdb@withcopyfiles@c=1 +POSTHOOK: query: insert into table withcopyfiles partition (c=1) values (1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: acidtestdb@values__tmp__table__3 +POSTHOOK: Output: acidtestdb@withcopyfiles@c=1 +POSTHOOK: Lineage: withcopyfiles PARTITION(c=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: withcopyfiles PARTITION(c=1).b EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into table withcopyfiles partition (c=2) values (2, 2) +PREHOOK: type: QUERY +PREHOOK: Input: acidtestdb@values__tmp__table__4 +PREHOOK: Output: acidtestdb@withcopyfiles@c=2 +POSTHOOK: query: insert into table withcopyfiles partition (c=2) values (2, 2) +POSTHOOK: type: QUERY +POSTHOOK: Input: acidtestdb@values__tmp__table__4 +POSTHOOK: Output: acidtestdb@withcopyfiles@c=2 +POSTHOOK: Lineage: withcopyfiles PARTITION(c=2).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: withcopyfiles PARTITION(c=2).b EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: alter table withcopyfiles SET TBLPROPERTIES ('transactional'='true') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: acidtestdb@withcopyfiles +PREHOOK: Output: acidtestdb@withcopyfiles +FAILED: Execution Error, return code 1 from 
org.apache.hadoop.hive.ql.exec.DDLTask. Unable to alter table. acidtestdb.withcopyfiles cannot be declared transactional because it has _COPY_N files.