diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
index 8601df0..0ef11b7 100644
--- itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
+++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
@@ -23,6 +23,7 @@
 import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
@@ -69,6 +70,7 @@
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.Type;
 import org.apache.hadoop.hive.metastore.api.UnknownDBException;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
@@ -77,6 +79,7 @@
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.thrift.TException;
+import org.junit.Assert;
 import org.junit.Test;
 
 import com.google.common.collect.Lists;
@@ -2803,6 +2806,120 @@ private void createTable(String dbName, String tableName)
     createTable(dbName, tableName, null, null, null, sd, 0);
   }
 
+  @Test
+  public void testTransactionalValidation() throws Throwable {
+    String tblName = "acidTable";
+    String owner = "acid";
+    Map<String, String> fields = new HashMap<String, String>();
+    fields.put("name", serdeConstants.STRING_TYPE_NAME);
+    fields.put("income", serdeConstants.INT_TYPE_NAME);
+
+    Type type = createType("Person", fields);
+
+    Map<String, String> params = new HashMap<String, String>();
+    params.put("transactional", "");
+
+    Map<String, String> serdParams = new HashMap<String, String>();
+    serdParams.put(serdeConstants.SERIALIZATION_FORMAT, "1");
+    StorageDescriptor sd = createStorageDescriptor(tblName, type.getFields(), params, serdParams);
+    sd.setNumBuckets(0);
+    sd.unsetBucketCols();
+
+    /// CREATE TABLE scenarios
+
+    // Fail - "transactional" property is set to an empty value
+    try {
+      createTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, owner, params, null, sd, 0);
+      Assert.fail("Expected exception");
+    } catch (MetaException e) {
+      Assert.assertEquals("'transactional' property of TBLPROPERTIES may only have value 'true'", e.getMessage());
+    }
+
+    // Fail - "transactional" property is set to an invalid value
+    try {
+      params.clear();
+      params.put("transactional", "foobar");
+      createTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, owner, params, null, sd, 0);
+      Assert.fail("Expected exception");
+    } catch (MetaException e) {
+      Assert.assertEquals("'transactional' property of TBLPROPERTIES may only have value 'true'", e.getMessage());
+    }
+
+    // Fail - "transactional" is set to true, but the table is not bucketed
+    try {
+      params.clear();
+      params.put("Transactional", "True");
+      createTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, owner, params, null, sd, 0);
+      Assert.fail("Expected exception");
+    } catch (MetaException e) {
+      Assert.assertEquals("The table must be bucketed and stored as ORC in order to be transactional", e.getMessage());
+    }
+
+    // Fail - "transactional" is set to true, and the table is bucketed, but doesn't use ORC
+    try {
+      params.clear();
+      params.put("Transactional", "True");
+      List<String> bucketCols = new ArrayList<String>();
+      bucketCols.add("income");
+      sd.setBucketCols(bucketCols);
+      createTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, owner, params, null, sd, 0);
+      Assert.fail("Expected exception");
+    } catch (MetaException e) {
+      Assert.assertEquals("The table must be bucketed and stored as ORC in order to be transactional", e.getMessage());
+    }
+
+    // Succeed - "transactional" is set to true, and the table is bucketed, and uses ORC
+    params.clear();
+    params.put("Transactional", "True");
+    List<String> bucketCols = new ArrayList<String>();
+    bucketCols.add("income");
+    sd.setBucketCols(bucketCols);
+    sd.setInputFormat("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat");
+    sd.setOutputFormat("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat");
+    Table t = createTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, owner, params, null, sd, 0);
+    Assert.assertEquals("true", t.getParameters().get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL));
+
+    /// ALTER TABLE scenarios
+
+    // Fail - trying to set "transactional" to "false" is not allowed
+    try {
+      params.clear();
+      params.put("transactional", "false");
+      t = new Table();
+      t.setParameters(params);
+      client.alter_table(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, t);
+      Assert.fail("Expected exception");
+    } catch (MetaException e) {
+      Assert.assertEquals("TBLPROPERTIES with 'transactional'='true' cannot be unset", e.getMessage());
+    }
+
+    // Fail - trying to set "transactional" to "true" but doesn't satisfy bucketing and Input/OutputFormat requirement
+    try {
+      tblName += "1";
+      params.clear();
+      sd.unsetBucketCols();
+      t = createTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, owner, params, null, sd, 0);
+      params.put("transactional", "true");
+      t.setParameters(params);
+      client.alter_table(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, t);
+      Assert.fail("Expected exception");
+    } catch (MetaException e) {
+      Assert.assertEquals("The table must be bucketed and stored as ORC in order to be transactional", e.getMessage());
+    }
+
+    // Succeed - trying to set "transactional" to "true", and satisfies bucketing and Input/OutputFormat requirement
+    tblName += "2";
+    params.clear();
+    sd.setNumBuckets(1);
+    sd.setBucketCols(bucketCols);
+    t = createTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, owner, params, null, sd, 0);
+    params.put("transactional", "true");
+    t.setParameters(params);
+    t.setPartitionKeys(Collections.EMPTY_LIST);
+    client.alter_table(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, t);
+    Assert.assertEquals("true", t.getParameters().get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL));
+  }
+
   private Table createTable(String dbName, String tblName, String owner,
       Map tableParams, Map partitionKeys,
       StorageDescriptor sd, int lastAccessTime) throws Exception {
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index bb33693..4201932 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -533,6 +533,7 @@ public Object getValue() {
       preListeners = MetaStoreUtils.getMetaStoreListeners(MetaStorePreEventListener.class,
           hiveConf,
           hiveConf.getVar(HiveConf.ConfVars.METASTORE_PRE_EVENT_LISTENERS));
+      preListeners.add(0, new TransactionalValidationListener(hiveConf));
       listeners = MetaStoreUtils.getMetaStoreListeners(MetaStoreEventListener.class, hiveConf,
           hiveConf.getVar(HiveConf.ConfVars.METASTORE_EVENT_LISTENERS));
       listeners.add(new SessionPropertiesListener(hiveConf));
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
new file mode 100644
index 0000000..dffef4a
--- /dev/null
+++ metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
@@ -0,0 +1,226 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.metastore.events.PreAlterTableEvent;
+import org.apache.hadoop.hive.metastore.events.PreCreateTableEvent;
+import org.apache.hadoop.hive.metastore.events.PreEventContext;
+
+import java.util.Iterator;
+import java.util.Map;
+
+/**
+ * Pre-event listener that validates the 'transactional' table property on CREATE TABLE and
+ * ALTER TABLE: the property may only be set to 'true', only on tables that are bucketed and
+ * use ACID-capable input/output formats, and it cannot be changed once set.
+ */
+final class TransactionalValidationListener extends MetaStorePreEventListener {
+  private static final String TRANSACTIONAL_KEY = hive_metastoreConstants.TABLE_IS_TRANSACTIONAL;
+  private static final String ACID_INPUT_FORMAT = "org.apache.hadoop.hive.ql.io.AcidInputFormat";
+  private static final String ACID_OUTPUT_FORMAT = "org.apache.hadoop.hive.ql.io.AcidOutputFormat";
+  private static final String NOT_ACID_COMPLIANT_MSG =
+      "The table must be bucketed and stored as ORC in order to be transactional";
+
+  TransactionalValidationListener(Configuration conf) {
+    super(conf);
+  }
+
+  /**
+   * Dispatches CREATE_TABLE and ALTER_TABLE pre-events to the matching validation routine;
+   * every other event type is accepted without validation.
+   */
+  @Override
+  public void onEvent(PreEventContext context) throws MetaException, NoSuchObjectException,
+      InvalidOperationException {
+    switch (context.getEventType()) {
+      case CREATE_TABLE:
+        handleCreateTableTransactionalProp((PreCreateTableEvent) context);
+        break;
+      case ALTER_TABLE:
+        handleAlterTableTransactionalProp((PreAlterTableEvent) context);
+        break;
+      default:
+        // no validation required
+        break;
+    }
+  }
+
+  /**
+   * Once a table is marked transactional, you cannot go back. Enforce this.
+   * The property name is normalized to its canonical spelling as a side effect.
+   *
+   * @throws MetaException if the new value is 'true' but the table is not ACID-compliant, or if
+   *         the new value differs from the old one and is anything other than 'true'
+   */
+  private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throws MetaException {
+    Table newTable = context.getNewTable();
+    Map<String, String> parameters = newTable.getParameters();
+    // An empty or missing parameter map must still be compared with the old table below,
+    // otherwise dropping every property would silently unset 'transactional'.
+    TransactionalProp requested = extractTransactionalProp(parameters);
+    boolean requestsTrue = "true".equalsIgnoreCase(requested.value);
+    if (requested.present) {
+      // normalize prop name (and the spelling of 'true', as CREATE TABLE does)
+      parameters.put(TRANSACTIONAL_KEY, requestsTrue ? Boolean.TRUE.toString() : requested.value);
+    }
+    if (requestsTrue) {
+      if (!conformToAcid(newTable)) {
+        throw new MetaException(NOT_ACID_COMPLIANT_MSG);
+      }
+      return;
+    }
+
+    String oldTransactionalValue = findTransactionalValue(context.getOldTable().getParameters());
+    if (oldTransactionalValue == null ? requested.value == null
+        : oldTransactionalValue.equalsIgnoreCase(requested.value)) {
+      // this covers backward compat cases where this prop may have been set already
+      return;
+    }
+    // if here, there is attempt to set transactional to something other than 'true'
+    // and NOT the same value it was before
+    throw new MetaException("TBLPROPERTIES with 'transactional'='true' cannot be unset");
+  }
+
+  /**
+   * Normalize case and make sure:
+   * 1. 'true' is the only value to be set for 'transactional' (if set at all)
+   * 2. If set to 'true', we should also enforce bucketing and ORC format
+   *
+   * @throws MetaException if the value is neither 'true' nor 'false', or if it is 'true' and
+   *         the table is not ACID-compliant
+   */
+  private void handleCreateTableTransactionalProp(PreCreateTableEvent context) throws MetaException {
+    Table newTable = context.getTable();
+    Map<String, String> parameters = newTable.getParameters();
+    TransactionalProp requested = extractTransactionalProp(parameters);
+    if (!requested.present) {
+      return;
+    }
+
+    if ("false".equalsIgnoreCase(requested.value)) {
+      // just drop transactional=false. For backward compatibility in case someone has scripts
+      // with transactional=false
+      return;
+    }
+
+    if ("true".equalsIgnoreCase(requested.value)) {
+      if (!conformToAcid(newTable)) {
+        throw new MetaException(NOT_ACID_COMPLIANT_MSG);
+      }
+
+      // normalize prop name
+      parameters.put(TRANSACTIONAL_KEY, Boolean.TRUE.toString());
+      return;
+    }
+
+    // transactional prop is found, but the value is not in expected range
+    throw new MetaException("'transactional' property of TBLPROPERTIES may only have value 'true'");
+  }
+
+  /**
+   * Removes every case-insensitive spelling of the 'transactional' key from the given map.
+   *
+   * @param parameters table parameters to strip; may be null
+   * @return whether the key was present and the value it carried (last match wins)
+   */
+  private static TransactionalProp extractTransactionalProp(Map<String, String> parameters) {
+    boolean present = false;
+    String value = null;
+    if (parameters != null) {
+      // Iterator.remove avoids copying the key set just to mutate the map while scanning it.
+      Iterator<Map.Entry<String, String>> it = parameters.entrySet().iterator();
+      while (it.hasNext()) {
+        Map.Entry<String, String> entry = it.next();
+        if (TRANSACTIONAL_KEY.equalsIgnoreCase(entry.getKey())) {
+          present = true;
+          value = entry.getValue();
+          it.remove();
+        }
+      }
+    }
+    return new TransactionalProp(present, value);
+  }
+
+  /**
+   * Looks up the 'transactional' value ignoring the case of the key, without modifying the map.
+   *
+   * @param parameters table parameters to inspect; may be null
+   * @return the value of the last matching key, or null if there is none
+   */
+  private static String findTransactionalValue(Map<String, String> parameters) {
+    String value = null;
+    if (parameters != null) {
+      for (Map.Entry<String, String> entry : parameters.entrySet()) {
+        if (TRANSACTIONAL_KEY.equalsIgnoreCase(entry.getKey())) {
+          value = entry.getValue();
+        }
+      }
+    }
+    return value;
+  }
+
+  /**
+   * Checks that the table is bucketed and that its input/output format classes implement
+   * AcidInputFormat/AcidOutputFormat.
+   *
+   * @return false if the table has no storage descriptor, no bucketing columns, no declared
+   *         input/output format, or formats that are not ACID-capable; true otherwise
+   * @throws MetaException if a format class cannot be loaded
+   */
+  private static boolean conformToAcid(Table table) throws MetaException {
+    StorageDescriptor sd = table.getSd();
+    if (sd == null || sd.getBucketColsSize() < 1) {
+      return false;
+    }
+    String inputFormat = sd.getInputFormat();
+    String outputFormat = sd.getOutputFormat();
+    if (inputFormat == null || outputFormat == null) {
+      // Class.forName(null) would fail with a NullPointerException
+      return false;
+    }
+
+    try {
+      Class<?> inputFormatClass = Class.forName(inputFormat);
+      Class<?> outputFormatClass = Class.forName(outputFormat);
+      return Class.forName(ACID_INPUT_FORMAT).isAssignableFrom(inputFormatClass)
+          && Class.forName(ACID_OUTPUT_FORMAT).isAssignableFrom(outputFormatClass);
+    } catch (ClassNotFoundException e) {
+      MetaException invalidFormat = new MetaException("Invalid input/output format for table");
+      invalidFormat.initCause(e); // keep the name of the class that failed to load
+      throw invalidFormat;
+    }
+  }
+
+  /** Result of scanning table parameters for the 'transactional' property. */
+  private static final class TransactionalProp {
+    private final boolean present;
+    private final String value;
+
+    private TransactionalProp(boolean present, String value) {
+      this.present = present;
+      this.value = value;
+    }
+  }
+}