From ab88bae1b0a58a002806cf2ced2abce25cfed9fa Mon Sep 17 00:00:00 2001 From: Alexander Kolbasov Date: Mon, 20 Aug 2018 22:25:52 -0700 Subject: [PATCH 1/1] HIVE-19253: HMS ignores tableType property for external tables --- .../org/apache/hive/hcatalog/api/HCatTable.java | 3 +- .../apache/hadoop/hive/ql/util/UpgradeTool.java | 3 +- .../hadoop/hive/metastore/HiveAlterHandler.java | 2 +- .../hadoop/hive/metastore/HiveMetaStore.java | 22 +------ .../apache/hadoop/hive/metastore/ObjectStore.java | 19 ++---- .../metastore/TransactionalValidationListener.java | 6 +- .../hadoop/hive/metastore/cache/CachedStore.java | 14 +--- .../hive/metastore/utils/MetaStoreUtils.java | 32 +++++++-- .../hadoop/hive/metastore/TestObjectStore.java | 75 ++++++++++++++++++++++ 9 files changed, 123 insertions(+), 53 deletions(-) diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java index ed2aef4758..a4344f95e8 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; @@ -137,7 +138,7 @@ public HCatTable(String dbName, String tableName) { tableName = hiveTable.getTableName(); dbName = hiveTable.getDbName(); tableType = hiveTable.getTableType(); - isExternal = hiveTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString()); + isExternal = MetaStoreUtils.isExternalTable(hiveTable); sd = hiveTable.getSd(); for (FieldSchema colFS : sd.getCols()) { cols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/UpgradeTool.java b/ql/src/java/org/apache/hadoop/hive/ql/util/UpgradeTool.java index c523a76659..003bc1484e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/util/UpgradeTool.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/util/UpgradeTool.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.utils.FileUtils; import org.apache.hadoop.hive.metastore.utils.FileUtils.RemoteIteratorWithFilter; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.BucketCodec; import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; @@ -516,7 +517,7 @@ private static void processConversion(Table t, List convertToAcid, if(isFullAcidTable(t)) { return; } - if(!TableType.MANAGED_TABLE.name().equalsIgnoreCase(t.getTableType())) { + if (MetaStoreUtils.isExternalTable(t)) { return; } //todo: are HBase, Druid talbes managed in 2.x? 3.0? diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index c551b80d8a..6ae580131e 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -548,7 +548,7 @@ public Partition alterPartition(RawStore msdb, Warehouse wh, String catName, Str // 1) partition SD Location // 2) partition column stats if there are any because of part_name field in HMS table PART_COL_STATS // 3) rename the partition directory if it is not an external table - if (!tbl.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) { + if (!MetaStoreUtils.isExternalTable(tbl)) { // TODO: refactor this into a separate method after master merge, this one is too big. try { db = msdb.getDatabase(catName, dbname); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 324035a809..7bb8b3b03b 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -1843,7 +1843,7 @@ private void create_table_core(final RawStore ms, final Table tbl, || tbl.getSd().getLocation().isEmpty()) { tblPath = wh.getDefaultTablePath(db, tbl); } else { - if (!isExternal(tbl) && !MetaStoreUtils.isNonNativeTable(tbl)) { + if (!MetaStoreUtils.isExternalTable(tbl) && !MetaStoreUtils.isNonNativeTable(tbl)) { LOG.warn("Location: " + tbl.getSd().getLocation() + " specified for non-external table:" + tbl.getTableName()); } @@ -2530,9 +2530,9 @@ private boolean drop_table_core(final RawStore ms, final String catName, final S } private boolean checkTableDataShouldBeDeleted(Table tbl, boolean deleteData) { - if (deleteData && isExternal(tbl)) { + if (deleteData && MetaStoreUtils.isExternalTable(tbl)) { // External table data can be deleted if EXTERNAL_TABLE_PURGE is true - return isExternalTablePurge(tbl); + return MetaStoreUtils.isExternalTablePurge(tbl); } return deleteData; } @@ -2852,21 +2852,6 @@ private void truncateTableInternal(String dbName, String tableName, List } } - /** - * Is this an external table? - * - * @param table - * Check if this table is external. - * @return True if the table is external, otherwise false. - */ - private boolean isExternal(Table table) { - return MetaStoreUtils.isExternalTable(table); - } - - private boolean isExternalTablePurge(Table table) { - return MetaStoreUtils.isPropertyTrue(table.getParameters(), MetaStoreUtils.EXTERNAL_TABLE_PURGE); - } - @Override @Deprecated public Table get_table(final String dbname, final String name) throws MetaException, @@ -4294,7 +4279,6 @@ public DropPartitionsResult drop_partitions_req( // We need Partition-s for firing events and for result; DN needs MPartition-s to drop. // Great... Maybe we could bypass fetching MPartitions by issuing direct SQL deletes. tbl = get_table_core(catName, dbName, tblName); - isExternal(tbl); mustPurge = isMustPurge(envContext, tbl); int minCount = 0; RequestPartsSpec spec = request.getParts(); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index d27224b235..61c1ca2891 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -1911,7 +1911,7 @@ private Table convertToTable(MTable mtbl) throws MetaException { // for backwards compatibility with old metastore persistence if (mtbl.getViewOriginalText() != null) { tableType = TableType.VIRTUAL_VIEW.toString(); - } else if (Boolean.parseBoolean(mtbl.getParameters().get("EXTERNAL"))) { + } else if (Boolean.parseBoolean(mtbl.getParameters().get(MetaStoreUtils.EXTERNAL_TABLE_PROP))) { tableType = TableType.EXTERNAL_TABLE.toString(); } else { tableType = TableType.MANAGED_TABLE.toString(); @@ -1954,19 +1954,14 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException, DatabaseName.getQualified(catName, tbl.getDbName()) + " doesn't exist."); } - // If the table has property EXTERNAL set, update table type - // accordingly + // If the table type is set as MANAGED, but the table has property + // EXTERNAL, change the table type to EXTERNAL as well. String tableType = tbl.getTableType(); - boolean isExternal = Boolean.parseBoolean(tbl.getParameters().get("EXTERNAL")); - if (TableType.MANAGED_TABLE.toString().equals(tableType)) { - if (isExternal) { - tableType = TableType.EXTERNAL_TABLE.toString(); - } + if (tableType == null) { + tableType = TableType.MANAGED_TABLE.name(); } - if (TableType.EXTERNAL_TABLE.toString().equals(tableType)) { - if (!isExternal) { - tableType = TableType.MANAGED_TABLE.toString(); - } + if (TableType.MANAGED_TABLE.name().equals(tableType) && MetaStoreUtils.isExternalTable(tbl)) { + tableType = TableType.EXTERNAL_TABLE.name(); } PrincipalType ownerPrincipalType = tbl.getOwnerType(); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java index 004acf8f12..2125a6c55c 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java @@ -164,7 +164,7 @@ private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throw + "format (such as ORC): " + Warehouse.getQualifiedName(newTable)); } - if (newTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) { + if (MetaStoreUtils.isExternalTable(newTable)) { throw new MetaException(Warehouse.getQualifiedName(newTable) + " cannot be declared transactional because it's an external table"); } @@ -269,6 +269,10 @@ private void makeAcid(Table newTable) throws MetaException { newTable.getTableType()); return; } + if (MetaStoreUtils.isExternalTable(newTable)) { + LOG.info("Could not make " + Warehouse.getQualifiedName(newTable) + " acid: it's external"); + return; + } if(newTable.getSd().getSortColsSize() > 0) { LOG.info("Could not make " + Warehouse.getQualifiedName(newTable) + " acid: it's sorted"); return; diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 0445cbf909..94bc45908f 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -804,18 +804,10 @@ private void validateTableType(Table tbl) { // If the table has property EXTERNAL set, update table type // accordingly String tableType = tbl.getTableType(); - boolean isExternal = Boolean.parseBoolean(tbl.getParameters().get("EXTERNAL")); - if (TableType.MANAGED_TABLE.toString().equals(tableType)) { - if (isExternal) { - tableType = TableType.EXTERNAL_TABLE.toString(); - } - } - if (TableType.EXTERNAL_TABLE.toString().equals(tableType)) { - if (!isExternal) { - tableType = TableType.MANAGED_TABLE.toString(); - } + if (MetaStoreUtils.isExternalTable(tbl) && + !TableType.EXTERNAL_TABLE.name().equals(tableType)) { + tbl.setTableType(TableType.EXTERNAL_TABLE.name()); } - tbl.setTableType(tableType); } @Override diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index 5233bee592..ccfd85b7d3 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -100,6 +100,9 @@ protected DateFormat initialValue() { public static final String EXTERNAL_TABLE_PURGE = "external.table.purge"; + /** Table property marking table as external. */ + public static final String EXTERNAL_TABLE_PROP = "EXTERNAL"; + // Right now we only support one special character '/'. // More special characters can be added accordingly in the future. // NOTE: @@ -108,6 +111,7 @@ protected DateFormat initialValue() { // HIVE_SUPPORT_SPECICAL_CHARACTERS_IN_TABLE_NAMES in HiveConf as well. private static final char[] specialCharactersInTableNames = new char[] { '/' }; + /** * Catches exceptions that can't be handled and bundles them to MetaException * @@ -201,6 +205,20 @@ public static boolean validateName(String name, Configuration conf) { /** * Determines whether a table is an external table. + * A table is external if + *
    + *
  1. Its table type is EXTERNAL_TABLE
  2. + *
  3. It has EXTERNAL parameter set to true
  4. + *
+ *

+ * Ideally we would like to use just the tableType, but this breaks the behavior of existing + * tables which use property to specify that the table is external. To preserve + * backwards-compatible behavior we check the property as well. The thinking is that by + * setting tableType as external or setting EXTERNAL property someone expressly desired to have + * an external table.

+ * + * There is a potentially weird case when tableType is set to and the + * EXTERNAL property is set to false, but we can only (shrug) in this case. * * @param table table of interest * @@ -210,12 +228,16 @@ public static boolean isExternalTable(Table table) { if (table == null) { return false; } + String tableType = table.getTableType(); + if (tableType != null && tableType.equals(TableType.EXTERNAL_TABLE.name())) { + return true; + } Map params = table.getParameters(); if (params == null) { return false; } - return isExternal(params); + return Boolean.parseBoolean(params.get(EXTERNAL_TABLE_PROP)); } /** @@ -234,15 +256,11 @@ public static boolean isExternalTablePurge(Table table) { return false; } - return isPropertyTrue(params, EXTERNAL_TABLE_PURGE); - } - - public static boolean isExternal(Map tableParams){ - return isPropertyTrue(tableParams, "EXTERNAL"); + return Boolean.parseBoolean(params.get(EXTERNAL_TABLE_PURGE)); } public static boolean isPropertyTrue(Map tableParams, String prop) { - return "TRUE".equalsIgnoreCase(tableParams.get(prop)); + return Boolean.parseBoolean(tableParams.get(prop)); } // check if stats need to be (re)calculated diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java index b74c3048fa..6ed557deb6 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java @@ -65,6 +65,8 @@ import org.apache.hadoop.hive.metastore.metrics.MetricsConstants; import org.apache.hadoop.hive.metastore.model.MNotificationLog; import org.apache.hadoop.hive.metastore.model.MNotificationNextId; +import org.apache.hadoop.hive.metastore.model.MTable; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.junit.Assert; import org.junit.Assume; import org.junit.Before; @@ -86,6 +88,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CyclicBarrier; @@ -811,6 +814,64 @@ public void testNotificationOps() throws InterruptedException, MetaException { Assert.assertEquals(0, eventResponse.getEventsSize()); } + /** + * Verify that table type is set correctly based on input table properties. + * Two things are verified: + *

    + *
  1. When EXTERNAL property is set to true, table type should be external
  2. + *
  3. When table type is set to external it should remain external + *
+ * @throws Exception + */ + @Test + public void testExternalTable() throws Exception { + Database db1 = new DatabaseBuilder() + .setName(DB1) + .setDescription("description") + .setLocation("locationurl") + .build(conf); + objectStore.createDatabase(db1); + + List tables = new ArrayList<>(4); + Map expectedValues = new HashMap<>(); + + int i = 1; + // Case 1: EXTERNAL = true, tableType == MANAGED_TABLE + // The result should be external table + Table tbl1 = buildTable(conf, db1, "t" + i++, true, null); + tables.add(tbl1); + expectedValues.put(tbl1.getTableName(), true); + // Case 2: EXTERNAL = false, tableType == EXTERNAL_TABLE + // The result should be external table + Table tbl2 = buildTable(conf, db1, "t" + i++, false, TableType.EXTERNAL_TABLE.name()); + tables.add(tbl2); + expectedValues.put(tbl2.getTableName(), true); + // Case 3: EXTERNAL = false, tableType == EXTERNAL_TABLE + // The result should be external table + Table tbl3 = buildTable(conf, db1, "t" + i++, true, TableType.EXTERNAL_TABLE.name()); + tables.add(tbl3); + expectedValues.put(tbl3.getTableName(), true); + // Case 4: EXTERNAL = false, tableType == MANAGED_TABLE + // The result should be managed table + Table tbl4 = buildTable(conf, db1, "t" + i++, false, null); + tables.add(tbl4); + expectedValues.put(tbl4.getTableName(), false); + + for (Table t: tables) { + objectStore.createTable(t); + + Table tOut = objectStore.getTable(db1.getCatalogName(), DB1, t.getTableName()); + Assert.assertEquals("Table " + t.getTableName() + "has invalid external state", + expectedValues.get(t.getTableName()), MetaStoreUtils.isExternalTable(tOut)); + } + + for (Table t: tables) { + objectStore.dropTable(db1.getCatalogName(), DB1, t.getTableName()); + } + + objectStore.dropDatabase(db1.getCatalogName(), DB1); + } + /** * Test metastore configuration property METASTORE_MAX_EVENT_RESPONSE */ @@ -918,5 +979,19 @@ private void createTestCatalog(String catName) throws MetaException { .build(); objectStore.createCatalog(cat); } + + private Table buildTable(Configuration conf, Database db, String name, boolean external_prop, + String tableType) throws MetaException { + Table tbl = new TableBuilder() + .inDb(db) + .setTableName(name) + .addCol("col1", ColumnType.STRING_TYPE_NAME) + .addTableParam(MetaStoreUtils.EXTERNAL_TABLE_PROP, String.valueOf(external_prop)) + .build(conf); + if (tableType != null) { + tbl.setTableType(tableType); + } + return tbl; + } } -- 2.16.3