From 3b62c3b4551ec9d976425cdeb96de0502e63121e Mon Sep 17 00:00:00 2001 From: Alexander Kolbasov Date: Tue, 28 Aug 2018 18:17:27 -0700 Subject: [PATCH 1/1] HIVE-19253: HMS ignores tableType property for external tables --- .../org/apache/hive/hcatalog/api/HCatTable.java | 3 +- .../apache/hadoop/hive/ql/util/UpgradeTool.java | 5 ++ .../hadoop/hive/metastore/HiveAlterHandler.java | 2 +- .../hadoop/hive/metastore/HiveMetaStore.java | 22 +------ .../apache/hadoop/hive/metastore/ObjectStore.java | 19 ++---- .../metastore/TransactionalValidationListener.java | 2 +- .../hadoop/hive/metastore/cache/CachedStore.java | 13 +--- .../hive/metastore/utils/MetaStoreUtils.java | 31 +++++++-- .../hadoop/hive/metastore/TestObjectStore.java | 75 ++++++++++++++++++++++ 9 files changed, 120 insertions(+), 52 deletions(-) diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java index ed2aef4758..a4344f95e8 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; @@ -137,7 +138,7 @@ public HCatTable(String dbName, String tableName) { tableName = hiveTable.getTableName(); dbName = hiveTable.getDbName(); tableType = hiveTable.getTableType(); - isExternal = hiveTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString()); + isExternal = MetaStoreUtils.isExternalTable(hiveTable); sd = hiveTable.getSd(); for (FieldSchema colFS : sd.getCols()) { cols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/UpgradeTool.java b/ql/src/java/org/apache/hadoop/hive/ql/util/UpgradeTool.java index c523a76659..1dbcba800a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/util/UpgradeTool.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/util/UpgradeTool.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.utils.FileUtils; import org.apache.hadoop.hive.metastore.utils.FileUtils.RemoteIteratorWithFilter; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.BucketCodec; import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; @@ -519,6 +520,10 @@ private static void processConversion(Table t, List convertToAcid, if(!TableType.MANAGED_TABLE.name().equalsIgnoreCase(t.getTableType())) { return; } + + if (MetaStoreUtils.isExternalTable(t)) { + return; + } //todo: are HBase, Druid talbes managed in 2.x? 3.0? String fullTableName = Warehouse.getQualifiedName(t); /* diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index 6a1cd3bc9a..0c85e39f01 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -548,7 +548,7 @@ public Partition alterPartition(RawStore msdb, Warehouse wh, String catName, Str // 1) partition SD Location // 2) partition column stats if there are any because of part_name field in HMS table PART_COL_STATS // 3) rename the partition directory if it is not an external table - if (!tbl.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) { + if (!MetaStoreUtils.isExternalTable(tbl)) { // TODO: refactor this into a separate method after master merge, this one is too big. try { db = msdb.getDatabase(catName, dbname); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 33b22a9fc3..24c25ac22f 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -1843,7 +1843,7 @@ private void create_table_core(final RawStore ms, final Table tbl, || tbl.getSd().getLocation().isEmpty()) { tblPath = wh.getDefaultTablePath(db, tbl); } else { - if (!isExternal(tbl) && !MetaStoreUtils.isNonNativeTable(tbl)) { + if (!MetaStoreUtils.isExternalTable(tbl) && !MetaStoreUtils.isNonNativeTable(tbl)) { LOG.warn("Location: " + tbl.getSd().getLocation() + " specified for non-external table:" + tbl.getTableName()); } @@ -2530,9 +2530,9 @@ private boolean drop_table_core(final RawStore ms, final String catName, final S } private boolean checkTableDataShouldBeDeleted(Table tbl, boolean deleteData) { - if (deleteData && isExternal(tbl)) { + if (deleteData && MetaStoreUtils.isExternalTable(tbl)) { // External table data can be deleted if EXTERNAL_TABLE_PURGE is true - return isExternalTablePurge(tbl); + return MetaStoreUtils.isExternalTablePurge(tbl); } return deleteData; } @@ -2852,21 +2852,6 @@ private void truncateTableInternal(String dbName, String tableName, List } } - /** - * Is this an external table? - * - * @param table - * Check if this table is external. - * @return True if the table is external, otherwise false. - */ - private boolean isExternal(Table table) { - return MetaStoreUtils.isExternalTable(table); - } - - private boolean isExternalTablePurge(Table table) { - return MetaStoreUtils.isPropertyTrue(table.getParameters(), MetaStoreUtils.EXTERNAL_TABLE_PURGE); - } - @Override @Deprecated public Table get_table(final String dbname, final String name) throws MetaException, @@ -4313,7 +4298,6 @@ public DropPartitionsResult drop_partitions_req( // We need Partition-s for firing events and for result; DN needs MPartition-s to drop. // Great... Maybe we could bypass fetching MPartitions by issuing direct SQL deletes. tbl = get_table_core(catName, dbName, tblName); - isExternal(tbl); mustPurge = isMustPurge(envContext, tbl); int minCount = 0; RequestPartsSpec spec = request.getParts(); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index d27224b235..29a4b4eb7b 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -1911,7 +1911,7 @@ private Table convertToTable(MTable mtbl) throws MetaException { // for backwards compatibility with old metastore persistence if (mtbl.getViewOriginalText() != null) { tableType = TableType.VIRTUAL_VIEW.toString(); - } else if (Boolean.parseBoolean(mtbl.getParameters().get("EXTERNAL"))) { + } else if (Boolean.parseBoolean(mtbl.getParameters().get(MetaStoreUtils.EXTERNAL_TABLE_PROP))) { tableType = TableType.EXTERNAL_TABLE.toString(); } else { tableType = TableType.MANAGED_TABLE.toString(); @@ -1954,19 +1954,14 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException, DatabaseName.getQualified(catName, tbl.getDbName()) + " doesn't exist."); } - // If the table has property EXTERNAL set, update table type - // accordingly + // If the table type is set as MANAGED, but the table has property + // EXTERNAL, change the table type to EXTERNAL as well. String tableType = tbl.getTableType(); - boolean isExternal = Boolean.parseBoolean(tbl.getParameters().get("EXTERNAL")); - if (TableType.MANAGED_TABLE.toString().equals(tableType)) { - if (isExternal) { - tableType = TableType.EXTERNAL_TABLE.toString(); - } + if (tableType == null) { + tableType = TableType.MANAGED_TABLE.name(); } - if (TableType.EXTERNAL_TABLE.toString().equals(tableType)) { - if (!isExternal) { - tableType = TableType.MANAGED_TABLE.toString(); - } + if (TableType.MANAGED_TABLE.name().equalsIgnoreCase(tableType) && MetaStoreUtils.isExternalTable(tbl)) { + tableType = TableType.EXTERNAL_TABLE.name(); } PrincipalType ownerPrincipalType = tbl.getOwnerType(); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java index 004acf8f12..0a3830e385 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java @@ -164,7 +164,7 @@ private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throw + "format (such as ORC): " + Warehouse.getQualifiedName(newTable)); } - if (newTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) { + if (MetaStoreUtils.isExternalTable(newTable)) { throw new MetaException(Warehouse.getQualifiedName(newTable) + " cannot be declared transactional because it's an external table"); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 0445cbf909..cd618f4abe 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -804,18 +804,9 @@ private void validateTableType(Table tbl) { // If the table has property EXTERNAL set, update table type // accordingly String tableType = tbl.getTableType(); - boolean isExternal = Boolean.parseBoolean(tbl.getParameters().get("EXTERNAL")); - if (TableType.MANAGED_TABLE.toString().equals(tableType)) { - if (isExternal) { - tableType = TableType.EXTERNAL_TABLE.toString(); - } - } - if (TableType.EXTERNAL_TABLE.toString().equals(tableType)) { - if (!isExternal) { - tableType = TableType.MANAGED_TABLE.toString(); - } + if (MetaStoreUtils.isExternalTable(tbl) && !TableType.EXTERNAL_TABLE.name().equals(tableType)) { + tbl.setTableType(TableType.EXTERNAL_TABLE.name()); } - tbl.setTableType(tableType); } @Override diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index c681a87a1c..d3da37f4c3 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -100,6 +100,9 @@ protected DateFormat initialValue() { public static final String EXTERNAL_TABLE_PURGE = "external.table.purge"; + /** Table property marking table as external. */ + public static final String EXTERNAL_TABLE_PROP = "EXTERNAL"; + // Right now we only support one special character '/'. // More special characters can be added accordingly in the future. // NOTE: @@ -201,6 +204,20 @@ public static boolean validateName(String name, Configuration conf) { /** * Determines whether a table is an external table. + * A table is external if + *
    + *
  1. Its table type is EXTERNAL_TABLE
  2. + *
  3. It has EXTERNAL parameter set to true
  4. + *
+ *

+ * Ideally we would like to use just the tableType, but this breaks the behavior of existing + * tables which use property to specify that the table is external. To preserve + * backwards-compatible behavior we check the property as well. The thinking is that by + * setting tableType as external or setting EXTERNAL property someone expressly desired to have + * an external table.

+ * + * There is a potentially weird case when tableType is set to and the + * EXTERNAL property is set to false, but we can only (shrug) in this case. * * @param table table of interest * @@ -210,12 +227,16 @@ public static boolean isExternalTable(Table table) { if (table == null) { return false; } + String tableType = table.getTableType(); + if (tableType != null && tableType.equals(TableType.EXTERNAL_TABLE.name())) { + return true; + } Map params = table.getParameters(); if (params == null) { return false; } - return isExternal(params); + return Boolean.parseBoolean(params.get(EXTERNAL_TABLE_PROP)); } /** @@ -234,15 +255,11 @@ public static boolean isExternalTablePurge(Table table) { return false; } - return isPropertyTrue(params, EXTERNAL_TABLE_PURGE); - } - - public static boolean isExternal(Map tableParams){ - return isPropertyTrue(tableParams, "EXTERNAL"); + return Boolean.parseBoolean(params.get(EXTERNAL_TABLE_PURGE)); } public static boolean isPropertyTrue(Map tableParams, String prop) { - return "TRUE".equalsIgnoreCase(tableParams.get(prop)); + return Boolean.parseBoolean(tableParams.get(prop)); } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java index b74c3048fa..6ed557deb6 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java @@ -65,6 +65,8 @@ import org.apache.hadoop.hive.metastore.metrics.MetricsConstants; import org.apache.hadoop.hive.metastore.model.MNotificationLog; import org.apache.hadoop.hive.metastore.model.MNotificationNextId; +import org.apache.hadoop.hive.metastore.model.MTable; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.junit.Assert; import org.junit.Assume; import org.junit.Before; @@ -86,6 +88,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CyclicBarrier; @@ -811,6 +814,64 @@ public void testNotificationOps() throws InterruptedException, MetaException { Assert.assertEquals(0, eventResponse.getEventsSize()); } + /** + * Verify that table type is set correctly based on input table properties. + * Two things are verified: + *

    + *
  1. When EXTERNAL property is set to true, table type should be external
  2. + *
  3. When table type is set to external it should remain external + *
+ * @throws Exception + */ + @Test + public void testExternalTable() throws Exception { + Database db1 = new DatabaseBuilder() + .setName(DB1) + .setDescription("description") + .setLocation("locationurl") + .build(conf); + objectStore.createDatabase(db1); + + List tables = new ArrayList<>(4); + Map expectedValues = new HashMap<>(); + + int i = 1; + // Case 1: EXTERNAL = true, tableType == MANAGED_TABLE + // The result should be external table + Table tbl1 = buildTable(conf, db1, "t" + i++, true, null); + tables.add(tbl1); + expectedValues.put(tbl1.getTableName(), true); + // Case 2: EXTERNAL = false, tableType == EXTERNAL_TABLE + // The result should be external table + Table tbl2 = buildTable(conf, db1, "t" + i++, false, TableType.EXTERNAL_TABLE.name()); + tables.add(tbl2); + expectedValues.put(tbl2.getTableName(), true); + // Case 3: EXTERNAL = false, tableType == EXTERNAL_TABLE + // The result should be external table + Table tbl3 = buildTable(conf, db1, "t" + i++, true, TableType.EXTERNAL_TABLE.name()); + tables.add(tbl3); + expectedValues.put(tbl3.getTableName(), true); + // Case 4: EXTERNAL = false, tableType == MANAGED_TABLE + // The result should be managed table + Table tbl4 = buildTable(conf, db1, "t" + i++, false, null); + tables.add(tbl4); + expectedValues.put(tbl4.getTableName(), false); + + for (Table t: tables) { + objectStore.createTable(t); + + Table tOut = objectStore.getTable(db1.getCatalogName(), DB1, t.getTableName()); + Assert.assertEquals("Table " + t.getTableName() + "has invalid external state", + expectedValues.get(t.getTableName()), MetaStoreUtils.isExternalTable(tOut)); + } + + for (Table t: tables) { + objectStore.dropTable(db1.getCatalogName(), DB1, t.getTableName()); + } + + objectStore.dropDatabase(db1.getCatalogName(), DB1); + } + /** * Test metastore configuration property METASTORE_MAX_EVENT_RESPONSE */ @@ -918,5 +979,19 @@ private void createTestCatalog(String catName) throws MetaException { .build(); objectStore.createCatalog(cat); } + + private Table buildTable(Configuration conf, Database db, String name, boolean external_prop, + String tableType) throws MetaException { + Table tbl = new TableBuilder() + .inDb(db) + .setTableName(name) + .addCol("col1", ColumnType.STRING_TYPE_NAME) + .addTableParam(MetaStoreUtils.EXTERNAL_TABLE_PROP, String.valueOf(external_prop)) + .build(conf); + if (tableType != null) { + tbl.setTableType(tableType); + } + return tbl; + } } -- 2.16.3