diff --git itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
index 8f9a03fcd1..3c334fac31 100644
--- itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
+++ itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
@@ -294,6 +294,12 @@ public boolean dropPartition(String catName, String dbName, String tableName, Li
     return objectStore.getPartitions(catName, dbName, tableName, max);
   }
 
+  @Override
+  public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName,
+      String baseLocationToNotShow, int max) {
+    return objectStore.getPartitionLocations(catName, dbName, tblName, baseLocationToNotShow, max);
+  }
+
   @Override
   public void updateCreationMetadata(String catName, String dbname, String tablename, CreationMetadata cm)
       throws MetaException {
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index e88f9a5fee..e9d7e7c397 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -1505,7 +1505,8 @@ private void drop_database_core(RawStore ms, String catName,
               " which is not writable by " + SecurityUtils.getUser());
         }
 
-        if (!isSubdirectory(databasePath, materializedViewPath)) {
+        if (!FileUtils.isSubdirectory(databasePath.toString(),
+            materializedViewPath.toString())) {
           tablePaths.add(materializedViewPath);
         }
       }
@@ -1545,7 +1546,7 @@ private void drop_database_core(RawStore ms, String catName,
               " which is not writable by " + SecurityUtils.getUser());
         }
 
-        if (!isSubdirectory(databasePath, tablePath)) {
+        if (!FileUtils.isSubdirectory(databasePath.toString(), tablePath.toString())) {
           tablePaths.add(tablePath);
         }
       }
@@ -1553,7 +1554,7 @@ private void drop_database_core(RawStore ms, String catName,
         // For each partition in each table, drop the partitions and get a list of
        // partitions' locations which might need to be deleted
         partitionPaths = dropPartitionsAndGetLocations(ms, catName, name, table.getTableName(),
-            tablePath, table.getPartitionKeys(), deleteData && !isExternal(table));
+            tablePath, deleteData && !isExternal(table));
 
         // Drop the table but not its data
         drop_table(MetaStoreUtils.prependCatalogToDbName(table.getCatName(), table.getDbName(), conf),
@@ -1604,20 +1605,6 @@ private void drop_database_core(RawStore ms, String catName,
     }
   }
 
-  /**
-   * Returns a BEST GUESS as to whether or not other is a subdirectory of parent. It does not
-   * take into account any intricacies of the underlying file system, which is assumed to be
-   * HDFS. This should not return any false positives, but may return false negatives.
-   *
-   * @param parent
-   * @param other
-   * @return
-   */
-  private boolean isSubdirectory(Path parent, Path other) {
-    return other.toString().startsWith(parent.toString().endsWith(Path.SEPARATOR) ?
-        parent.toString() : parent.toString() + Path.SEPARATOR);
-  }
-
   @Override
   public void drop_database(final String dbName, final boolean deleteData, final boolean cascade)
       throws NoSuchObjectException, InvalidOperationException, MetaException {
@@ -2482,7 +2469,7 @@ private boolean drop_table_core(final RawStore ms, final String catName, final S
       // Drop the partitions and get a list of locations which need to be deleted
       partPaths = dropPartitionsAndGetLocations(ms, catName, dbname, name, tblPath,
-          tbl.getPartitionKeys(), deleteData && !isExternal);
+          deleteData && !isExternal);
 
       // Drop any constraints on the table
       ms.dropConstraint(catName, dbname, name, null, true);
@@ -2567,79 +2554,75 @@ private void deletePartitionData(List<Path> partPaths, boolean ifPurge, Database
   }
 
   /**
-   * Retrieves the partitions specified by partitionKeys. If checkLocation, for locations of
-   * partitions which may not be subdirectories of tablePath checks to make the locations are
-   * writable.
+   * Deletes the partitions specified by catName, dbName, tableName. If checkLocation is true,
+   * then for partition locations which may not be subdirectories of tablePath it checks that
+   * the locations are writable.
    *
    * Drops the metadata for each partition.
    *
   * Provides a list of locations of partitions which may not be subdirectories of tablePath.
    *
-   * @param ms
-   * @param dbName
-   * @param tableName
-   * @param tablePath
-   * @param partitionKeys
-   * @param checkLocation
-   * @return
+   * @param ms RawStore to use for metadata retrieval and delete
+   * @param catName The catalog name
+   * @param dbName The database name
+   * @param tableName The table name
+   * @param tablePath The table path, whose subdirectories do not have to be checked
+   * @param checkLocation Whether to check the partition locations at all
+   * @return The list of Path objects to delete (only populated when checkLocation is true)
    * @throws MetaException
    * @throws IOException
-   * @throws InvalidInputException
-   * @throws InvalidObjectException
   * @throws NoSuchObjectException
   */
  private List<Path> dropPartitionsAndGetLocations(RawStore ms, String catName, String dbName,
-      String tableName, Path tablePath, List<FieldSchema> partitionKeys, boolean checkLocation)
-      throws MetaException, IOException, NoSuchObjectException, InvalidObjectException,
-      InvalidInputException {
-    int partitionBatchSize = MetastoreConf.getIntVar(conf,
-        ConfVars.BATCH_RETRIEVE_MAX);
-    Path tableDnsPath = null;
+      String tableName, Path tablePath, boolean checkLocation)
+      throws MetaException, IOException, NoSuchObjectException {
+    int batchSize = MetastoreConf.getIntVar(conf, ConfVars.BATCH_RETRIEVE_OBJECTS_MAX);
+    String tableDnsPath = null;
     if (tablePath != null) {
-      tableDnsPath = wh.getDnsPath(tablePath);
+      tableDnsPath = wh.getDnsPath(tablePath).toString();
    }
-    List<Path> partPaths = new ArrayList<>();
-    Table tbl = ms.getTable(catName, dbName, tableName);
-    // call dropPartition on each of the table's partitions to follow the
-    // procedure for cleanly dropping partitions.
+    List<Path> partPaths = new ArrayList<>();
     while (true) {
-      List<Partition> partsToDelete = ms.getPartitions(catName, dbName, tableName, partitionBatchSize);
-      if (partsToDelete == null || partsToDelete.isEmpty()) {
-        break;
-      }
-      List<String> partNames = new ArrayList<>();
-      for (Partition part : partsToDelete) {
-        if (checkLocation && part.getSd() != null &&
-            part.getSd().getLocation() != null) {
-
-          Path partPath = wh.getDnsPath(new Path(part.getSd().getLocation()));
-          if (tableDnsPath == null ||
-              (partPath != null && !isSubdirectory(tableDnsPath, partPath))) {
-            if (!wh.isWritable(partPath.getParent())) {
-              throw new MetaException("Table metadata not deleted since the partition " +
-                  Warehouse.makePartName(partitionKeys, part.getValues()) +
-                  " has parent location " + partPath.getParent() + " which is not writable " +
-                  "by " + SecurityUtils.getUser());
+      Map<String, String> partitionLocations = ms.getPartitionLocations(catName, dbName, tableName,
+          tableDnsPath, batchSize);
+      if (partitionLocations == null || partitionLocations.isEmpty()) {
+        // No more partitions left to drop. Return with the collected path list to delete.
+        return partPaths;
+      }
+
+      if (checkLocation) {
+        for (String partName : partitionLocations.keySet()) {
+          String pathString = partitionLocations.get(partName);
+          if (pathString != null) {
+            Path partPath = wh.getDnsPath(new Path(pathString));
+            // Double check here. Warehouse.getDnsPath may have revealed a relationship between
+            // the path objects.
+            if (tableDnsPath == null ||
+                !FileUtils.isSubdirectory(tableDnsPath, partPath.toString())) {
+              if (!wh.isWritable(partPath.getParent())) {
+                throw new MetaException("Table metadata not deleted since the partition " +
+                    partName + " has parent location " + partPath.getParent() +
+                    " which is not writable by " + SecurityUtils.getUser());
+              }
+              partPaths.add(partPath);
+            }
-            partPaths.add(partPath);
           }
         }
-        partNames.add(Warehouse.makePartName(tbl.getPartitionKeys(), part.getValues()));
       }
+
       for (MetaStoreEventListener listener : listeners) {
         //No drop part listener events fired for public listeners historically, for drop table case.
         //Limiting to internal listeners for now, to avoid unexpected calls for public listeners.
         if (listener instanceof HMSMetricsListener) {
-          for (@SuppressWarnings("unused") Partition part : partsToDelete) {
+          for (@SuppressWarnings("unused") String partName : partitionLocations.keySet()) {
            listener.onDropPartition(null);
          }
        }
      }
-      ms.dropPartitions(catName, dbName, tableName, partNames);
-    }
-    return partPaths;
+      ms.dropPartitions(catName, dbName, tableName, new ArrayList<>(partitionLocations.keySet()));
+    }
  }
 
   @Override
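[Editor's note, not part of the patch] The rewritten dropPartitionsAndGetLocations above drains a table's partitions in batches: each pass asks the RawStore for at most batchSize (partitionName, location) pairs, collects the locations that fall outside the table directory, drops that batch, and stops once an empty map comes back. A minimal, self-contained sketch of that drain pattern follows; the PartitionStore interface and every name in it are hypothetical stand-ins, not part of the patch.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/** Hypothetical stand-in for the subset of RawStore used by the drop loop. */
interface PartitionStore {
  /** Returns up to max (partitionName -> location) pairs; location is null when under baseLocation. */
  Map<String, String> getPartitionLocations(String table, String baseLocation, int max);
  void dropPartitions(String table, List<String> partNames);
}

class BatchedDropSketch {
  /** Drops all partitions of a table in batches and returns the locations that need separate deletion. */
  static List<String> dropAndCollectExternalLocations(PartitionStore store, String table,
      String tableLocation, int batchSize) {
    List<String> externalLocations = new ArrayList<>();
    while (true) {
      Map<String, String> batch = store.getPartitionLocations(table, tableLocation, batchSize);
      if (batch == null || batch.isEmpty()) {
        return externalLocations;   // nothing left to drop
      }
      for (Map.Entry<String, String> e : batch.entrySet()) {
        if (e.getValue() != null) { // non-null means the location is outside the table directory
          externalLocations.add(e.getValue());
        }
      }
      // Drop exactly the partitions fetched in this batch, then loop for the next batch.
      store.dropPartitions(table, new ArrayList<>(batch.keySet()));
    }
  }
}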
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index e99f888eef..0d2da7a200 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -2817,6 +2817,52 @@ private boolean dropPartitionCommon(MPartition part) throws NoSuchObjectExceptio
     return getPartitionsInternal(catName, dbName, tableName, maxParts, true, true);
   }
 
+  @Override
+  public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName,
+      String baseLocationToNotShow, int max) {
+    catName = normalizeIdentifier(catName);
+    dbName = normalizeIdentifier(dbName);
+    tblName = normalizeIdentifier(tblName);
+
+    boolean success = false;
+    Query query = null;
+    Map<String, String> partLocations = new HashMap<>();
+    try {
+      openTransaction();
+      LOG.debug("Executing getPartitionLocations");
+
+      query = pm.newQuery(MPartition.class);
+      query.setFilter("this.table.database.catalogName == t1 && this.table.database.name == t2 "
+          + "&& this.table.tableName == t3");
+      query.declareParameters("String t1, String t2, String t3");
+      query.setResult("this.partitionName, this.sd.location");
+      if (max >= 0) {
+        //Row limit specified, set it on the Query
+        query.setRange(0, max);
+      }
+
+      List<Object[]> result = (List<Object[]>)query.execute(catName, dbName, tblName);
+      for(Object[] row:result) {
+        String location = (String)row[1];
+        if (baseLocationToNotShow != null && location != null
+            && FileUtils.isSubdirectory(baseLocationToNotShow, location)) {
+          location = null;
+        }
+        partLocations.put((String)row[0], location);
+      }
+      LOG.debug("Done executing query for getPartitionLocations");
+      success = commitTransaction();
+    } finally {
+      if (!success) {
+        rollbackTransaction();
+      }
+      if (query != null) {
+        query.closeAll();
+      }
+    }
+    return partLocations;
+  }
+
   protected List<Partition> getPartitionsInternal(String catName, String dbName, String tblName,
       final int maxParts, boolean allowSql, boolean allowJdo)
       throws MetaException, NoSuchObjectException {
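[Editor's note, not part of the patch] The ObjectStore method above uses a JDO result projection (query.setResult("this.partitionName, this.sd.location")), so each row arrives as an Object[] holding two strings instead of a full MPartition. The sketch below isolates just the row post-processing step, assuming a plain list of such rows and substituting a simple prefix check for FileUtils.isSubdirectory; the class and method names are illustrative only.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

class PartitionLocationRows {
  /** Converts (partitionName, location) rows into a map, hiding locations under baseLocationToNotShow. */
  static Map<String, String> toLocationMap(List<Object[]> rows, String baseLocationToNotShow) {
    Map<String, String> partLocations = new HashMap<>();
    for (Object[] row : rows) {
      String partName = (String) row[0];
      String location = (String) row[1];
      // Simplified stand-in for FileUtils.isSubdirectory: plain prefix check with a separator.
      if (baseLocationToNotShow != null && location != null
          && location.startsWith(baseLocationToNotShow.endsWith("/")
              ? baseLocationToNotShow : baseLocationToNotShow + "/")) {
        location = null;   // caller only needs to know it lives under the table directory
      }
      partLocations.put(partName, location);
    }
    return partLocations;
  }
}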
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
index bbbdf21d4b..c8905c8698 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
@@ -362,6 +362,21 @@ boolean dropPartition(String catName, String dbName, String tableName, List
   List<Partition> getPartitions(String catName, String dbName, String tableName, int max)
       throws MetaException, NoSuchObjectException;
 
+  /**
+   * Get the location for every partition of a given table. If a partition location is a child of
+   * baseLocationToNotShow then the partition name is still returned, but its location is returned
+   * as null.
+   * @param catName catalog name.
+   * @param dbName database name.
+   * @param tblName table name.
+   * @param baseLocationToNotShow Partition locations which are children of this path are omitted,
+   *                              and a null value is returned instead.
+   * @param max The maximum number of partition locations returned, or -1 for all
+   * @return The map of the partitionName, location pairs
+   */
+  Map<String, String> getPartitionLocations(String catName, String dbName, String tblName,
+      String baseLocationToNotShow, int max);
+
   /**
    * Alter a table.
    * @param catName catalog the table is in.
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
index 7c3588d104..1da9798093 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
@@ -1038,6 +1038,12 @@ public void dropPartitions(String catName, String dbName, String tblName, List
+  @Override
+  public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName,
+      String baseLocationToNotShow, int max) {
+    return rawStore.getPartitionLocations(catName, dbName, tblName, baseLocationToNotShow, max);
+  }
+
   @Override
   public void alterTable(String catName, String dbName, String tblName, Table newTable)
       throws InvalidObjectException, MetaException {
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
index ec9e9e2b95..963e12f9d8 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
@@ -510,4 +510,18 @@ public static Path makeQualified(Path path, Configuration conf) throws IOExcepti
     return new Path(scheme, authority, pathUri.getPath());
   }
+
+
+  /**
+   * Returns a BEST GUESS as to whether or not other is a subdirectory of parent. It does not
+   * take into account any intricacies of the underlying file system, which is assumed to be
+   * HDFS. This should not return any false positives, but may return false negatives.
+   *
+   * @param parent Candidate parent directory
+   * @param other Directory to check if it is a subdirectory of parent
+   * @return True, if other is a subdirectory of parent
+   */
+  public static boolean isSubdirectory(String parent, String other) {
+    return other.startsWith(parent.endsWith(Path.SEPARATOR) ?
+        parent : parent + Path.SEPARATOR);
+  }
 }
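[Editor's note, not part of the patch] As its javadoc says, FileUtils.isSubdirectory is only a string-prefix test with a separator appended, so it can produce false negatives (for example when scheme or authority differ) but should not produce false positives. A small self-contained illustration follows, with the same logic inlined using a literal "/" so it runs without Hadoop on the classpath; the warehouse paths are made-up examples.

public class IsSubdirectoryExamples {
  public static void main(String[] args) {
    System.out.println(isSubdirectory("/warehouse/db.db/tbl", "/warehouse/db.db/tbl/p=1"));   // true
    System.out.println(isSubdirectory("/warehouse/db.db/tbl", "/warehouse/db.db/tbl2/p=1"));  // false: separator avoids prefix confusion
    System.out.println(isSubdirectory("/warehouse/db.db/tbl/", "/warehouse/db.db/tbl/p=1"));  // true: trailing separator handled
    System.out.println(isSubdirectory("/warehouse/db.db/tbl",
        "hdfs://nn:8020/warehouse/db.db/tbl/p=1"));                                           // false negative: scheme differs
  }

  // Same logic as the patch, with Path.SEPARATOR replaced by a literal "/" for this example.
  static boolean isSubdirectory(String parent, String other) {
    return other.startsWith(parent.endsWith("/") ? parent : parent + "/");
  }
}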
diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
index 7c7429db15..8e195d04fe 100644
--- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
+++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
@@ -271,6 +271,12 @@ public boolean dropPartition(String catName, String dbName, String tableName, Li
     return objectStore.getPartitions(catName, dbName, tableName, max);
   }
 
+  @Override
+  public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName,
+      String baseLocationToNotShow, int max) {
+    return objectStore.getPartitionLocations(catName, dbName, tblName, baseLocationToNotShow, max);
+  }
+
   @Override
   public void alterTable(String catName, String dbName, String name, Table newTable)
       throws InvalidObjectException, MetaException {
diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
index e4f2a17d64..85eb6d554b 100644
--- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
+++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
@@ -269,6 +269,12 @@ public boolean dropPartition(String catName, String dbName, String tableName, Li
     return Collections.emptyList();
   }
 
+  @Override
+  public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName,
+      String baseLocationToNotShow, int max) {
+    return Collections.emptyMap();
+  }
+
   @Override
   public void alterTable(String catName, String dbname, String name, Table newTable)
       throws InvalidObjectException, MetaException {
diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java
index 1a57df2680..55ef1533e2 100644
--- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java
+++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java
@@ -57,6 +57,7 @@ private MetaStoreFactoryForTests() {}
     // set some values to use for getting conf. vars
     MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.METRICS_ENABLED, true);
     MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX, 2);
+    MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_OBJECTS_MAX, 2);
     MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.LIMIT_PARTITION_REQUEST,
         DEFAULT_LIMIT_PARTITION_REQUEST);
     MetaStoreTestUtils.setConfForStandloneMode(conf);
diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java
index e1c3dcb47f..c40b42ab48 100644
--- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java
+++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java
@@ -167,12 +167,19 @@ public void setUp() throws Exception {
         .create(client, metaStore.getConf());
 
     // Create partitions for the partitioned table
-    for(int i=0; i < 3; i++) {
+    for(int i=0; i < 2; i++) {
       new PartitionBuilder()
           .inTable(testTables[3])
           .addValue("a" + i)
           .addToTable(client, metaStore.getConf());
     }
+    // Add an external partition too
+    new PartitionBuilder()
+        .inTable(testTables[3])
+        .addValue("a2")
+        .setLocation(metaStore.getWarehouseRoot() + "/external/a2")
+        .addToTable(client, metaStore.getConf());
+
     // Add data files to the partitioned table
     List<Partition> partitions = client.listPartitions(testTables[3].getDbName(),
         testTables[3].getTableName(), (short)-1);
@@ -530,6 +537,8 @@ public void testDropTableCaseInsensitive() throws Exception {
   @Test
   public void testDropTableDeleteDir() throws Exception {
     Table table = testTables[0];
+    Partition externalPartition = client.getPartition(partitionedTable.getDbName(),
+        partitionedTable.getTableName(), "test_part_col=a2");
 
     client.dropTable(table.getDbName(), table.getTableName(), true, false);
@@ -547,6 +556,9 @@ public void testDropTableDeleteDir() throws Exception {
 
     Assert.assertFalse("Table path should be removed",
         metaStore.isPathExists(new Path(partitionedTable.getSd().getLocation())));
+
+    Assert.assertFalse("Extra partition path should be removed",
+        metaStore.isPathExists(new Path(externalPartition.getSd().getLocation())));
   }
 
   @Test
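[Editor's note, not part of the patch] The test changes above exercise the end-to-end drop path; note that MetaStoreFactoryForTests pins BATCH_RETRIEVE_OBJECTS_MAX to 2, so the three partitions created in setUp force more than one fetch/drop round, and the external "a2" partition exercises the path-collection branch. For a RawStore implementation itself, the getPartitionLocations contract documented in RawStore.java could also be checked directly with a sketch like the one below; the helper class and the fixture arguments are hypothetical, only the RawStore method comes from this patch.

import java.util.Map;

import org.apache.hadoop.hive.metastore.RawStore;
import org.junit.Assert;

/**
 * Hedged sketch of a reusable contract check for RawStore.getPartitionLocations.
 * The fixture (catalog/db/table names, the single external partition) is assumed to be
 * created by the caller's test setup; nothing here writes metadata.
 */
public class GetPartitionLocationsContractCheck {
  public static void check(RawStore store, String catName, String dbName, String tblName,
      String tableLocation, String externalPartName, String externalLocation) {
    Map<String, String> locations =
        store.getPartitionLocations(catName, dbName, tblName, tableLocation, -1);

    // The external partition keeps its real location so the caller can delete it separately.
    Assert.assertEquals(externalLocation, locations.get(externalPartName));

    // Every other partition is still reported, but its location is hidden (null) because it
    // lives under the table directory.
    for (Map.Entry<String, String> e : locations.entrySet()) {
      if (!e.getKey().equals(externalPartName)) {
        Assert.assertNull(e.getValue());
      }
    }

    // The max parameter caps the number of returned entries.
    Assert.assertTrue(
        store.getPartitionLocations(catName, dbName, tblName, tableLocation, 1).size() <= 1);
  }
}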