diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java deleted file mode 100644 index d8e771d0ffa7d680b2a22436727f896674cd40ff..0000000000000000000000000000000000000000 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.metadata; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.thrift.TException; - -/** - * Use this to get Table objects for a table list. It provides an iterator to - * on the resulting Table objects. It batches the calls to - * IMetaStoreClient.getTableObjectsByName to avoid OOM issues in HS2 (with - * embedded metastore) or MetaStore server (if HS2 is using remote metastore). - * - */ -public class TableIterable implements Iterable { - - @Override - public Iterator
<Table> iterator() { - return new Iterator<Table>
() { - - private final Iterator tableNamesIter = tableNames.iterator(); - private Iterator batchIter = null; - - @Override - public boolean hasNext() { - return ((batchIter != null) && batchIter.hasNext()) || tableNamesIter.hasNext(); - } - - @Override - public Table next() { - if ((batchIter == null) || !batchIter.hasNext()) { - getNextBatch(); - } - return batchIter.next(); - } - - private void getNextBatch() { - // get next batch of table names in this list - List nameBatch = new ArrayList(); - int batch_counter = 0; - while (batch_counter < batch_size && tableNamesIter.hasNext()) { - nameBatch.add(tableNamesIter.next()); - batch_counter++; - } - // get the Table objects for this batch of table names and get iterator - // on it - try { - try { - batchIter = msc.getTableObjectsByName(dbname, nameBatch).iterator(); - } catch (TException e) { - throw new HiveException(e); - } - } catch (HiveException e) { - throw new RuntimeException(e); - } - } - - @Override - public void remove() { - throw new IllegalStateException( - "TableIterable is a read-only iterable and remove() is unsupported"); - } - }; - } - - private final IMetaStoreClient msc; - private final String dbname; - private final List tableNames; - private final int batch_size; - - /** - * Primary constructor that fetches all tables in a given msc, given a Hive - * object,a db name and a table name list - */ - public TableIterable(IMetaStoreClient msc, String dbname, List tableNames, int batch_size) - throws TException { - this.msc = msc; - this.dbname = dbname; - this.tableNames = tableNames; - this.batch_size = batch_size; - } - -} diff --git ql/src/test/org/apache/hadoop/hive/ql/metadata/TestTableIterable.java ql/src/test/org/apache/hadoop/hive/ql/metadata/TestTableIterable.java deleted file mode 100644 index 6637d150b84c9fa86e6a3a90449606437e7c9d72..0000000000000000000000000000000000000000 --- ql/src/test/org/apache/hadoop/hive/ql/metadata/TestTableIterable.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.metadata; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; - -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.junit.Test; -import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.api.UnknownDBException; -import org.apache.thrift.TException; - -import static org.junit.Assert.*; -import static org.mockito.Mockito.*; -/** - * Unit tests for TableIterable - */ -public class TestTableIterable { - - @Test - public void testNumReturned() throws MetaException, InvalidOperationException, UnknownDBException, TException { - HiveMetaStoreClient msc = mock(HiveMetaStoreClient.class); - - - // create a mocked metastore client that returns 3 table objects every time it is called - // will use same size for TableIterable batch fetch size - List
<Table> threeTables = Arrays.asList(new Table(), new Table(), new Table()); - when(msc.getTableObjectsByName(anyString(), anyListOf(String.class))).thenReturn(threeTables); - - List<String> tableNames = Arrays.asList("a", "b", "c", "d", "e", "f"); - TableIterable tIterable = new TableIterable(msc, "dummy", tableNames, threeTables.size()); - tIterable.iterator(); - - Iterator<Table>
tIter = tIterable.iterator(); - int size = 0; - while(tIter.hasNext()) { - size++; - tIter.next(); - } - assertEquals("Number of table objects returned", size, tableNames.size()); - - verify(msc).getTableObjectsByName("dummy", Arrays.asList("a","b","c")); - verify(msc).getTableObjectsByName("dummy", Arrays.asList("d","e","f")); - - } -} diff --git service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java index 838dd89ca82792ca8af8eb0f30aa63e690e41f43..6bbdce51200aa1cdc7bb1502e0ae6d6aafb22a9d 100644 --- service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java +++ service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java @@ -35,7 +35,7 @@ import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest; import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.metadata.TableIterable; +import org.apache.hadoop.hive.metastore.TableIterable; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 92d2e3f368d5fe74c85532f00af3aa4c5ddd69d7..5b6d093dea6ca9fb3e90dce9d70578bf6d068d37 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -1471,6 +1471,7 @@ private void drop_database_core(RawStore ms, String catName, if (tables != null && !tables.isEmpty()) { for (Table table : tables) { + // If the table is not 
external and it might not be in a subdirectory of the database // add it's locations to the list of paths to delete Path tablePath = null; diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 6af2aa5b3a404ce5fbb5bcb741c41e9fc1ced595..4ccc90c5306e217733de0174a5418df6650941f2 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -1028,6 +1028,16 @@ public void dropDatabase(String catalogName, String dbName, boolean deleteData, return; } + String dbNameWithCatalog = prependCatalogToDbName(catalogName, dbName, conf); + + /** + * When dropping db cascade client side hooks have to be called at each table removal. + * If {@link org.apache.hadoop.hive.metastore.conf.MetastoreConf#ConfVars.BATCH_RETRIEVE_MAX + * BATCH_RETRIEVE_MAX} is less than the number of tables in the DB, we'll have to call the + * hooks one by one each alongside with a + * {@link #dropTable(String, String, boolean, boolean, EnvironmentContext) dropTable} call to + * ensure transactionality. 
+ */ if (cascade) { // Note that this logic may drop some of the tables of the database // even if the drop database fail for any reason @@ -1037,18 +1047,97 @@ public void dropDatabase(String catalogName, String dbName, boolean deleteData, // First we delete the materialized views dropTable(dbName, table, deleteData, true); } - List tableList = getAllTables(dbName); - for (String table : tableList) { - // Now we delete the rest of tables - try { - // Subclasses can override this step (for example, for temporary tables) - dropTable(dbName, table, deleteData, true); - } catch (UnsupportedOperationException e) { - // Ignore Index tables, those will be dropped with parent tables + List tableNameList = getAllTables(dbName); + int maxBatchSize = MetastoreConf.getIntVar(conf, ConfVars.BATCH_RETRIEVE_MAX); + + if (tableNameList.size() > maxBatchSize) { + dropDatabaseCascadePerTable(catalogName, dbName, tableNameList, deleteData, maxBatchSize); + } else { + dropDatabaseCascadePerDb(catalogName, dbName, tableNameList, deleteData); + } + + } else { + client.drop_database(dbNameWithCatalog, deleteData, cascade); + } + } + + /** + * Handles dropDatabase by invoking drop_table in HMS for each table. + * Useful when table list in DB is too large to fit in memory. It will retrieve tables in + * chunks and for each table with a drop_table hook it will invoke drop_table on both HMS and + * the hook. This is a timely operation so hookless tables are skipped and will be dropped on + * server side when the client invokes drop_database. 
+ * @param dbName + * @param tableList + * @param deleteData + * @param maxBatchSize + * @throws TException + */ + private void dropDatabaseCascadePerTable(String catName, String dbName, List tableList, + boolean deleteData, int maxBatchSize) throws TException { + String dbNameWithCatalog = prependCatalogToDbName(catName, dbName, conf); + for (Table table : new TableIterable(this, catName, dbName, tableList, maxBatchSize)) { + boolean success = false; + HiveMetaHook hook = getHook(table); + if (hook == null) { + continue; + } + try { + hook.preDropTable(table); + client.drop_table_with_environment_context(dbNameWithCatalog, table.getTableName(), deleteData, null); + hook.commitDropTable(table, deleteData); + success = true; + } finally { + if (!success) { + hook.rollbackDropTable(table); + } + } + } + client.drop_database(dbNameWithCatalog, deleteData, true); + } + + /** + * Handles dropDatabase by invoking drop_database in HMS. + * Useful when table list in DB is can fit in memory, it will retrieve all tables at once and + * call drop_database once. Also handles drop_table hooks. + * @param dbName + * @param tableList + * @param deleteData + * @throws TException + */ + private void dropDatabaseCascadePerDb(String catName, String dbName, List tableList, + boolean deleteData) throws TException { + String dbNameWithCatalog = prependCatalogToDbName(catName, dbName, conf); + List
tables = getTableObjectsByName(catName, dbName, tableList); + boolean success = false; + try { + for (Table table : tables) { + HiveMetaHook hook = getHook(table); + if (hook == null) { + continue; + } + hook.preDropTable(table); + } + client.drop_database(dbNameWithCatalog, deleteData, true); + for (Table table : tables) { + HiveMetaHook hook = getHook(table); + if (hook == null) { + continue; + } + hook.commitDropTable(table, deleteData); + } + success = true; + } finally { + if (!success) { + for (Table table : tables) { + HiveMetaHook hook = getHook(table); + if (hook == null) { + continue; + } + hook.rollbackDropTable(table); } } } - client.drop_database(prependCatalogToDbName(catalogName, dbName, conf), deleteData, cascade); } @Override diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TableIterable.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TableIterable.java new file mode 100644 index 0000000000000000000000000000000000000000..59115915673413aeccb50dc73161afbe8ccae6be --- /dev/null +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TableIterable.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; + +/** + * Use this to get Table objects for a table list. It provides an iterator to + * on the resulting Table objects. It batches the calls to + * IMetaStoreClient.getTableObjectsByName to avoid OOM issues in HS2 (with + * embedded metastore) or MetaStore server (if HS2 is using remote metastore). + * + */ +public class TableIterable implements Iterable
<Table> { + + @Override + public Iterator
<Table> iterator() { + return new Iterator<Table>
() { + + private final Iterator tableNamesIter = tableNames.iterator(); + private Iterator batchIter = null; + + @Override + public boolean hasNext() { + return ((batchIter != null) && batchIter.hasNext()) || tableNamesIter.hasNext(); + } + + @Override + public Table next() { + if ((batchIter == null) || !batchIter.hasNext()) { + getNextBatch(); + } + return batchIter.next(); + } + + private void getNextBatch() { + // get next batch of table names in this list + List nameBatch = new ArrayList(); + int batch_counter = 0; + while (batch_counter < batch_size && tableNamesIter.hasNext()) { + nameBatch.add(tableNamesIter.next()); + batch_counter++; + } + // get the Table objects for this batch of table names and get iterator + // on it + + try { + if (catName != null) { + batchIter = msc.getTableObjectsByName(catName, dbname, nameBatch).iterator(); + } else { + batchIter = msc.getTableObjectsByName(dbname, nameBatch).iterator(); + } + } catch (TException e) { + throw new RuntimeException(e); + } + + } + + @Override + public void remove() { + throw new IllegalStateException( + "TableIterable is a read-only iterable and remove() is unsupported"); + } + }; + } + + private final IMetaStoreClient msc; + private final String dbname; + private final List tableNames; + private final int batch_size; + private final String catName; + + /** + * Primary constructor that fetches all tables in a given msc, given a Hive + * object,a db name and a table name list + */ + public TableIterable(IMetaStoreClient msc, String dbname, List tableNames, int batch_size) + throws TException { + this.msc = msc; + this.catName = null; + this.dbname = dbname; + this.tableNames = tableNames; + this.batch_size = batch_size; + } + + public TableIterable(IMetaStoreClient msc, String catName, String dbname, List + tableNames, int batch_size) throws TException { + this.msc = msc; + this.catName = catName; + this.dbname = dbname; + this.tableNames = tableNames; + this.batch_size = batch_size; + } +} diff 
--git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestTableIterable.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestTableIterable.java new file mode 100644 index 0000000000000000000000000000000000000000..df96a6dcad09c725f6666eb3a4628bddb033e538 --- /dev/null +++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestTableIterable.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import org.apache.hadoop.hive.metastore.TableIterable; +import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; +import org.apache.hadoop.hive.metastore.api.InvalidOperationException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.UnknownDBException; +import org.apache.thrift.TException; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; +/** + * Unit tests for TableIterable + */ +@Category(MetastoreUnitTest.class) +public class TestTableIterable { + + @Test + public void testNumReturned() throws MetaException, InvalidOperationException, UnknownDBException, TException { + HiveMetaStoreClient msc = mock(HiveMetaStoreClient.class); + + + // create a mocked metastore client that returns 3 table objects every time it is called + // will use same size for TableIterable batch fetch size + List
<Table> threeTables = Arrays.asList(new Table(), new Table(), new Table()); + when(msc.getTableObjectsByName(anyString(), anyListOf(String.class))).thenReturn(threeTables); + + List<String> tableNames = Arrays.asList("a", "b", "c", "d", "e", "f"); + TableIterable tIterable = new TableIterable(msc, "dummy", tableNames, threeTables.size()); + tIterable.iterator(); + + Iterator<Table>
tIter = tIterable.iterator(); + int size = 0; + while(tIter.hasNext()) { + size++; + tIter.next(); + } + assertEquals("Number of table objects returned", size, tableNames.size()); + + verify(msc).getTableObjectsByName("dummy", Arrays.asList("a","b","c")); + verify(msc).getTableObjectsByName("dummy", Arrays.asList("d","e","f")); + + } +}