diff --git hbase-handler/src/test/queries/positive/drop_database_table_hooks.q hbase-handler/src/test/queries/positive/drop_database_table_hooks.q new file mode 100644 index 0000000000000000000000000000000000000000..96263d04ead0e8fc987a8a62c2c4871456746491 --- /dev/null +++ hbase-handler/src/test/queries/positive/drop_database_table_hooks.q @@ -0,0 +1,57 @@ +CREATE DATABASE sometableshavehook; +USE sometableshavehook; + +CREATE TABLE NOHOOK0 (name string, number int); +CREATE TABLE NOHOOK1 (name string, number int); +CREATE TABLE NOHOOK2 (name string, number int); +CREATE TABLE NOHOOK3 (name string, number int); +CREATE TABLE NOHOOK4 (name string, number int); + +CREATE TABLE HBASEHOOK0 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +); +CREATE TABLE HBASEHOOK1 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +); +CREATE TABLE HBASEHOOK2 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +); + +set metastore.batch.retrieve.max=5; +DROP DATABASE sometableshavehook CASCADE; +SHOW DATABASES; + +CREATE DATABASE sometableshavehook; +USE sometableshavehook; + +CREATE TABLE NOHOOK0 (name string, number int); +CREATE TABLE NOHOOK1 (name string, number int); +CREATE TABLE NOHOOK2 (name string, number int); +CREATE TABLE NOHOOK3 (name string, number int); +CREATE TABLE NOHOOK4 (name string, number int); + +CREATE TABLE HBASEHOOK0 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +); +CREATE TABLE HBASEHOOK1 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +); 
+CREATE TABLE HBASEHOOK2 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +); + +set metastore.batch.retrieve.max=300; +DROP DATABASE sometableshavehook CASCADE; +SHOW DATABASES; \ No newline at end of file diff --git hbase-handler/src/test/results/positive/drop_database_table_hooks.q.out hbase-handler/src/test/results/positive/drop_database_table_hooks.q.out new file mode 100644 index 0000000000000000000000000000000000000000..90713ef840c291dcda4d9bccca10a69bb98d3ede --- /dev/null +++ hbase-handler/src/test/results/positive/drop_database_table_hooks.q.out @@ -0,0 +1,258 @@ +PREHOOK: query: CREATE DATABASE sometableshavehook +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:sometableshavehook +POSTHOOK: query: CREATE DATABASE sometableshavehook +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:sometableshavehook +PREHOOK: query: USE sometableshavehook +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:sometableshavehook +POSTHOOK: query: USE sometableshavehook +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:sometableshavehook +PREHOOK: query: CREATE TABLE NOHOOK0 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK0 +POSTHOOK: query: CREATE TABLE NOHOOK0 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK0 +PREHOOK: query: CREATE TABLE NOHOOK1 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK1 +POSTHOOK: query: CREATE TABLE NOHOOK1 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK1 +PREHOOK: query: CREATE TABLE NOHOOK2 (name string, number int) +PREHOOK: 
type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK2 +POSTHOOK: query: CREATE TABLE NOHOOK2 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK2 +PREHOOK: query: CREATE TABLE NOHOOK3 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK3 +POSTHOOK: query: CREATE TABLE NOHOOK3 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK3 +PREHOOK: query: CREATE TABLE NOHOOK4 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK4 +POSTHOOK: query: CREATE TABLE NOHOOK4 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK4 +PREHOOK: query: CREATE TABLE HBASEHOOK0 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@HBASEHOOK0 +POSTHOOK: query: CREATE TABLE HBASEHOOK0 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@HBASEHOOK0 +PREHOOK: query: CREATE TABLE HBASEHOOK1 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@HBASEHOOK1 
+POSTHOOK: query: CREATE TABLE HBASEHOOK1 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@HBASEHOOK1 +PREHOOK: query: CREATE TABLE HBASEHOOK2 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@HBASEHOOK2 +POSTHOOK: query: CREATE TABLE HBASEHOOK2 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@HBASEHOOK2 +PREHOOK: query: DROP DATABASE sometableshavehook CASCADE +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:sometableshavehook +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@hbasehook0 +PREHOOK: Output: sometableshavehook@hbasehook1 +PREHOOK: Output: sometableshavehook@hbasehook2 +PREHOOK: Output: sometableshavehook@nohook0 +PREHOOK: Output: sometableshavehook@nohook1 +PREHOOK: Output: sometableshavehook@nohook2 +PREHOOK: Output: sometableshavehook@nohook3 +PREHOOK: Output: sometableshavehook@nohook4 +POSTHOOK: query: DROP DATABASE sometableshavehook CASCADE +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:sometableshavehook +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@hbasehook0 +POSTHOOK: Output: sometableshavehook@hbasehook1 +POSTHOOK: Output: sometableshavehook@hbasehook2 +POSTHOOK: Output: sometableshavehook@nohook0 +POSTHOOK: Output: sometableshavehook@nohook1 +POSTHOOK: Output: sometableshavehook@nohook2 +POSTHOOK: 
Output: sometableshavehook@nohook3 +POSTHOOK: Output: sometableshavehook@nohook4 +PREHOOK: query: SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: CREATE DATABASE sometableshavehook +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:sometableshavehook +POSTHOOK: query: CREATE DATABASE sometableshavehook +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:sometableshavehook +PREHOOK: query: USE sometableshavehook +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:sometableshavehook +POSTHOOK: query: USE sometableshavehook +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:sometableshavehook +PREHOOK: query: CREATE TABLE NOHOOK0 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK0 +POSTHOOK: query: CREATE TABLE NOHOOK0 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK0 +PREHOOK: query: CREATE TABLE NOHOOK1 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK1 +POSTHOOK: query: CREATE TABLE NOHOOK1 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK1 +PREHOOK: query: CREATE TABLE NOHOOK2 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK2 +POSTHOOK: query: CREATE TABLE NOHOOK2 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK2 +PREHOOK: query: CREATE TABLE NOHOOK3 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: 
sometableshavehook@NOHOOK3 +POSTHOOK: query: CREATE TABLE NOHOOK3 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK3 +PREHOOK: query: CREATE TABLE NOHOOK4 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK4 +POSTHOOK: query: CREATE TABLE NOHOOK4 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK4 +PREHOOK: query: CREATE TABLE HBASEHOOK0 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@HBASEHOOK0 +POSTHOOK: query: CREATE TABLE HBASEHOOK0 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@HBASEHOOK0 +PREHOOK: query: CREATE TABLE HBASEHOOK1 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@HBASEHOOK1 +POSTHOOK: query: CREATE TABLE HBASEHOOK1 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@HBASEHOOK1 +PREHOOK: query: CREATE TABLE HBASEHOOK2 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + 
WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@HBASEHOOK2 +POSTHOOK: query: CREATE TABLE HBASEHOOK2 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@HBASEHOOK2 +PREHOOK: query: DROP DATABASE sometableshavehook CASCADE +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:sometableshavehook +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@hbasehook0 +PREHOOK: Output: sometableshavehook@hbasehook1 +PREHOOK: Output: sometableshavehook@hbasehook2 +PREHOOK: Output: sometableshavehook@nohook0 +PREHOOK: Output: sometableshavehook@nohook1 +PREHOOK: Output: sometableshavehook@nohook2 +PREHOOK: Output: sometableshavehook@nohook3 +PREHOOK: Output: sometableshavehook@nohook4 +POSTHOOK: query: DROP DATABASE sometableshavehook CASCADE +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:sometableshavehook +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@hbasehook0 +POSTHOOK: Output: sometableshavehook@hbasehook1 +POSTHOOK: Output: sometableshavehook@hbasehook2 +POSTHOOK: Output: sometableshavehook@nohook0 +POSTHOOK: Output: sometableshavehook@nohook1 +POSTHOOK: Output: sometableshavehook@nohook2 +POSTHOOK: Output: sometableshavehook@nohook3 +POSTHOOK: Output: sometableshavehook@nohook4 +PREHOOK: query: SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java deleted file mode 100644 index 
d8e771d0ffa7d680b2a22436727f896674cd40ff..0000000000000000000000000000000000000000 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.metadata; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.thrift.TException; - -/** - * Use this to get Table objects for a table list. It provides an iterator to - * on the resulting Table objects. It batches the calls to - * IMetaStoreClient.getTableObjectsByName to avoid OOM issues in HS2 (with - * embedded metastore) or MetaStore server (if HS2 is using remote metastore). - * - */ -public class TableIterable implements Iterable { - - @Override - public Iterator
iterator() { - return new Iterator
() { - - private final Iterator tableNamesIter = tableNames.iterator(); - private Iterator batchIter = null; - - @Override - public boolean hasNext() { - return ((batchIter != null) && batchIter.hasNext()) || tableNamesIter.hasNext(); - } - - @Override - public Table next() { - if ((batchIter == null) || !batchIter.hasNext()) { - getNextBatch(); - } - return batchIter.next(); - } - - private void getNextBatch() { - // get next batch of table names in this list - List nameBatch = new ArrayList(); - int batch_counter = 0; - while (batch_counter < batch_size && tableNamesIter.hasNext()) { - nameBatch.add(tableNamesIter.next()); - batch_counter++; - } - // get the Table objects for this batch of table names and get iterator - // on it - try { - try { - batchIter = msc.getTableObjectsByName(dbname, nameBatch).iterator(); - } catch (TException e) { - throw new HiveException(e); - } - } catch (HiveException e) { - throw new RuntimeException(e); - } - } - - @Override - public void remove() { - throw new IllegalStateException( - "TableIterable is a read-only iterable and remove() is unsupported"); - } - }; - } - - private final IMetaStoreClient msc; - private final String dbname; - private final List tableNames; - private final int batch_size; - - /** - * Primary constructor that fetches all tables in a given msc, given a Hive - * object,a db name and a table name list - */ - public TableIterable(IMetaStoreClient msc, String dbname, List tableNames, int batch_size) - throws TException { - this.msc = msc; - this.dbname = dbname; - this.tableNames = tableNames; - this.batch_size = batch_size; - } - -} diff --git ql/src/test/org/apache/hadoop/hive/ql/metadata/TestTableIterable.java ql/src/test/org/apache/hadoop/hive/ql/metadata/TestTableIterable.java deleted file mode 100644 index 6637d150b84c9fa86e6a3a90449606437e7c9d72..0000000000000000000000000000000000000000 --- ql/src/test/org/apache/hadoop/hive/ql/metadata/TestTableIterable.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.metadata; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; - -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.junit.Test; -import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.api.UnknownDBException; -import org.apache.thrift.TException; - -import static org.junit.Assert.*; -import static org.mockito.Mockito.*; -/** - * Unit tests for TableIterable - */ -public class TestTableIterable { - - @Test - public void testNumReturned() throws MetaException, InvalidOperationException, UnknownDBException, TException { - HiveMetaStoreClient msc = mock(HiveMetaStoreClient.class); - - - // create a mocked metastore client that returns 3 table objects every time it is called - // will use same size for TableIterable batch fetch size - List
threeTables = Arrays.asList(new Table(), new Table(), new Table()); - when(msc.getTableObjectsByName(anyString(), anyListOf(String.class))).thenReturn(threeTables); - - List tableNames = Arrays.asList("a", "b", "c", "d", "e", "f"); - TableIterable tIterable = new TableIterable(msc, "dummy", tableNames, threeTables.size()); - tIterable.iterator(); - - Iterator
tIter = tIterable.iterator(); - int size = 0; - while(tIter.hasNext()) { - size++; - tIter.next(); - } - assertEquals("Number of table objects returned", size, tableNames.size()); - - verify(msc).getTableObjectsByName("dummy", Arrays.asList("a","b","c")); - verify(msc).getTableObjectsByName("dummy", Arrays.asList("d","e","f")); - - } -} diff --git service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java index 838dd89ca82792ca8af8eb0f30aa63e690e41f43..6bbdce51200aa1cdc7bb1502e0ae6d6aafb22a9d 100644 --- service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java +++ service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java @@ -35,7 +35,7 @@ import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest; import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.metadata.TableIterable; +import org.apache.hadoop.hive.metastore.TableIterable; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index e88f9a5fee4b2cbd99ec7c5c5350f8c2b8015384..57c152b01e834cdbded3309e2f6d8feb68599fad 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -1534,6 +1534,7 @@ private void drop_database_core(RawStore ms, String catName, if (tables != null && !tables.isEmpty()) { for (Table table : tables) { + // If the table is not 
external and it might not be in a subdirectory of the database // add it's locations to the list of paths to delete Path tablePath = null; diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index da41e6eb2bacb84b67a517d9bc630a03f81e99c3..4c55ec96852c078c94539f1c5bdafb7738f1d980 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -1041,6 +1041,8 @@ public void dropDatabase(String catalogName, String dbName, boolean deleteData, return; } + String dbNameWithCatalog = prependCatalogToDbName(catalogName, dbName, conf); + if (cascade) { // Note that this logic may drop some of the tables of the database // even if the drop database fail for any reason @@ -1050,18 +1052,115 @@ public void dropDatabase(String catalogName, String dbName, boolean deleteData, // First we delete the materialized views dropTable(dbName, table, deleteData, true); } - List tableList = getAllTables(dbName); - for (String table : tableList) { - // Now we delete the rest of tables - try { - // Subclasses can override this step (for example, for temporary tables) - dropTable(dbName, table, deleteData, true); - } catch (UnsupportedOperationException e) { - // Ignore Index tables, those will be dropped with parent tables + + /** + * When dropping db cascade, client side hooks have to be called at each table removal. + * If {@link org.apache.hadoop.hive.metastore.conf.MetastoreConf#ConfVars.BATCH_RETRIEVE_MAX + * BATCH_RETRIEVE_MAX} is less than the number of tables in the DB, we'll have to call the + * hooks one by one each alongside with a + * {@link #dropTable(String, String, boolean, boolean, EnvironmentContext) dropTable} call to + * ensure transactionality. 
+ */ + List tableNameList = getAllTables(dbName); + int tableCount = tableNameList.size(); + int maxBatchSize = MetastoreConf.getIntVar(conf, ConfVars.BATCH_RETRIEVE_MAX); + LOG.debug("Selecting dropDatabase method for " + dbName + " (" + tableCount + " tables), " + + ConfVars.BATCH_RETRIEVE_MAX.getVarname() + "=" + maxBatchSize); + + if (tableCount > maxBatchSize) { + LOG.debug("Dropping database in a per table batch manner."); + dropDatabaseCascadePerTable(catalogName, dbName, tableNameList, deleteData, maxBatchSize); + } else { + LOG.debug("Dropping database in a per DB manner."); + dropDatabaseCascadePerDb(catalogName, dbName, tableNameList, deleteData); + } + + } else { + client.drop_database(dbNameWithCatalog, deleteData, cascade); + } + } + + /** + * Handles dropDatabase by invoking drop_table in HMS for each table. + * Useful when table list in DB is too large to fit in memory. It will retrieve tables in + * chunks and for each table with a drop_table hook it will invoke drop_table on both HMS and + * the hook. This is time-consuming, so hookless tables are skipped and will be dropped on + * server side when the client invokes drop_database. + * Note that this is 'less transactional' than dropDatabaseCascadePerDb since we're dropping + * table level objects, so this method might leave the DB partially dropped. 
+ * @param catName + * @param dbName + * @param tableList + * @param deleteData + * @param maxBatchSize + * @throws TException + */ + private void dropDatabaseCascadePerTable(String catName, String dbName, List tableList, + boolean deleteData, int maxBatchSize) throws TException { + String dbNameWithCatalog = prependCatalogToDbName(catName, dbName, conf); + for (Table table : new TableIterable(this, catName, dbName, tableList, maxBatchSize)) { + boolean success = false; + HiveMetaHook hook = getHook(table); + if (hook == null) { + continue; + } + try { + hook.preDropTable(table); + client.drop_table_with_environment_context(dbNameWithCatalog, table.getTableName(), deleteData, null); + hook.commitDropTable(table, deleteData); + success = true; + } finally { + if (!success) { + hook.rollbackDropTable(table); + } + } + } + client.drop_database(dbNameWithCatalog, deleteData, true); + } + + /** + * Handles dropDatabase by invoking drop_database in HMS. + * Useful when table list in DB can fit in memory, it will retrieve all tables at once and + * call drop_database once. Also handles drop_table hooks. + * @param catName + * @param dbName + * @param tableList + * @param deleteData + * @throws TException + */ + private void dropDatabaseCascadePerDb(String catName, String dbName, List tableList, + boolean deleteData) throws TException { + String dbNameWithCatalog = prependCatalogToDbName(catName, dbName, conf); + List
tables = getTableObjectsByName(catName, dbName, tableList); + boolean success = false; + try { + for (Table table : tables) { + HiveMetaHook hook = getHook(table); + if (hook == null) { + continue; + } + hook.preDropTable(table); + } + client.drop_database(dbNameWithCatalog, deleteData, true); + for (Table table : tables) { + HiveMetaHook hook = getHook(table); + if (hook == null) { + continue; + } + hook.commitDropTable(table, deleteData); + } + success = true; + } finally { + if (!success) { + for (Table table : tables) { + HiveMetaHook hook = getHook(table); + if (hook == null) { + continue; + } + hook.rollbackDropTable(table); } } } - client.drop_database(prependCatalogToDbName(catalogName, dbName, conf), deleteData, cascade); } @Override diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TableIterable.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TableIterable.java new file mode 100644 index 0000000000000000000000000000000000000000..1a17fe31c36538862beade4e0c3ee1428d2a6559 --- /dev/null +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TableIterable.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; + +/** + * Use this to get Table objects for a table list. It provides an iterator + * over the resulting Table objects. It batches the calls to + * IMetaStoreClient.getTableObjectsByName to avoid OOM issues in HS2 (with + * embedded metastore) or MetaStore server (if HS2 is using remote metastore). + * + */ +public class TableIterable implements Iterable
{ + + @Override + public Iterator
iterator() { + return new Iterator
() { + + private final Iterator tableNamesIter = tableNames.iterator(); + private Iterator batchIter = null; + + @Override + public boolean hasNext() { + return ((batchIter != null) && batchIter.hasNext()) || tableNamesIter.hasNext(); + } + + @Override + public Table next() { + if ((batchIter == null) || !batchIter.hasNext()) { + getNextBatch(); + } + return batchIter.next(); + } + + private void getNextBatch() { + // get next batch of table names in this list + List nameBatch = new ArrayList(); + int batchCounter = 0; + while (batchCounter < batchSize && tableNamesIter.hasNext()) { + nameBatch.add(tableNamesIter.next()); + batchCounter++; + } + // get the Table objects for this batch of table names and get iterator + // on it + + try { + if (catName != null) { + batchIter = msc.getTableObjectsByName(catName, dbname, nameBatch).iterator(); + } else { + batchIter = msc.getTableObjectsByName(dbname, nameBatch).iterator(); + } + } catch (TException e) { + throw new RuntimeException(e); + } + + } + + @Override + public void remove() { + throw new IllegalStateException( + "TableIterable is a read-only iterable and remove() is unsupported"); + } + }; + } + + private final IMetaStoreClient msc; + private final String dbname; + private final List tableNames; + private final int batchSize; + private final String catName; + + /** + * Primary constructor: creates an iterable over the Table objects of the named tables of a + * database, given a metastore client, a db name and a table name list. Tables are fetched in batches while iterating. 
+ */ + public TableIterable(IMetaStoreClient msc, String dbname, List tableNames, int batchSize) + throws TException { + this.msc = msc; + this.catName = null; + this.dbname = dbname; + this.tableNames = tableNames; + this.batchSize = batchSize; + } + + public TableIterable(IMetaStoreClient msc, String catName, String dbname, List + tableNames, int batchSize) throws TException { + this.msc = msc; + this.catName = catName; + this.dbname = dbname; + this.tableNames = tableNames; + this.batchSize = batchSize; + } +} diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestTableIterable.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestTableIterable.java new file mode 100644 index 0000000000000000000000000000000000000000..f0d442755cf074d3d39e4990351929c7f4995cf4 --- /dev/null +++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestTableIterable.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; +import org.apache.hadoop.hive.metastore.api.InvalidOperationException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.UnknownDBException; +import org.apache.thrift.TException; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.anyListOf; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +/** + * Unit tests for TableIterable. + */ +@Category(MetastoreUnitTest.class) +public class TestTableIterable { + + @Test + public void testNumReturned() throws MetaException, InvalidOperationException, + UnknownDBException, TException { + HiveMetaStoreClient msc = mock(HiveMetaStoreClient.class); + + + // create a mocked metastore client that returns 3 table objects every time it is called + // will use same size for TableIterable batch fetch size + List
threeTables = Arrays.asList(new Table(), new Table(), new Table()); + when(msc.getTableObjectsByName(anyString(), anyListOf(String.class))).thenReturn(threeTables); + + List tableNames = Arrays.asList("a", "b", "c", "d", "e", "f"); + TableIterable tIterable = new TableIterable(msc, "dummy", tableNames, + threeTables.size()); + tIterable.iterator(); + + Iterator
tIter = tIterable.iterator(); + int size = 0; + while(tIter.hasNext()) { + size++; + tIter.next(); + } + assertEquals("Number of table objects returned", size, tableNames.size()); + + verify(msc).getTableObjectsByName("dummy", Arrays.asList("a", "b", "c")); + verify(msc).getTableObjectsByName("dummy", Arrays.asList("d", "e", "f")); + + } +}