diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index e598a43e4d..3fa21b768c 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -1461,6 +1461,13 @@ private int getObjectCount(String fieldName, String objName) { LOG.debug("getTableMeta with filter " + filterBuilder.toString() + " params: " + StringUtils.join(parameterVals, ", ")); } + // Add the fetch group here which retrieves the database object along with the MTable + // objects. If we don't prefetch the database object, we could end up in a situation where + // the database gets dropped while we are looping through the tables throwing a + // JDOObjectNotFoundException. This causes HMS to go into a retry loop which greatly degrades + // performance of this function when called with dbNames="*" and tableNames="*" (fetch all + // tables in all databases, essentially a full dump) + pm.getFetchPlan().addGroup(FetchGroups.FETCH_DATABASE_ON_MTABLE); query = pm.newQuery(MTable.class, filterBuilder.toString()); Collection tables = (Collection) query.executeWithArray(parameterVals.toArray(new String[parameterVals.size()])); for (MTable table : tables) { @@ -1472,6 +1479,7 @@ private int getObjectCount(String fieldName, String objName) { } commited = commitTransaction(); } finally { + pm.getFetchPlan().removeGroup(FetchGroups.FETCH_DATABASE_ON_MTABLE); rollbackAndCleanup(commited, query); } return metas; diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/FetchGroups.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/FetchGroups.java new file mode 100644 index 0000000000..ef2f81d917 --- /dev/null 
+++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/FetchGroups.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.model; + +public class FetchGroups { + // This fetch group will fetch the database object when fetching MTable using JDO. 
+ public static final String FETCH_DATABASE_ON_MTABLE = "fetch_db_mtable"; +} diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java index 92182ae257..f2dfbfd284 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java @@ -21,7 +21,12 @@ import java.util.List; import java.util.Map; +import javax.jdo.annotations.FetchGroup; +import javax.jdo.annotations.PersistenceCapable; +import javax.jdo.annotations.Persistent; +@PersistenceCapable +@FetchGroup(name = FetchGroups.FETCH_DATABASE_ON_MTABLE, members = {@Persistent(name = "database")}) public class MTable { private long id; diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/StatementVerifyingDerby.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/StatementVerifyingDerby.java new file mode 100644 index 0000000000..45c5cb2107 --- /dev/null +++ standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/StatementVerifyingDerby.java @@ -0,0 +1,345 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore; + +import java.sql.Array; +import java.sql.Blob; +import java.sql.CallableStatement; +import java.sql.Clob; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.NClob; +import java.sql.PreparedStatement; +import java.sql.SQLClientInfoException; +import java.sql.SQLException; +import java.sql.SQLFeatureNotSupportedException; +import java.sql.SQLWarning; +import java.sql.SQLXML; +import java.sql.Savepoint; +import java.sql.Statement; +import java.sql.Struct; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.Executor; +import java.util.logging.Logger; + +/** + * Fake derby driver - companion class to enable testing by TestObjectStoreStatementVerify. + * This derby driver will call a verifying function during prepareStatement. 
+ */ +public class StatementVerifyingDerby extends org.apache.derby.jdbc.EmbeddedDriver { + + @Override + public boolean acceptsURL(String url) throws SQLException { + url = url.replace("sderby", "derby"); + return super.acceptsURL(url); + } + + @Override + public Connection connect(String url, Properties info) throws SQLException { + url = url.replace("sderby", "derby"); + return new StatementVerifyingDerby.StatementVerifyingConnection(super.connect(url, info)); + } + + @Override + public Logger getParentLogger() throws SQLFeatureNotSupportedException { + throw new SQLFeatureNotSupportedException(); + } + + public class StatementVerifyingConnection implements Connection { + private java.sql.Connection _baseConn; + + public StatementVerifyingConnection(Connection connection) { + this._baseConn = connection; + } + + @Override + public Statement createStatement() throws SQLException { + return _baseConn.createStatement(); + } + + @Override + public PreparedStatement prepareStatement(String sql) throws SQLException { + return _baseConn.prepareStatement(sql); + } + + @Override + public PreparedStatement prepareStatement(String sql, int resultSetType, int resultSetConcurrency, int resultSetHoldability) throws SQLException { + return _baseConn.prepareStatement(sql, resultSetType, resultSetConcurrency, resultSetHoldability); + } + + @Override + public PreparedStatement prepareStatement(String sql, int autoGeneratedKeys) throws SQLException { + return _baseConn.prepareStatement(sql, autoGeneratedKeys); + } + + @Override + public PreparedStatement prepareStatement(String sql, int[] columnIndexes) throws SQLException { + return _baseConn.prepareStatement(sql, columnIndexes); + } + + @Override + public PreparedStatement prepareStatement(String sql, String[] columnNames) throws SQLException { + return _baseConn.prepareStatement(sql, columnNames); + } + + @Override + public PreparedStatement prepareStatement(String sql, int resultSetType, int resultSetConcurrency) throws 
SQLException { + TestObjectStoreStatementVerify.verifySql(sql); + return _baseConn.prepareStatement(sql, resultSetType, resultSetConcurrency); + } + + @Override + public CallableStatement prepareCall(String sql) throws SQLException { + return _baseConn.prepareCall(sql); + } + + @Override + public CallableStatement prepareCall(String sql, int resultSetType, int resultSetConcurrency) throws SQLException { + return _baseConn.prepareCall(sql, resultSetType, resultSetConcurrency); + } + + @Override + public CallableStatement prepareCall(String sql, int resultSetType, int resultSetConcurrency, int resultSetHoldability) throws SQLException { + return _baseConn.prepareCall(sql, resultSetType, resultSetConcurrency, resultSetHoldability); + } + + @Override + public String nativeSQL(String sql) throws SQLException { + return _baseConn.nativeSQL(sql); + } + + @Override + public boolean getAutoCommit() throws SQLException { + return _baseConn.getAutoCommit(); + } + + @Override + public void setAutoCommit(boolean autoCommit) throws SQLException { + _baseConn.setAutoCommit(autoCommit); + } + + @Override + public void commit() throws SQLException { + _baseConn.commit(); + } + + @Override + public void rollback() throws SQLException { + _baseConn.rollback(); + } + + @Override + public void close() throws SQLException { + _baseConn.close(); + } + + @Override + public boolean isClosed() throws SQLException { + return _baseConn.isClosed(); + } + + @Override + public DatabaseMetaData getMetaData() throws SQLException { + return _baseConn.getMetaData(); + } + + @Override + public boolean isReadOnly() throws SQLException { + return _baseConn.isReadOnly(); + } + + @Override + public void setReadOnly(boolean readOnly) throws SQLException { + _baseConn.setReadOnly(readOnly); + } + + @Override + public String getCatalog() throws SQLException { + return _baseConn.getCatalog(); + } + + @Override + public void setCatalog(String catalog) throws SQLException { + _baseConn.setCatalog(catalog); + } 
+ + @Override + public int getTransactionIsolation() throws SQLException { + return _baseConn.getTransactionIsolation(); + } + + @Override + public void setTransactionIsolation(int level) throws SQLException { + _baseConn.setTransactionIsolation(level); + } + + @Override + public SQLWarning getWarnings() throws SQLException { + return _baseConn.getWarnings(); + } + + @Override + public void clearWarnings() throws SQLException { + _baseConn.clearWarnings(); + } + + @Override + public Statement createStatement(int resultSetType, int resultSetConcurrency) throws SQLException { + return _baseConn.createStatement(resultSetType, resultSetConcurrency); + } + + + @Override + public Map<String, Class<?>> getTypeMap() throws SQLException { + return _baseConn.getTypeMap(); + } + + @Override + public void setTypeMap(Map<String, Class<?>> map) throws SQLException { + _baseConn.setTypeMap(map); + } + + @Override + public int getHoldability() throws SQLException { + return _baseConn.getHoldability(); + } + + @Override + public void setHoldability(int holdability) throws SQLException { + _baseConn.setHoldability(holdability); + } + + @Override + public Savepoint setSavepoint() throws SQLException { + return _baseConn.setSavepoint(); + } + + @Override + public Savepoint setSavepoint(String name) throws SQLException { + return _baseConn.setSavepoint(name); + } + + @Override + public void rollback(Savepoint savepoint) throws SQLException { + _baseConn.rollback(savepoint); + } + + @Override + public void releaseSavepoint(Savepoint savepoint) throws SQLException { + _baseConn.releaseSavepoint(savepoint); + } + + @Override + public Statement createStatement(int resultSetType, int resultSetConcurrency, int resultSetHoldability) throws SQLException { + return _baseConn.createStatement(resultSetType, resultSetConcurrency, resultSetHoldability); + } + + + @Override + public Clob createClob() throws SQLException { + return _baseConn.createClob(); + } + + @Override + public Blob createBlob() throws SQLException { + return
_baseConn.createBlob(); + } + + @Override + public NClob createNClob() throws SQLException { + return _baseConn.createNClob(); + } + + @Override + public SQLXML createSQLXML() throws SQLException { + return _baseConn.createSQLXML(); + } + + @Override + public boolean isValid(int timeout) throws SQLException { + return _baseConn.isValid(timeout); + } + + @Override + public void setClientInfo(String name, String value) throws SQLClientInfoException { + _baseConn.setClientInfo(name, value); + } + + @Override + public String getClientInfo(String name) throws SQLException { + return _baseConn.getClientInfo(name); + } + + @Override + public Properties getClientInfo() throws SQLException { + return _baseConn.getClientInfo(); + } + + @Override + public void setClientInfo(Properties properties) throws SQLClientInfoException { + _baseConn.setClientInfo(properties); + } + + @Override + public Array createArrayOf(String typeName, Object[] elements) throws SQLException { + return _baseConn.createArrayOf(typeName, elements); + } + + @Override + public Struct createStruct(String typeName, Object[] attributes) throws SQLException { + return _baseConn.createStruct(typeName, attributes); + } + + @Override + public String getSchema() throws SQLException { + return _baseConn.getSchema(); + } + + @Override + public void setSchema(String schema) throws SQLException { + _baseConn.setSchema(schema); + } + + @Override + public void abort(Executor executor) throws SQLException { + _baseConn.abort(executor); + } + + @Override + public void setNetworkTimeout(Executor executor, int milliseconds) throws SQLException { + _baseConn.setNetworkTimeout(executor, milliseconds); + } + + @Override + public int getNetworkTimeout() throws SQLException { + return _baseConn.getNetworkTimeout(); + } + + @Override + public <T> T unwrap(Class<T> iface) throws SQLException { + return _baseConn.unwrap(iface); + } + + @Override + public boolean isWrapperFor(Class<?> iface) throws SQLException { + return
_baseConn.isWrapperFor(iface); + } + } +} diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStoreStatementVerify.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStoreStatementVerify.java new file mode 100644 index 0000000000..f9b5f4718d --- /dev/null +++ standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStoreStatementVerify.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore; + +import java.sql.DriverManager; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.InvalidOperationException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.TableMeta; +import org.apache.hadoop.hive.metastore.client.builder.TableBuilder; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(MetastoreUnitTest.class) +public class TestObjectStoreStatementVerify { + private ObjectStore objectStore = null; + private Configuration conf = null; + + private final String DB1 = "db1"; + private final String TBL1 = "db1_tbl1"; + + @BeforeClass + public static void oneTimeSetup() throws SQLException { + DriverManager.registerDriver(new StatementVerifyingDerby()); + } + + private ObjectStore createObjectStore() throws MetaException, InvalidOperationException { + conf = MetastoreConf.newMetastoreConf(); + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.HIVE_IN_TEST, true); + MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CONNECTION_DRIVER, StatementVerifyingDerby.class.getName()); + String jdbcUrl = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.CONNECT_URL_KEY); + jdbcUrl = jdbcUrl.replace("derby","sderby"); + MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CONNECT_URL_KEY,jdbcUrl); + MetaStoreTestUtils.setConfForStandloneMode(conf); + + final 
ObjectStore objectStore = new ObjectStore(); + objectStore.setConf(conf); + HiveMetaStore.HMSHandler.createDefaultCatalog(objectStore, new Warehouse(conf)); + return objectStore; + } + + // This function is called during the prepare statement step of object retrieval through DN + static void verifySql(final String sql) { + if (sql.contains("SELECT DISTINCT 'org.apache.hadoop.hive.metastore.model.MTable' AS")) { + verifyMTableDBFetchGroup(sql); + } + } + + private static void verifyMTableDBFetchGroup(final String sql) { + // We want to validate that when an MTable is fetched, we join the DBS table and fetch + // the database info as well. For example, if we don't use a proper fetch plan, the DN query + // would be something like + // + // SELECT DISTINCT 'org.apache.hadoop.hive.metastore.model.MTable' AS + // NUCLEUS_TYPE, A0.CREATE_TIME, A0.TBL_ID, A0.LAST_ACCESS_TIME, A0.OWNER, A0.OWNER_TYPE, + // A0.RETENTION, A0.IS_REWRITE_ENABLED, A0.TBL_NAME, A0.TBL_TYPE, A0.WRITE_ID + // FROM TBLS A0 + // LEFT OUTER JOIN DBS B0 ON A0.DB_ID = B0.DB_ID WHERE B0.CTLG_NAME = ? + // + // Note in the above query that we never pick anything from the DBS table! + // + // If you have a good fetch plan, your query should be something like + // + // SELECT DISTINCT 'org.apache.hadoop.hive.metastore.model.MTable' AS + // NUCLEUS_TYPE, A0.CREATE_TIME, C0.CTLG_NAME, C0."DESC", C0.DB_LOCATION_URI, C0."NAME", + // C0.OWNER_NAME, C0.OWNER_TYPE, C0.DB_ID, A0.TBL_ID, A0.LAST_ACCESS_TIME, A0.OWNER, + // A0.OWNER_TYPE, A0.RETENTION, A0.IS_REWRITE_ENABLED, A0.TBL_NAME, A0.TBL_TYPE, A0.WRITE_ID + // FROM TBLS A0 LEFT OUTER JOIN DBS B0 ON A0.DB_ID = B0.DB_ID + // LEFT OUTER JOIN DBS C0 ON A0.DB_ID = C0.DB_ID WHERE B0.CTLG_NAME = ? + // + // Notice that we pick the DB_ID, OWNER_TYPE, NAME, DESC etc from the DBS table. This is the + // correct behavior. + + // Step 1. 
Find the identifiers for the DBS database by matching on "JOIN DBS (xx) ON" + Pattern sqlPatternDb = Pattern.compile("JOIN\\ DBS\\ ([a-zA-Z0-9]+)\\ ON"); + Matcher matcher = sqlPatternDb.matcher(sql); + List<String> dbIdentifiers = new ArrayList<>(); + while (matcher.find()) { + dbIdentifiers.add(matcher.group(1)); + } + // Step 2. Now there should be a string with the db identifier which picks the NAME field from + // databases. If we don't find this, then we did not join in the database info. + boolean confirmedDbNameRetrieval = false; + for (String dbIdentifier : dbIdentifiers) { + if (sql.contains(dbIdentifier + ".\"NAME\"")) { + confirmedDbNameRetrieval = true; + break; + } + } + Assert.assertTrue("The Db info should be retrieved as part of MTable fetch", confirmedDbNameRetrieval); + } + + @Test + public void testGetTableMetaFetchGroup() throws MetaException, InvalidObjectException, + InvalidOperationException { + objectStore = createObjectStore(); + + Database db = new Database(DB1, "description", "locurl", null); + db.setCatalogName("hive"); + + objectStore.createDatabase(db); + objectStore.createTable(makeTable(DB1, TBL1)); + + List<TableMeta> tableMeta = objectStore.getTableMeta("hive", "*", "*", Collections.emptyList()); + Assert.assertEquals("Number of items for tableMeta is incorrect", 1, tableMeta.size()); + Assert.assertEquals("Table name incorrect", TBL1, tableMeta.get(0).getTableName()); + Assert.assertEquals("Db name incorrect", DB1, tableMeta.get(0).getDbName()); + } + + private Table makeTable(String dbName, String tblName) throws MetaException { + return new TableBuilder() + .setTableName(tblName) + .setDbName(dbName) + .setOwner("owner") + .setCreateTime(1) + .setLastAccessTime(2) + .setRetention(3) + .addTableParam("EXTERNAL", "false") + .setType("MANAGED_TABLE") + .setLocation("location") + .setCompressed(false) + .setNumBuckets(0) + .setSerdeName("SerDeName") + .setSerdeLib("serializationLib") + .addCol("some_col", "double", null) + .build(conf); + } +}