commit 13366d96f082a5256088f572673f02aab82fa757 Author: Vihang Karajgaonkar Date: Thu May 25 17:55:02 2017 -0700 HIVE-16771 : Schematool should use MetastoreSchemaInfo to get the metastore schema version from database diff --git beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java index f312e46d755fc843369167af6d4da3de7dbe4a61..3237841abd3c78e59d042ad107ed4be93e568058 100644 --- beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java +++ beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java @@ -80,6 +80,7 @@ private final String dbType; private final String metaDbType; private final IMetaStoreSchemaInfo metaStoreSchemaInfo; + private boolean needsQuotedIdentifier; static final private Logger LOG = LoggerFactory.getLogger(HiveSchemaTool.class.getName()); @@ -95,6 +96,7 @@ public HiveSchemaTool(String hiveHome, HiveConf hiveConf, String dbType, String this.hiveConf = hiveConf; this.dbType = dbType; this.metaDbType = metaDbType; + this.needsQuotedIdentifier = getDbCommandParser(dbType).needsQuotedIdentifier(); this.metaStoreSchemaInfo = MetaStoreSchemaInfoFactory.get(hiveConf, hiveHome, dbType); } @@ -169,42 +171,14 @@ private NestedScriptParser getDbCommandParser(String dbType) { public void showInfo() throws HiveMetaException { Connection metastoreConn = getConnectionToMetastore(true); String hiveVersion = metaStoreSchemaInfo.getHiveSchemaVersion(); - String dbVersion = getMetaStoreSchemaVersion(metastoreConn); + String dbVersion = metaStoreSchemaInfo.getMetaStoreSchemaVersion(metastoreConn, + needsQuotedIdentifier); System.out.println("Hive distribution version:\t " + hiveVersion); System.out.println("Metastore schema version:\t " + dbVersion); assertCompatibleVersion(hiveVersion, dbVersion); } - private String getMetaStoreSchemaVersion(Connection metastoreConn) - throws HiveMetaException { - return getMetaStoreSchemaVersion(metastoreConn, false); - } - - // read schema version from metastore - private String getMetaStoreSchemaVersion(Connection metastoreConn, - boolean checkDuplicatedVersion) throws HiveMetaException { - String versionQuery; - if (getDbCommandParser(dbType).needsQuotedIdentifier()) { - versionQuery = "select t.\"SCHEMA_VERSION\" from \"VERSION\" t"; - } else { - versionQuery = "select t.SCHEMA_VERSION from VERSION t"; - } - try(Statement stmt = metastoreConn.createStatement(); - ResultSet res = stmt.executeQuery(versionQuery)) { - if (!res.next()) { - throw new HiveMetaException("Could not find version info in metastore VERSION table."); - } - String currentSchemaVersion = res.getString(1); - if (checkDuplicatedVersion && res.next()) { - throw new HiveMetaException("Multiple versions were found in metastore."); - } - return currentSchemaVersion; - } catch (SQLException e) { - throw new HiveMetaException("Failed to get schema version, Cause:" + e.getMessage()); - } - } - boolean validateLocations(Connection conn, URI[] defaultServers) throws HiveMetaException { System.out.println("Validating database/table/partition locations"); boolean rtn; @@ -226,7 +200,7 @@ private boolean checkMetaStoreDBLocation(Connection conn, URI[] defaultServers) String dbLoc; boolean isValid = true; int numOfInvalid = 0; - if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + if (needsQuotedIdentifier) { dbLoc = "select dbt.\"DB_ID\", dbt.\"NAME\", dbt.\"DB_LOCATION_URI\" from \"DBS\" dbt"; } else { dbLoc = "select dbt.DB_ID, dbt.NAME, dbt.DB_LOCATION_URI from DBS dbt"; @@ -255,13 +229,13 @@ private boolean checkMetaStoreTableLocation(Connection conn, URI[] defaultServer String tabLoc, tabIDRange; boolean isValid = true; int numOfInvalid = 0; - if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + if (needsQuotedIdentifier) { tabIDRange = "select max(\"TBL_ID\"), min(\"TBL_ID\") from \"TBLS\" "; } else { tabIDRange = "select max(TBL_ID), min(TBL_ID) from TBLS"; } - if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + if (needsQuotedIdentifier) { tabLoc = "select tbl.\"TBL_ID\", tbl.\"TBL_NAME\", sd.\"LOCATION\", dbt.\"DB_ID\", dbt.\"NAME\" from \"TBLS\" tbl inner join " + "\"SDS\" sd on tbl.\"SD_ID\" = sd.\"SD_ID\" and tbl.\"TBL_TYPE\" != '" + TableType.VIRTUAL_VIEW + "' and tbl.\"TBL_ID\" >= ? and tbl.\"TBL_ID\"<= ? " + "inner join \"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" "; @@ -315,13 +289,13 @@ private boolean checkMetaStorePartitionLocation(Connection conn, URI[] defaultSe String partLoc, partIDRange; boolean isValid = true; int numOfInvalid = 0; - if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + if (needsQuotedIdentifier) { partIDRange = "select max(\"PART_ID\"), min(\"PART_ID\") from \"PARTITIONS\" "; } else { partIDRange = "select max(PART_ID), min(PART_ID) from PARTITIONS"; } - if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + if (needsQuotedIdentifier) { partLoc = "select pt.\"PART_ID\", pt.\"PART_NAME\", sd.\"LOCATION\", tbl.\"TBL_ID\", tbl.\"TBL_NAME\",dbt.\"DB_ID\", dbt.\"NAME\" from \"PARTITIONS\" pt " + "inner join \"SDS\" sd on pt.\"SD_ID\" = sd.\"SD_ID\" and pt.\"PART_ID\" >= ? and pt.\"PART_ID\"<= ? " + " inner join \"TBLS\" tbl on pt.\"TBL_ID\" = tbl.\"TBL_ID\" inner join " @@ -376,13 +350,13 @@ private boolean checkMetaStoreSkewedColumnsLocation(Connection conn, URI[] defau String skewedColLoc, skewedColIDRange; boolean isValid = true; int numOfInvalid = 0; - if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + if (needsQuotedIdentifier) { skewedColIDRange = "select max(\"STRING_LIST_ID_KID\"), min(\"STRING_LIST_ID_KID\") from \"SKEWED_COL_VALUE_LOC_MAP\" "; } else { skewedColIDRange = "select max(STRING_LIST_ID_KID), min(STRING_LIST_ID_KID) from SKEWED_COL_VALUE_LOC_MAP"; } - if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + if (needsQuotedIdentifier) { skewedColLoc = "select t.\"TBL_NAME\", t.\"TBL_ID\", sk.\"STRING_LIST_ID_KID\", sk.\"LOCATION\", db.\"NAME\", db.\"DB_ID\" from \"TBLS\" t, \"SDS\" s, \"DBS\" db, \"SKEWED_COL_VALUE_LOC_MAP\" sk " + "where sk.\"SD_ID\" = s.\"SD_ID\" and s.\"SD_ID\" = t.\"SD_ID\" and t.\"DB_ID\" = db.\"DB_ID\" and sk.\"STRING_LIST_ID_KID\" >= ? and sk.\"STRING_LIST_ID_KID\" <= ? "; } else { @@ -497,11 +471,10 @@ public void verifySchemaVersion() throws HiveMetaException { if (dryRun) { return; } - String newSchemaVersion = getMetaStoreSchemaVersion( - getConnectionToMetastore(false)); + String newSchemaVersion = metaStoreSchemaInfo.getMetaStoreSchemaVersion( + getConnectionToMetastore(false), needsQuotedIdentifier); // verify that the new version is added to schema assertCompatibleVersion(metaStoreSchemaInfo.getHiveSchemaVersion(), newSchemaVersion); - } private void assertCompatibleVersion(String hiveSchemaVersion, String dbSchemaVersion) @@ -517,8 +490,9 @@ private void assertCompatibleVersion(String hiveSchemaVersion, String dbSchemaVe * @throws MetaException */ public void doUpgrade() throws HiveMetaException { - String fromVersion = getMetaStoreSchemaVersion( - getConnectionToMetastore(false)); + String fromVersion = + metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionToMetastore(false), + needsQuotedIdentifier); if (fromVersion == null || fromVersion.isEmpty()) { throw new HiveMetaException("Schema version not stored in the metastore. " + "Metastore schema is too old or corrupt. Try specifying the version manually"); @@ -682,10 +656,10 @@ boolean validateSequences(Connection conn) throws HiveMetaException { for (String seqName : seqNameToTable.keySet()) { String tableName = seqNameToTable.get(seqName).getLeft(); String tableKey = seqNameToTable.get(seqName).getRight(); - String seqQuery = getDbCommandParser(dbType).needsQuotedIdentifier() ? + String seqQuery = needsQuotedIdentifier ? ("select t.\"NEXT_VAL\" from \"SEQUENCE_TABLE\" t WHERE t.\"SEQUENCE_NAME\"='org.apache.hadoop.hive.metastore.model." + seqName + "'") : ("select t.NEXT_VAL from SEQUENCE_TABLE t WHERE t.SEQUENCE_NAME='org.apache.hadoop.hive.metastore.model." + seqName + "'"); - String maxIdQuery = getDbCommandParser(dbType).needsQuotedIdentifier() ? + String maxIdQuery = needsQuotedIdentifier ? ("select max(\"" + tableKey + "\") from \"" + tableName + "\"") : ("select max(" + tableKey + ") from " + tableName); @@ -715,7 +689,7 @@ boolean validateSequences(Connection conn) throws HiveMetaException { boolean validateSchemaVersions(Connection conn) throws HiveMetaException { System.out.println("Validating schema version"); try { - String newSchemaVersion = getMetaStoreSchemaVersion(conn, true); + String newSchemaVersion = metaStoreSchemaInfo.getMetaStoreSchemaVersion(conn, needsQuotedIdentifier); assertCompatibleVersion(metaStoreSchemaInfo.getHiveSchemaVersion(), newSchemaVersion); } catch (HiveMetaException hme) { if (hme.getMessage().contains("Metastore schema version is not compatible") @@ -743,7 +717,8 @@ boolean validateSchemaTables(Connection conn) throws HiveMetaException { System.out.println("Validating metastore schema tables"); try { - version = getMetaStoreSchemaVersion(hmsConn); + version = metaStoreSchemaInfo.getMetaStoreSchemaVersion(hmsConn, + needsQuotedIdentifier); } catch (HiveMetaException he) { System.err.println("Failed to determine schema version from Hive Metastore DB. " + he.getMessage()); System.out.println("Failed in schema version validation."); @@ -781,7 +756,8 @@ boolean validateSchemaTables(Connection conn) throws HiveMetaException { // parse the schema file to determine the tables that are expected to exist // we are using oracle schema because it is simpler to parse, no quotes or backticks etc String baseDir = new File(metaStoreSchemaInfo.getMetaStoreScriptDir()).getParent(); - String schemaFile = baseDir + "/" + dbType + "/hive-schema-" + version + "." + dbType + ".sql"; + String schemaFile = metaStoreSchemaInfo.getMetaStoreScriptDir() + File.separatorChar + + metaStoreSchemaInfo.generateInitFileName(version); try { LOG.debug("Parsing schema script " + schemaFile); @@ -896,7 +872,7 @@ boolean validateColumnNullValues(Connection conn) throws HiveMetaException { boolean isValid = true; try { Statement stmt = conn.createStatement(); - String tblQuery = getDbCommandParser(dbType).needsQuotedIdentifier() ? + String tblQuery = needsQuotedIdentifier ? ("select t.* from \"TBLS\" t WHERE t.\"SD_ID\" IS NULL and (t.\"TBL_TYPE\"='" + TableType.EXTERNAL_TABLE + "' or t.\"TBL_TYPE\"='" + TableType.MANAGED_TABLE + "')") : ("select t.* from TBLS t WHERE t.SD_ID IS NULL and (t.TBL_TYPE='" + TableType.EXTERNAL_TABLE + "' or t.TBL_TYPE='" + TableType.MANAGED_TABLE + "')"); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreSchemaInfo.java metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreSchemaInfo.java index d6627433cb0f3f1c8fe868b3748fd754a7bff821..38db8116abbff2cec8120fc3f1bbfcfb1f153a5e 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreSchemaInfo.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreSchemaInfo.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.metastore; import org.apache.hadoop.hive.common.classification.InterfaceAudience; + +import java.sql.Connection; import java.util.List; /** @@ -78,6 +80,18 @@ String getHiveSchemaVersion(); /** + * Get the schema version from the backend database. This version is used by SchemaTool to to + * compare the version returned by getHiveSchemaVersion and determine the upgrade order and + * scripts needed to upgrade the metastore schema + * + * @param metastoreDbConnection Connection to the backend database + * @param needsQuotedIdentifier if true query should be built using quotes as identifier + * @return + * @throws HiveMetaException when unable to fetch the schema version + */ + String getMetaStoreSchemaVersion(Connection metastoreDbConnection, boolean needsQuotedIdentifier) + throws HiveMetaException; + /** * A dbVersion is compatible with hive version if it is greater or equal to the hive version. This * is result of the db schema upgrade design principles followed in hive project. The state where * db schema version is ahead of hive software version is often seen when a 'rolling upgrade' or diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreSchemaInfo.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreSchemaInfo.java index 6bddb8e864d26a6cb7e2fe9b8b8becf427779944..062ebf8ac41db8423d7ccfa48a1c6c13d15de5d1 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreSchemaInfo.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreSchemaInfo.java @@ -22,6 +22,10 @@ import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -198,4 +202,28 @@ public boolean isVersionCompatible(String hiveVersion, String dbVersion) { return true; } + @Override + public String getMetaStoreSchemaVersion(Connection metastoreDbConnection, + boolean needsQuotedIdentifier) throws HiveMetaException { + String versionQuery; + if (needsQuotedIdentifier) { + versionQuery = "select t.\"SCHEMA_VERSION\" from \"VERSION\" t"; + } else { + versionQuery = "select t.SCHEMA_VERSION from VERSION t"; + } + try { + Statement stmt = metastoreDbConnection.createStatement(); + ResultSet res = stmt.executeQuery(versionQuery); + if (!res.next()) { + throw new HiveMetaException("Could not find version info in metastore VERSION table."); + } + String currentSchemaVersion = res.getString(1); + if (res.next()) { + throw new HiveMetaException("Multiple versions were found in metastore."); + } + return currentSchemaVersion; + } catch (SQLException e) { + throw new HiveMetaException("Failed to get schema version, Cause:" + e.getMessage()); + } + } }