diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLine.java b/beeline/src/java/org/apache/hive/beeline/BeeLine.java
index 4ae2e3f..9e7cab8 100644
--- a/beeline/src/java/org/apache/hive/beeline/BeeLine.java
+++ b/beeline/src/java/org/apache/hive/beeline/BeeLine.java
@@ -172,7 +172,7 @@
   private static final String HIVE_VAR_PREFIX = "--hivevar";
   private static final String HIVE_CONF_PREFIX = "--hiveconf";
   private static final String PROP_FILE_PREFIX = "--property-file";
-  static final String PASSWD_MASK = "[passwd stripped]";
+  public static final String PASSWD_MASK = "[passwd stripped]";
   private final Map formats = map(new Object[] {
       "vertical", new VerticalOutputFormat(this),
diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java b/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java
index 85052d9..3877b5c 100644
--- a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java
+++ b/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java
@@ -48,7 +48,7 @@
 import jline.console.history.MemoryHistory;
 import org.apache.hadoop.hive.conf.HiveConf;
-class BeeLineOpts implements Completer {
+public class BeeLineOpts implements Completer {
   public static final int DEFAULT_MAX_WIDTH = 80;
   public static final int DEFAULT_MAX_HEIGHT = 80;
   public static final int DEFAULT_HEADER_INTERVAL = 100;
diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
deleted file mode 100644
index 7aad265..0000000
--- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
+++ /dev/null
@@ -1,1495 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ -package org.apache.hive.beeline; - -import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.cli.OptionGroup; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.apache.commons.io.output.NullOutputStream; -import org.apache.commons.lang.ArrayUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.DatabaseProduct; -import org.apache.hadoop.hive.metastore.HiveMetaException; -import org.apache.hadoop.hive.metastore.IMetaStoreSchemaInfo; -import org.apache.hadoop.hive.metastore.MetaStoreSchemaInfoFactory; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf; -import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; -import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.MetaStoreConnectionInfo; -import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.NestedScriptParser; -import org.apache.hadoop.hive.metastore.tools.SQLGenerator; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.ImmutableMap; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintStream; -import java.net.URI; -import java.sql.Connection; -import java.sql.DatabaseMetaData; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.SQLFeatureNotSupportedException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; - -public class HiveSchemaTool { - private String userName = null; - private String passWord = null; - private boolean dryRun = false; - private boolean verbose = false; - private String dbOpts = null; - private String url = null; - private String driver = null; - private URI[] validationServers = null; // The list of servers the database/partition/table can locate on - private final HiveConf hiveConf; - private final String dbType; - private final String metaDbType; - private final IMetaStoreSchemaInfo metaStoreSchemaInfo; - private boolean needsQuotedIdentifier; - private String quoteCharacter; - - static final private Logger LOG = LoggerFactory.getLogger(HiveSchemaTool.class.getName()); - - public HiveSchemaTool(String dbType, String metaDbType) throws HiveMetaException { - this(System.getenv("HIVE_HOME"), new HiveConf(HiveSchemaTool.class), dbType, metaDbType); - } - - public HiveSchemaTool(String hiveHome, HiveConf hiveConf, String dbType, String metaDbType) - throws HiveMetaException { - if (hiveHome == null || hiveHome.isEmpty()) { - throw new HiveMetaException("No Hive home 
directory provided"); - } - this.hiveConf = hiveConf; - this.dbType = dbType; - this.metaDbType = metaDbType; - NestedScriptParser parser = getDbCommandParser(dbType, metaDbType); - this.needsQuotedIdentifier = parser.needsQuotedIdentifier(); - this.quoteCharacter = parser.getQuoteCharacter(); - this.metaStoreSchemaInfo = MetaStoreSchemaInfoFactory.get(hiveConf, hiveHome, dbType); - // If the dbType is "hive", this is setting up the information schema in Hive. - // We will set the default jdbc url and driver. - // It is overriden by command line options if passed (-url and -driver - if (dbType.equalsIgnoreCase(HiveSchemaHelper.DB_HIVE)) { - url = HiveSchemaHelper.EMBEDDED_HS2_URL; - driver = HiveSchemaHelper.HIVE_JDBC_DRIVER; - } - } - - public HiveConf getHiveConf() { - return hiveConf; - } - - public void setUrl(String url) { - this.url = url; - } - - public void setDriver(String driver) { - this.driver = driver; - } - - public void setUserName(String userName) { - this.userName = userName; - } - - public void setPassWord(String passWord) { - this.passWord = passWord; - } - - public void setDryRun(boolean dryRun) { - this.dryRun = dryRun; - } - - public void setVerbose(boolean verbose) { - this.verbose = verbose; - } - - public void setDbOpts(String dbOpts) { - this.dbOpts = dbOpts; - } - - public void setValidationServers(String servers) { - if(StringUtils.isNotEmpty(servers)) { - String[] strServers = servers.split(","); - this.validationServers = new URI[strServers.length]; - for (int i = 0; i < validationServers.length; i++) { - validationServers[i] = new Path(strServers[i]).toUri(); - } - } - } - - private static void printAndExit(Options cmdLineOptions) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("schemaTool", cmdLineOptions); - System.exit(1); - } - - Connection getConnectionToMetastore(boolean printInfo) throws HiveMetaException { - return HiveSchemaHelper.getConnectionToMetastore(userName, passWord, url, driver, printInfo, hiveConf, - null); - } - - private NestedScriptParser getDbCommandParser(String dbType, String metaDbType) { - return HiveSchemaHelper.getDbCommandParser(dbType, dbOpts, userName, passWord, hiveConf, - metaDbType, false); - } - - /*** - * Print Hive version and schema version - * @throws MetaException - */ - public void showInfo() throws HiveMetaException { - String hiveVersion = metaStoreSchemaInfo.getHiveSchemaVersion(); - String dbVersion = metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionInfo(true)); - System.out.println("Hive distribution version:\t " + hiveVersion); - System.out.println("Metastore schema version:\t " + dbVersion); - assertCompatibleVersion(hiveVersion, dbVersion); - } - - boolean validateLocations(Connection conn, URI[] defaultServers) throws HiveMetaException { - System.out.println("Validating DFS locations"); - boolean rtn; - rtn = checkMetaStoreDBLocation(conn, defaultServers); - rtn = checkMetaStoreTableLocation(conn, defaultServers) && rtn; - rtn = checkMetaStorePartitionLocation(conn, defaultServers) && rtn; - rtn = checkMetaStoreSkewedColumnsLocation(conn, defaultServers) && rtn; - System.out.println((rtn ? "Succeeded" : "Failed") + " in DFS location validation."); - return rtn; - } - - private String getNameOrID(ResultSet res, int nameInx, int idInx) throws SQLException { - String itemName = res.getString(nameInx); - return (itemName == null || itemName.isEmpty()) ? 
"ID: " + res.getString(idInx) : "Name: " + itemName; - } - - private boolean checkMetaStoreDBLocation(Connection conn, URI[] defaultServers) - throws HiveMetaException { - String dbLoc; - boolean isValid = true; - int numOfInvalid = 0; - if (needsQuotedIdentifier) { - dbLoc = "select dbt.\"DB_ID\", dbt.\"NAME\", dbt.\"DB_LOCATION_URI\" from \"DBS\" dbt order by dbt.\"DB_ID\" "; - } else { - dbLoc = "select dbt.DB_ID, dbt.NAME, dbt.DB_LOCATION_URI from DBS dbt order by dbt.DB_ID"; - } - - try(Statement stmt = conn.createStatement(); - ResultSet res = stmt.executeQuery(dbLoc)) { - while (res.next()) { - String locValue = res.getString(3); - String dbName = getNameOrID(res,2,1); - if (!checkLocation("Database " + dbName, locValue, defaultServers)) { - numOfInvalid++; - } - } - } catch (SQLException e) { - throw new HiveMetaException("Failed to get DB Location Info.", e); - } - if (numOfInvalid > 0) { - isValid = false; - } - return isValid; - } - - private boolean checkMetaStoreTableLocation(Connection conn, URI[] defaultServers) - throws HiveMetaException { - String tabLoc, tabIDRange; - boolean isValid = true; - int numOfInvalid = 0; - if (needsQuotedIdentifier) { - tabIDRange = "select max(\"TBL_ID\"), min(\"TBL_ID\") from \"TBLS\" "; - } else { - tabIDRange = "select max(TBL_ID), min(TBL_ID) from TBLS"; - } - - if (needsQuotedIdentifier) { - tabLoc = "select tbl.\"TBL_ID\", tbl.\"TBL_NAME\", sd.\"LOCATION\", dbt.\"DB_ID\", dbt.\"NAME\" from \"TBLS\" tbl inner join " + - "\"SDS\" sd on tbl.\"SD_ID\" = sd.\"SD_ID\" and tbl.\"TBL_TYPE\" != '" + TableType.VIRTUAL_VIEW + - "' and tbl.\"TBL_ID\" >= ? and tbl.\"TBL_ID\"<= ? " + "inner join \"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" order by tbl.\"TBL_ID\" "; - } else { - tabLoc = "select tbl.TBL_ID, tbl.TBL_NAME, sd.LOCATION, dbt.DB_ID, dbt.NAME from TBLS tbl join SDS sd on tbl.SD_ID = sd.SD_ID and tbl.TBL_TYPE !='" - + TableType.VIRTUAL_VIEW + "' and tbl.TBL_ID >= ? and tbl.TBL_ID <= ? 
inner join DBS dbt on tbl.DB_ID = dbt.DB_ID order by tbl.TBL_ID"; - } - - long maxID = 0, minID = 0; - long rtnSize = 2000; - - try { - Statement stmt = conn.createStatement(); - ResultSet res = stmt.executeQuery(tabIDRange); - if (res.next()) { - maxID = res.getLong(1); - minID = res.getLong(2); - } - res.close(); - stmt.close(); - PreparedStatement pStmt = conn.prepareStatement(tabLoc); - while (minID <= maxID) { - pStmt.setLong(1, minID); - pStmt.setLong(2, minID + rtnSize); - res = pStmt.executeQuery(); - while (res.next()) { - String locValue = res.getString(3); - String entity = "Database " + getNameOrID(res, 5, 4) + - ", Table " + getNameOrID(res,2,1); - if (!checkLocation(entity, locValue, defaultServers)) { - numOfInvalid++; - } - } - res.close(); - minID += rtnSize + 1; - - } - pStmt.close(); - - } catch (SQLException e) { - throw new HiveMetaException("Failed to get Table Location Info.", e); - } - if (numOfInvalid > 0) { - isValid = false; - } - return isValid; - } - - private boolean checkMetaStorePartitionLocation(Connection conn, URI[] defaultServers) - throws HiveMetaException { - String partLoc, partIDRange; - boolean isValid = true; - int numOfInvalid = 0; - if (needsQuotedIdentifier) { - partIDRange = "select max(\"PART_ID\"), min(\"PART_ID\") from \"PARTITIONS\" "; - } else { - partIDRange = "select max(PART_ID), min(PART_ID) from PARTITIONS"; - } - - if (needsQuotedIdentifier) { - partLoc = "select pt.\"PART_ID\", pt.\"PART_NAME\", sd.\"LOCATION\", tbl.\"TBL_ID\", tbl.\"TBL_NAME\",dbt.\"DB_ID\", dbt.\"NAME\" from \"PARTITIONS\" pt " - + "inner join \"SDS\" sd on pt.\"SD_ID\" = sd.\"SD_ID\" and pt.\"PART_ID\" >= ? and pt.\"PART_ID\"<= ? " - + " inner join \"TBLS\" tbl on pt.\"TBL_ID\" = tbl.\"TBL_ID\" inner join " - + "\"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" order by tbl.\"TBL_ID\" "; - } else { - partLoc = "select pt.PART_ID, pt.PART_NAME, sd.LOCATION, tbl.TBL_ID, tbl.TBL_NAME, dbt.DB_ID, dbt.NAME from PARTITIONS pt " - + "inner join SDS sd on pt.SD_ID = sd.SD_ID and pt.PART_ID >= ? and pt.PART_ID <= ? 
" - + "inner join TBLS tbl on tbl.TBL_ID = pt.TBL_ID inner join DBS dbt on tbl.DB_ID = dbt.DB_ID order by tbl.TBL_ID "; - } - - long maxID = 0, minID = 0; - long rtnSize = 2000; - - try { - Statement stmt = conn.createStatement(); - ResultSet res = stmt.executeQuery(partIDRange); - if (res.next()) { - maxID = res.getLong(1); - minID = res.getLong(2); - } - res.close(); - stmt.close(); - PreparedStatement pStmt = conn.prepareStatement(partLoc); - while (minID <= maxID) { - pStmt.setLong(1, minID); - pStmt.setLong(2, minID + rtnSize); - res = pStmt.executeQuery(); - while (res.next()) { - String locValue = res.getString(3); - String entity = "Database " + getNameOrID(res,7,6) + - ", Table " + getNameOrID(res,5,4) + - ", Partition " + getNameOrID(res,2,1); - if (!checkLocation(entity, locValue, defaultServers)) { - numOfInvalid++; - } - } - res.close(); - minID += rtnSize + 1; - } - pStmt.close(); - } catch (SQLException e) { - throw new HiveMetaException("Failed to get Partition Location Info.", e); - } - if (numOfInvalid > 0) { - isValid = false; - } - return isValid; - } - - private boolean checkMetaStoreSkewedColumnsLocation(Connection conn, URI[] defaultServers) - throws HiveMetaException { - String skewedColLoc, skewedColIDRange; - boolean isValid = true; - int numOfInvalid = 0; - if (needsQuotedIdentifier) { - skewedColIDRange = "select max(\"STRING_LIST_ID_KID\"), min(\"STRING_LIST_ID_KID\") from \"SKEWED_COL_VALUE_LOC_MAP\" "; - } else { - skewedColIDRange = "select max(STRING_LIST_ID_KID), min(STRING_LIST_ID_KID) from SKEWED_COL_VALUE_LOC_MAP"; - } - - if (needsQuotedIdentifier) { - skewedColLoc = "select t.\"TBL_NAME\", t.\"TBL_ID\", sk.\"STRING_LIST_ID_KID\", sk.\"LOCATION\", db.\"NAME\", db.\"DB_ID\" " - + " from \"TBLS\" t, \"SDS\" s, \"DBS\" db, \"SKEWED_COL_VALUE_LOC_MAP\" sk " - + "where sk.\"SD_ID\" = s.\"SD_ID\" and s.\"SD_ID\" = t.\"SD_ID\" and t.\"DB_ID\" = db.\"DB_ID\" and " - + "sk.\"STRING_LIST_ID_KID\" >= ? and sk.\"STRING_LIST_ID_KID\" <= ? order by t.\"TBL_ID\" "; - } else { - skewedColLoc = "select t.TBL_NAME, t.TBL_ID, sk.STRING_LIST_ID_KID, sk.LOCATION, db.NAME, db.DB_ID from TBLS t, SDS s, DBS db, SKEWED_COL_VALUE_LOC_MAP sk " - + "where sk.SD_ID = s.SD_ID and s.SD_ID = t.SD_ID and t.DB_ID = db.DB_ID and sk.STRING_LIST_ID_KID >= ? and sk.STRING_LIST_ID_KID <= ? 
order by t.TBL_ID "; - } - - long maxID = 0, minID = 0; - long rtnSize = 2000; - - try { - Statement stmt = conn.createStatement(); - ResultSet res = stmt.executeQuery(skewedColIDRange); - if (res.next()) { - maxID = res.getLong(1); - minID = res.getLong(2); - } - res.close(); - stmt.close(); - PreparedStatement pStmt = conn.prepareStatement(skewedColLoc); - while (minID <= maxID) { - pStmt.setLong(1, minID); - pStmt.setLong(2, minID + rtnSize); - res = pStmt.executeQuery(); - while (res.next()) { - String locValue = res.getString(4); - String entity = "Database " + getNameOrID(res,5,6) + - ", Table " + getNameOrID(res,1,2) + - ", String list " + res.getString(3); - if (!checkLocation(entity, locValue, defaultServers)) { - numOfInvalid++; - } - } - res.close(); - minID += rtnSize + 1; - } - pStmt.close(); - } catch (SQLException e) { - throw new HiveMetaException("Failed to get skewed columns location info.", e); - } - if (numOfInvalid > 0) { - isValid = false; - } - return isValid; - } - - /** - * Check if the location is valid for the given entity - * @param entity the entity to represent a database, partition or table - * @param entityLocation the location - * @param defaultServers a list of the servers that the location needs to match. - * The location host needs to match one of the given servers. - * If empty, then no check against such list. - * @return true if the location is valid - */ - private boolean checkLocation( - String entity, - String entityLocation, - URI[] defaultServers) { - boolean isValid = true; - if (entityLocation == null) { - System.err.println(entity + ", Error: empty location"); - isValid = false; - } else { - try { - URI currentUri = new Path(entityLocation).toUri(); - String scheme = currentUri.getScheme(); - String path = currentUri.getPath(); - if (StringUtils.isEmpty(scheme)) { - System.err.println(entity + ", Location: "+ entityLocation + ", Error: missing location scheme."); - isValid = false; - } else if (StringUtils.isEmpty(path)) { - System.err.println(entity + ", Location: "+ entityLocation + ", Error: missing location path."); - isValid = false; - } else if (ArrayUtils.isNotEmpty(defaultServers) && currentUri.getAuthority() != null) { - String authority = currentUri.getAuthority(); - boolean matchServer = false; - for(URI server : defaultServers) { - if (StringUtils.equalsIgnoreCase(server.getScheme(), scheme) && - StringUtils.equalsIgnoreCase(server.getAuthority(), authority)) { - matchServer = true; - break; - } - } - if (!matchServer) { - System.err.println(entity + ", Location: " + entityLocation + ", Error: mismatched server."); - isValid = false; - } - } - - // if there is no path element other than "/", report it but not fail - if (isValid && StringUtils.containsOnly(path, "/")) { - System.err.println(entity + ", Location: "+ entityLocation + ", Warn: location set to root, not a recommended config."); - } - } catch (Exception pe) { - System.err.println(entity + ", Error: invalid location - " + pe.getMessage()); - isValid =false; - } - } - - return isValid; - } - - // test the connection metastore using the config property - private void testConnectionToMetastore() throws HiveMetaException { - Connection conn = getConnectionToMetastore(true); - try { - conn.close(); - } catch (SQLException e) { - throw new HiveMetaException("Failed to close metastore connection", e); - } - } - - - /** - * check if the current schema version in metastore matches the Hive version - * @throws MetaException - */ - public void verifySchemaVersion() throws 
HiveMetaException { - // don't check version if its a dry run - if (dryRun) { - return; - } - String newSchemaVersion = metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionInfo(false)); - // verify that the new version is added to schema - assertCompatibleVersion(metaStoreSchemaInfo.getHiveSchemaVersion(), newSchemaVersion); - } - - private void assertCompatibleVersion(String hiveSchemaVersion, String dbSchemaVersion) - throws HiveMetaException { - if (!metaStoreSchemaInfo.isVersionCompatible(hiveSchemaVersion, dbSchemaVersion)) { - throw new HiveMetaException("Metastore schema version is not compatible. Hive Version: " - + hiveSchemaVersion + ", Database Schema Version: " + dbSchemaVersion); - } - } - - /** - * Perform metastore schema upgrade. extract the current schema version from metastore - * @throws MetaException - */ - public void doUpgrade() throws HiveMetaException { - String fromVersion = - metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionInfo(false)); - if (fromVersion == null || fromVersion.isEmpty()) { - throw new HiveMetaException("Schema version not stored in the metastore. " + - "Metastore schema is too old or corrupt. Try specifying the version manually"); - } - doUpgrade(fromVersion); - } - - private MetaStoreConnectionInfo getConnectionInfo(boolean printInfo) { - return new MetaStoreConnectionInfo(userName, passWord, url, driver, printInfo, hiveConf, - dbType, metaDbType); - } - /** - * Perform metastore schema upgrade - * - * @param fromSchemaVer - * Existing version of the metastore. If null, then read from the metastore - * @throws MetaException - */ - public void doUpgrade(String fromSchemaVer) throws HiveMetaException { - if (metaStoreSchemaInfo.getHiveSchemaVersion().equals(fromSchemaVer)) { - System.out.println("No schema upgrade required from version " + fromSchemaVer); - return; - } - // Find the list of scripts to execute for this upgrade - List upgradeScripts = - metaStoreSchemaInfo.getUpgradeScripts(fromSchemaVer); - testConnectionToMetastore(); - System.out.println("Starting upgrade metastore schema from version " + - fromSchemaVer + " to " + metaStoreSchemaInfo.getHiveSchemaVersion()); - String scriptDir = metaStoreSchemaInfo.getMetaStoreScriptDir(); - try { - for (String scriptFile : upgradeScripts) { - System.out.println("Upgrade script " + scriptFile); - if (!dryRun) { - runPreUpgrade(scriptDir, scriptFile); - runBeeLine(scriptDir, scriptFile); - System.out.println("Completed " + scriptFile); - } - } - } catch (IOException eIO) { - throw new HiveMetaException( - "Upgrade FAILED! 
Metastore state would be inconsistent !!", eIO); - } - - // Revalidated the new version after upgrade - verifySchemaVersion(); - } - - /** - * Initialize the metastore schema to current version - * - * @throws MetaException - */ - public void doInit() throws HiveMetaException { - doInit(metaStoreSchemaInfo.getHiveSchemaVersion()); - - // Revalidated the new version after upgrade - verifySchemaVersion(); - } - - /** - * Initialize the metastore schema - * - * @param toVersion - * If null then current hive version is used - * @throws MetaException - */ - public void doInit(String toVersion) throws HiveMetaException { - testConnectionToMetastore(); - System.out.println("Starting metastore schema initialization to " + toVersion); - - String initScriptDir = metaStoreSchemaInfo.getMetaStoreScriptDir(); - String initScriptFile = metaStoreSchemaInfo.generateInitFileName(toVersion); - - try { - System.out.println("Initialization script " + initScriptFile); - if (!dryRun) { - runBeeLine(initScriptDir, initScriptFile); - System.out.println("Initialization script completed"); - } - } catch (IOException e) { - throw new HiveMetaException("Schema initialization FAILED!" + - " Metastore state would be inconsistent !!", e); - } - } - - public void doValidate() throws HiveMetaException { - System.out.println("Starting metastore validation\n"); - Connection conn = getConnectionToMetastore(false); - boolean success = true; - try { - if (validateSchemaVersions()) { - System.out.println("[SUCCESS]\n"); - } else { - success = false; - System.out.println("[FAIL]\n"); - } - if (validateSequences(conn)) { - System.out.println("[SUCCESS]\n"); - } else { - success = false; - System.out.println("[FAIL]\n"); - } - if (validateSchemaTables(conn)) { - System.out.println("[SUCCESS]\n"); - } else { - success = false; - System.out.println("[FAIL]\n"); - } - if (validateLocations(conn, this.validationServers)) { - System.out.println("[SUCCESS]\n"); - } else { - System.out.println("[WARN]\n"); - } - if (validateColumnNullValues(conn)) { - System.out.println("[SUCCESS]\n"); - } else { - System.out.println("[WARN]\n"); - } - } finally { - if (conn != null) { - try { - conn.close(); - } catch (SQLException e) { - throw new HiveMetaException("Failed to close metastore connection", e); - } - } - } - - System.out.print("Done with metastore validation: "); - if (!success) { - System.out.println("[FAIL]"); - System.exit(1); - } else { - System.out.println("[SUCCESS]"); - } - } - - boolean validateSequences(Connection conn) throws HiveMetaException { - Map> seqNameToTable = - new ImmutableMap.Builder>() - .put("MDatabase", Pair.of("DBS", "DB_ID")) - .put("MRole", Pair.of("ROLES", "ROLE_ID")) - .put("MGlobalPrivilege", Pair.of("GLOBAL_PRIVS", "USER_GRANT_ID")) - .put("MTable", Pair.of("TBLS","TBL_ID")) - .put("MStorageDescriptor", Pair.of("SDS", "SD_ID")) - .put("MSerDeInfo", Pair.of("SERDES", "SERDE_ID")) - .put("MColumnDescriptor", Pair.of("CDS", "CD_ID")) - .put("MTablePrivilege", Pair.of("TBL_PRIVS", "TBL_GRANT_ID")) - .put("MTableColumnStatistics", Pair.of("TAB_COL_STATS", "CS_ID")) - .put("MPartition", Pair.of("PARTITIONS", "PART_ID")) - .put("MPartitionColumnStatistics", Pair.of("PART_COL_STATS", "CS_ID")) - .put("MFunction", Pair.of("FUNCS", "FUNC_ID")) - .put("MIndex", Pair.of("IDXS", "INDEX_ID")) - .put("MStringList", Pair.of("SKEWED_STRING_LIST", "STRING_LIST_ID")) - .build(); - - System.out.println("Validating sequence number for SEQUENCE_TABLE"); - - boolean isValid = true; - try { - Statement stmt = 
conn.createStatement(); - for (String seqName : seqNameToTable.keySet()) { - String tableName = seqNameToTable.get(seqName).getLeft(); - String tableKey = seqNameToTable.get(seqName).getRight(); - String fullSequenceName = "org.apache.hadoop.hive.metastore.model." + seqName; - String seqQuery = needsQuotedIdentifier ? - ("select t.\"NEXT_VAL\" from \"SEQUENCE_TABLE\" t WHERE t.\"SEQUENCE_NAME\"=? order by t.\"SEQUENCE_NAME\" ") - : ("select t.NEXT_VAL from SEQUENCE_TABLE t WHERE t.SEQUENCE_NAME=? order by t.SEQUENCE_NAME "); - String maxIdQuery = needsQuotedIdentifier ? - ("select max(\"" + tableKey + "\") from \"" + tableName + "\"") - : ("select max(" + tableKey + ") from " + tableName); - - ResultSet res = stmt.executeQuery(maxIdQuery); - if (res.next()) { - long maxId = res.getLong(1); - if (maxId > 0) { - PreparedStatement pStmt = conn.prepareStatement(seqQuery); - pStmt.setString(1, fullSequenceName); - ResultSet resSeq = pStmt.executeQuery(); - if (!resSeq.next()) { - isValid = false; - System.err.println("Missing SEQUENCE_NAME " + seqName + " from SEQUENCE_TABLE"); - } else if (resSeq.getLong(1) < maxId) { - isValid = false; - System.err.println("NEXT_VAL for " + seqName + " in SEQUENCE_TABLE < max(" + - tableKey + ") in " + tableName); - } - } - } - } - - System.out.println((isValid ? "Succeeded" :"Failed") + " in sequence number validation for SEQUENCE_TABLE."); - return isValid; - } catch(SQLException e) { - throw new HiveMetaException("Failed to validate sequence number for SEQUENCE_TABLE", e); - } - } - - boolean validateSchemaVersions() throws HiveMetaException { - System.out.println("Validating schema version"); - try { - String newSchemaVersion = metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionInfo(false)); - assertCompatibleVersion(metaStoreSchemaInfo.getHiveSchemaVersion(), newSchemaVersion); - } catch (HiveMetaException hme) { - if (hme.getMessage().contains("Metastore schema version is not compatible") - || hme.getMessage().contains("Multiple versions were found in metastore") - || hme.getMessage().contains("Could not find version info in metastore VERSION table")) { - System.err.println(hme.getMessage()); - System.out.println("Failed in schema version validation."); - return false; - } else { - throw hme; - } - } - System.out.println("Succeeded in schema version validation."); - return true; - } - - boolean validateSchemaTables(Connection conn) throws HiveMetaException { - String version = null; - ResultSet rs = null; - DatabaseMetaData metadata = null; - List dbTables = new ArrayList(); - List schemaTables = new ArrayList(); - List subScripts = new ArrayList(); - Connection hmsConn = getConnectionToMetastore(false); - - System.out.println("Validating metastore schema tables"); - try { - version = metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionInfo(false)); - } catch (HiveMetaException he) { - System.err.println("Failed to determine schema version from Hive Metastore DB. 
" + he.getMessage()); - System.out.println("Failed in schema table validation."); - LOG.debug("Failed to determine schema version from Hive Metastore DB," + he.getMessage()); - return false; - } - - // re-open the hms connection - hmsConn = getConnectionToMetastore(false); - - LOG.debug("Validating tables in the schema for version " + version); - try { - String schema = null; - try { - schema = hmsConn.getSchema(); - } catch (SQLFeatureNotSupportedException e) { - LOG.debug("schema is not supported"); - } - - metadata = conn.getMetaData(); - String[] types = {"TABLE"}; - rs = metadata.getTables(null, schema, "%", types); - String table = null; - - while (rs.next()) { - table = rs.getString("TABLE_NAME"); - dbTables.add(table.toLowerCase()); - LOG.debug("Found table " + table + " in HMS dbstore"); - } - } catch (SQLException e) { - throw new HiveMetaException("Failed to retrieve schema tables from Hive Metastore DB", e); - } finally { - if (rs != null) { - try { - rs.close(); - } catch (SQLException e) { - throw new HiveMetaException("Failed to close resultset", e); - } - } - } - - // parse the schema file to determine the tables that are expected to exist - // we are using oracle schema because it is simpler to parse, no quotes or backticks etc - String baseDir = new File(metaStoreSchemaInfo.getMetaStoreScriptDir()).getParent(); - String schemaFile = new File(metaStoreSchemaInfo.getMetaStoreScriptDir(), - metaStoreSchemaInfo.generateInitFileName(version)).getPath(); - try { - LOG.debug("Parsing schema script " + schemaFile); - subScripts.addAll(findCreateTable(schemaFile, schemaTables)); - while (subScripts.size() > 0) { - schemaFile = baseDir + "/" + dbType + "/" + subScripts.remove(0); - LOG.debug("Parsing subscript " + schemaFile); - subScripts.addAll(findCreateTable(schemaFile, schemaTables)); - } - } catch (Exception e) { - System.err.println("Exception in parsing schema file. Cause:" + e.getMessage()); - System.out.println("Failed in schema table validation."); - return false; - } - - LOG.debug("Schema tables:[ " + Arrays.toString(schemaTables.toArray()) + " ]"); - LOG.debug("DB tables:[ " + Arrays.toString(dbTables.toArray()) + " ]"); - // now diff the lists - schemaTables.removeAll(dbTables); - if (schemaTables.size() > 0) { - Collections.sort(schemaTables); - System.err.println("Table(s) [ " + Arrays.toString(schemaTables.toArray()) - + " ] are missing from the metastore database schema."); - System.out.println("Failed in schema table validation."); - return false; - } else { - System.out.println("Succeeded in schema table validation."); - return true; - } - } - - private List findCreateTable(String path, List tableList) - throws Exception { - NestedScriptParser sp = HiveSchemaHelper.getDbCommandParser(dbType, false); - Matcher matcher = null; - Pattern regexp = null; - List subs = new ArrayList(); - int groupNo = 2; - - regexp = Pattern.compile("CREATE TABLE(\\s+IF NOT EXISTS)?\\s+(\\S+).*"); - - if (!(new File(path)).exists()) { - throw new Exception(path + " does not exist. 
Potentially incorrect version in the metastore VERSION table"); - } - - try ( - BufferedReader reader = new BufferedReader(new FileReader(path)); - ){ - String line = null; - while ((line = reader.readLine()) != null) { - if (sp.isNestedScript(line)) { - String subScript = null; - subScript = sp.getScriptName(line); - LOG.debug("Schema subscript " + subScript + " found"); - subs.add(subScript); - continue; - } - line = line.replaceAll("( )+", " "); //suppress multi-spaces - line = line.replaceAll("\\(", " "); - line = line.replaceAll("IF NOT EXISTS ", ""); - line = line.replaceAll("`",""); - line = line.replaceAll("'",""); - line = line.replaceAll("\"",""); - matcher = regexp.matcher(line); - - if (matcher.find()) { - String table = matcher.group(groupNo); - if (dbType.equals("derby")) - table = table.replaceAll("APP\\.",""); - tableList.add(table.toLowerCase()); - LOG.debug("Found table " + table + " in the schema"); - } - } - } catch (IOException ex){ - throw new Exception(ex.getMessage()); - } - - return subs; - } - - boolean validateColumnNullValues(Connection conn) throws HiveMetaException { - System.out.println("Validating columns for incorrect NULL values."); - boolean isValid = true; - try { - Statement stmt = conn.createStatement(); - String tblQuery = needsQuotedIdentifier ? - ("select t.* from \"TBLS\" t WHERE t.\"SD_ID\" IS NULL and (t.\"TBL_TYPE\"='" + TableType.EXTERNAL_TABLE + "' or t.\"TBL_TYPE\"='" + TableType.MANAGED_TABLE + "') order by t.\"TBL_ID\" ") - : ("select t.* from TBLS t WHERE t.SD_ID IS NULL and (t.TBL_TYPE='" + TableType.EXTERNAL_TABLE + "' or t.TBL_TYPE='" + TableType.MANAGED_TABLE + "') order by t.TBL_ID "); - - ResultSet res = stmt.executeQuery(tblQuery); - while (res.next()) { - long tableId = res.getLong("TBL_ID"); - String tableName = res.getString("TBL_NAME"); - String tableType = res.getString("TBL_TYPE"); - isValid = false; - System.err.println("SD_ID in TBLS should not be NULL for Table Name=" + tableName + ", Table ID=" + tableId + ", Table Type=" + tableType); - } - - System.out.println((isValid ? "Succeeded" : "Failed") + " in column validation for incorrect NULL values."); - return isValid; - } catch(SQLException e) { - throw new HiveMetaException("Failed to validate columns for incorrect NULL values", e); - } - } - - @VisibleForTesting - void createCatalog(String catName, String location, String description, boolean ifNotExists) - throws HiveMetaException { - catName = normalizeIdentifier(catName); - System.out.println("Create catalog " + catName + " at location " + location); - - Connection conn = getConnectionToMetastore(true); - boolean success = false; - try { - conn.setAutoCommit(false); - try (Statement stmt = conn.createStatement()) { - // If they set ifNotExists check for existence first, and bail if it exists. This is - // more reliable then attempting to parse the error message from the SQLException. 
- if (ifNotExists) { - String query = "select " + quoteIf("NAME") + " from " + quoteIf("CTLGS") + - " where " + quoteIf("NAME") + " = '" + catName + "'"; - LOG.debug("Going to run " + query); - ResultSet rs = stmt.executeQuery(query); - if (rs.next()) { - System.out.println("Catalog " + catName + " already exists"); - return; - } - } - SQLGenerator sqlGenerator = new SQLGenerator( - DatabaseProduct.determineDatabaseProduct( - conn.getMetaData().getDatabaseProductName() - ), hiveConf); - String query = sqlGenerator.addForUpdateClause("select max(" + quoteIf("CTLG_ID") + ") " + - "from " + quoteIf("CTLGS")); - LOG.debug("Going to run " + query); - ResultSet rs = stmt.executeQuery(query); - if (!rs.next()) { - throw new HiveMetaException("No catalogs found, have you upgraded the database?"); - } - int catNum = rs.getInt(1) + 1; - - String update = "insert into " + quoteIf("CTLGS") + - "(" + quoteIf("CTLG_ID") + ", " + quoteIf("NAME") + ", " + quoteAlways("DESC") + ", " + quoteIf( "LOCATION_URI") + ") " + - " values (" + catNum + ", '" + catName + "', '" + description + "', '" + location + "')"; - LOG.debug("Going to run " + update); - stmt.execute(update); - conn.commit(); - success = true; - } - } catch (MetaException|SQLException e) { - throw new HiveMetaException("Failed to add catalog", e); - } finally { - try { - if (!success) conn.rollback(); - } catch (SQLException e) { - // Not really much we can do here. - LOG.error("Failed to rollback, everything will probably go bad from here."); - } - } - } - - @VisibleForTesting - void moveDatabase(String fromCatName, String toCatName, String dbName) throws HiveMetaException { - fromCatName = normalizeIdentifier(fromCatName); - toCatName = normalizeIdentifier(toCatName); - dbName = normalizeIdentifier(dbName); - System.out.println("Moving database " + dbName + " from catalog " + fromCatName + - " to catalog " + toCatName); - Connection conn = getConnectionToMetastore(true); - boolean success = false; - try { - conn.setAutoCommit(false); - try (Statement stmt = conn.createStatement()) { - updateCatalogNameInTable(stmt, "DBS", "CTLG_NAME", "NAME", fromCatName, toCatName, dbName, false); - updateCatalogNameInTable(stmt, "TAB_COL_STATS", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); - updateCatalogNameInTable(stmt, "PART_COL_STATS", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); - updateCatalogNameInTable(stmt, "PARTITION_EVENTS", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); - updateCatalogNameInTable(stmt, "NOTIFICATION_LOG", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); - conn.commit(); - success = true; - } - } catch (SQLException e) { - throw new HiveMetaException("Failed to move database", e); - } finally { - try { - if (!success) conn.rollback(); - } catch (SQLException e) { - // Not really much we can do here. 
- LOG.error("Failed to rollback, everything will probably go bad from here."); - } - } - } - - private void updateCatalogNameInTable(Statement stmt, String tableName, String catColName, - String dbColName, String fromCatName, - String toCatName, String dbName, boolean zeroUpdatesOk) - throws HiveMetaException, SQLException { - String update = "update " + quoteIf(tableName) + " " + - "set " + quoteIf(catColName) + " = '" + toCatName + "' " + - "where " + quoteIf(catColName) + " = '" + fromCatName + "' and " + quoteIf(dbColName) + " = '" + dbName + "'"; - LOG.debug("Going to run " + update); - int numUpdated = stmt.executeUpdate(update); - if (numUpdated != 1 && !(zeroUpdatesOk && numUpdated == 0)) { - throw new HiveMetaException("Failed to properly update the " + tableName + - " table. Expected to update 1 row but instead updated " + numUpdated); - } - } - - @VisibleForTesting - void moveTable(String fromCat, String toCat, String fromDb, String toDb, String tableName) - throws HiveMetaException { - fromCat = normalizeIdentifier(fromCat); - toCat = normalizeIdentifier(toCat); - fromDb = normalizeIdentifier(fromDb); - toDb = normalizeIdentifier(toDb); - tableName = normalizeIdentifier(tableName); - Connection conn = getConnectionToMetastore(true); - boolean success = false; - try { - conn.setAutoCommit(false); - try (Statement stmt = conn.createStatement()) { - // Find the old database id - String query = "select " + quoteIf("DB_ID") + - " from " + quoteIf("DBS") + - " where " + quoteIf("NAME") + " = '" + fromDb + "' " - + "and " + quoteIf("CTLG_NAME") + " = '" + fromCat + "'"; - LOG.debug("Going to run " + query); - ResultSet rs = stmt.executeQuery(query); - if (!rs.next()) { - throw new HiveMetaException("Unable to find database " + fromDb); - } - long oldDbId = rs.getLong(1); - - // Find the new database id - query = "select " + quoteIf("DB_ID") + - " from " + quoteIf("DBS") + - " where " + quoteIf("NAME") + " = '" + toDb + "' " - + "and " + quoteIf("CTLG_NAME") + " = '" + toCat + "'"; - LOG.debug("Going to run " + query); - rs = stmt.executeQuery(query); - if (!rs.next()) { - throw new HiveMetaException("Unable to find database " + toDb); - } - long newDbId = rs.getLong(1); - - String update = "update " + quoteIf("TBLS") + " " + - "set " + quoteIf("DB_ID") + " = " + newDbId + " " + - "where " + quoteIf("DB_ID") + " = " + oldDbId + - " and " + quoteIf("TBL_NAME") + " = '" + tableName + "'"; - LOG.debug("Going to run " + update); - int numUpdated = stmt.executeUpdate(update); - if (numUpdated != 1) { - throw new HiveMetaException( - "Failed to properly update TBLS table. Expected to update " + - "1 row but instead updated " + numUpdated); - } - updateDbNameForTable(stmt, "TAB_COL_STATS", "TABLE_NAME", fromCat, toCat, fromDb, toDb, tableName); - updateDbNameForTable(stmt, "PART_COL_STATS", "TABLE_NAME", fromCat, toCat, fromDb, toDb, tableName); - updateDbNameForTable(stmt, "PARTITION_EVENTS", "TBL_NAME", fromCat, toCat, fromDb, toDb, tableName); - updateDbNameForTable(stmt, "NOTIFICATION_LOG", "TBL_NAME", fromCat, toCat, fromDb, toDb, tableName); - conn.commit(); - success = true; - } - } catch (SQLException se) { - throw new HiveMetaException("Failed to move table", se); - } finally { - try { - if (!success) conn.rollback(); - } catch (SQLException e) { - // Not really much we can do here. 
- LOG.error("Failed to rollback, everything will probably go bad from here."); - } - - } - } - - private void updateDbNameForTable(Statement stmt, String tableName, - String tableColumnName, String fromCat, String toCat, - String fromDb, String toDb, String hiveTblName) - throws HiveMetaException, SQLException { - String update = "update " + quoteIf(tableName) + " " + - "set " + quoteIf("CAT_NAME") + " = '" + toCat + "', " + quoteIf("DB_NAME") + " = '" + toDb + "' " + - "where " + quoteIf("CAT_NAME") + " = '" + fromCat + "' " + - "and " + quoteIf("DB_NAME") + " = '" + fromDb + "' " + - "and " + quoteIf(tableColumnName) + " = '" + hiveTblName + "'"; - LOG.debug("Going to run " + update); - int numUpdated = stmt.executeUpdate(update); - if (numUpdated > 1 || numUpdated < 0) { - throw new HiveMetaException("Failed to properly update the " + tableName + - " table. Expected to update 1 row but instead updated " + numUpdated); - } - } - - // Quote if the database requires it - private String quoteIf(String identifier) { - return needsQuotedIdentifier ? quoteCharacter + identifier + quoteCharacter : identifier; - } - - // Quote always, for fields that mimic SQL keywords, like DESC - private String quoteAlways(String identifier) { - return quoteCharacter + identifier + quoteCharacter; - } - - /** - * Run pre-upgrade scripts corresponding to a given upgrade script, - * if any exist. The errors from pre-upgrade are ignored. - * Pre-upgrade scripts typically contain setup statements which - * may fail on some database versions and failure is ignorable. - * - * @param scriptDir upgrade script directory name - * @param scriptFile upgrade script file name - */ - private void runPreUpgrade(String scriptDir, String scriptFile) { - for (int i = 0;; i++) { - String preUpgradeScript = - metaStoreSchemaInfo.getPreUpgradeScriptName(i, scriptFile); - File preUpgradeScriptFile = new File(scriptDir, preUpgradeScript); - if (!preUpgradeScriptFile.isFile()) { - break; - } - - try { - runBeeLine(scriptDir, preUpgradeScript); - System.out.println("Completed " + preUpgradeScript); - } catch (Exception e) { - // Ignore the pre-upgrade script errors - System.err.println("Warning in pre-upgrade script " + preUpgradeScript + ": " - + e.getMessage()); - if (verbose) { - e.printStackTrace(); - } - } - } - } - - /*** - * Run beeline with the given metastore script. Flatten the nested scripts - * into single file. - */ - private void runBeeLine(String scriptDir, String scriptFile) - throws IOException, HiveMetaException { - NestedScriptParser dbCommandParser = getDbCommandParser(dbType, metaDbType); - - // expand the nested script - // If the metaDbType is set, this is setting up the information - // schema in Hive. That specifically means that the sql commands need - // to be adjusted for the underlying RDBMS (correct quotation - // strings, etc). - String sqlCommands = dbCommandParser.buildCommand(scriptDir, scriptFile, metaDbType != null); - File tmpFile = File.createTempFile("schematool", ".sql"); - tmpFile.deleteOnExit(); - - // write out the buffer into a file. 
Add beeline commands for autocommit and close - FileWriter fstream = new FileWriter(tmpFile.getPath()); - BufferedWriter out = new BufferedWriter(fstream); - out.write("!autocommit on" + System.getProperty("line.separator")); - out.write(sqlCommands); - out.write("!closeall" + System.getProperty("line.separator")); - out.close(); - runBeeLine(tmpFile.getPath()); - } - - // Generate the beeline args per hive conf and execute the given script - public void runBeeLine(String sqlScriptFile) throws IOException { - CommandBuilder builder = new CommandBuilder(hiveConf, url, driver, - userName, passWord, sqlScriptFile); - - // run the script using Beeline - try (BeeLine beeLine = new BeeLine()) { - if (!verbose) { - beeLine.setOutputStream(new PrintStream(new NullOutputStream())); - beeLine.getOpts().setSilent(true); - } - beeLine.getOpts().setAllowMultiLineCommand(false); - beeLine.getOpts().setIsolation("TRANSACTION_READ_COMMITTED"); - // We can be pretty sure that an entire line can be processed as a single command since - // we always add a line separator at the end while calling dbCommandParser.buildCommand. - beeLine.getOpts().setEntireLineAsCommand(true); - LOG.debug("Going to run command <" + builder.buildToLog() + ">"); - int status = beeLine.begin(builder.buildToRun(), null); - if (status != 0) { - throw new IOException("Schema script failed, errorcode " + status); - } - } - } - - static class CommandBuilder { - private final HiveConf hiveConf; - private final String userName; - private final String password; - private final String sqlScriptFile; - private final String driver; - private final String url; - - CommandBuilder(HiveConf hiveConf, String url, String driver, - String userName, String password, String sqlScriptFile) { - this.hiveConf = hiveConf; - this.userName = userName; - this.password = password; - this.url = url; - this.driver = driver; - this.sqlScriptFile = sqlScriptFile; - } - - String[] buildToRun() throws IOException { - return argsWith(password); - } - - String buildToLog() throws IOException { - logScript(); - return StringUtils.join(argsWith(BeeLine.PASSWD_MASK), " "); - } - - private String[] argsWith(String password) throws IOException { - return new String[] - { - "-u", url == null ? HiveSchemaHelper.getValidConfVar(MetastoreConf.ConfVars.CONNECT_URL_KEY, hiveConf) : url, - "-d", driver == null ? HiveSchemaHelper.getValidConfVar(MetastoreConf.ConfVars.CONNECTION_DRIVER, hiveConf) : driver, - "-n", userName, - "-p", password, - "-f", sqlScriptFile - }; - } - - private void logScript() throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("Going to invoke file that contains:"); - try (BufferedReader reader = new BufferedReader(new FileReader(sqlScriptFile))) { - String line; - while ((line = reader.readLine()) != null) { - LOG.debug("script: " + line); - } - } - } - } - } - - // Create the required command line options - @SuppressWarnings("static-access") - private static void initOptions(Options cmdLineOptions) { - Option help = new Option("help", "print this message"); - Option upgradeOpt = new Option("upgradeSchema", "Schema upgrade"); - Option upgradeFromOpt = OptionBuilder.withArgName("upgradeFrom").hasArg(). - withDescription("Schema upgrade from a version"). - create("upgradeSchemaFrom"); - Option initOpt = new Option("initSchema", "Schema initialization"); - Option initToOpt = OptionBuilder.withArgName("initTo").hasArg(). - withDescription("Schema initialization to a version"). 
- create("initSchemaTo"); - Option infoOpt = new Option("info", "Show config and schema details"); - Option validateOpt = new Option("validate", "Validate the database"); - Option createCatalog = OptionBuilder - .hasArg() - .withDescription("Create a catalog, requires --catalogLocation parameter as well") - .create("createCatalog"); - Option moveDatabase = OptionBuilder - .hasArg() - .withDescription("Move a database between catalogs. Argument is the database name. " + - "Requires --fromCatalog and --toCatalog parameters as well") - .create("moveDatabase"); - Option moveTable = OptionBuilder - .hasArg() - .withDescription("Move a table to a different database. Argument is the table name. " + - "Requires --fromCatalog, --toCatalog, --fromDatabase, and --toDatabase " + - " parameters as well.") - .create("moveTable"); - - OptionGroup optGroup = new OptionGroup(); - optGroup.addOption(upgradeOpt).addOption(initOpt). - addOption(help).addOption(upgradeFromOpt). - addOption(initToOpt).addOption(infoOpt).addOption(validateOpt) - .addOption(createCatalog).addOption(moveDatabase).addOption(moveTable); - optGroup.setRequired(true); - - Option userNameOpt = OptionBuilder.withArgName("user") - .hasArgs() - .withDescription("Override config file user name") - .create("userName"); - Option passwdOpt = OptionBuilder.withArgName("password") - .hasArgs() - .withDescription("Override config file password") - .create("passWord"); - Option dbTypeOpt = OptionBuilder.withArgName("databaseType") - .hasArgs().withDescription("Metastore database type") - .create("dbType"); - Option metaDbTypeOpt = OptionBuilder.withArgName("metaDatabaseType") - .hasArgs().withDescription("Used only if upgrading the system catalog for hive") - .create("metaDbType"); - Option urlOpt = OptionBuilder.withArgName("url") - .hasArgs().withDescription("connection url to the database") - .create("url"); - Option driverOpt = OptionBuilder.withArgName("driver") - .hasArgs().withDescription("driver name for connection") - .create("driver"); - Option dbOpts = OptionBuilder.withArgName("databaseOpts") - .hasArgs().withDescription("Backend DB specific options") - .create("dbOpts"); - Option dryRunOpt = new Option("dryRun", "list SQL scripts (no execute)"); - Option verboseOpt = new Option("verbose", "only print SQL statements"); - Option serversOpt = OptionBuilder.withArgName("serverList") - .hasArgs().withDescription("a comma-separated list of servers used in location validation in the format of scheme://authority (e.g. hdfs://localhost:8000)") - .create("servers"); - Option catalogLocation = OptionBuilder - .hasArg() - .withDescription("Location of new catalog, required when adding a catalog") - .create("catalogLocation"); - Option catalogDescription = OptionBuilder - .hasArg() - .withDescription("Description of new catalog") - .create("catalogDescription"); - Option ifNotExists = OptionBuilder - .withDescription("If passed then it is not an error to create an existing catalog") - .create("ifNotExists"); - Option toCatalog = OptionBuilder - .hasArg() - .withDescription("Catalog a moving database or table is going to. This is " + - "required if you are moving a database or table.") - .create("toCatalog"); - Option fromCatalog = OptionBuilder - .hasArg() - .withDescription("Catalog a moving database or table is coming from. This is " + - "required if you are moving a database or table.") - .create("fromCatalog"); - Option toDatabase = OptionBuilder - .hasArg() - .withDescription("Database a moving table is going to. 
This is " + - "required if you are moving a table.") - .create("toDatabase"); - Option fromDatabase = OptionBuilder - .hasArg() - .withDescription("Database a moving table is coming from. This is " + - "required if you are moving a table.") - .create("fromDatabase"); - cmdLineOptions.addOption(help); - cmdLineOptions.addOption(dryRunOpt); - cmdLineOptions.addOption(userNameOpt); - cmdLineOptions.addOption(passwdOpt); - cmdLineOptions.addOption(dbTypeOpt); - cmdLineOptions.addOption(verboseOpt); - cmdLineOptions.addOption(metaDbTypeOpt); - cmdLineOptions.addOption(urlOpt); - cmdLineOptions.addOption(driverOpt); - cmdLineOptions.addOption(dbOpts); - cmdLineOptions.addOption(serversOpt); - cmdLineOptions.addOption(catalogLocation); - cmdLineOptions.addOption(catalogDescription); - cmdLineOptions.addOption(ifNotExists); - cmdLineOptions.addOption(toCatalog); - cmdLineOptions.addOption(fromCatalog); - cmdLineOptions.addOption(toDatabase); - cmdLineOptions.addOption(fromDatabase); - cmdLineOptions.addOptionGroup(optGroup); - } - - public static void main(String[] args) { - CommandLineParser parser = new GnuParser(); - CommandLine line = null; - String dbType = null; - String metaDbType = null; - String schemaVer = null; - Options cmdLineOptions = new Options(); - - // Argument handling - initOptions(cmdLineOptions); - try { - line = parser.parse(cmdLineOptions, args); - } catch (ParseException e) { - System.err.println("HiveSchemaTool:Parsing failed. Reason: " + e.getLocalizedMessage()); - printAndExit(cmdLineOptions); - } - - if (line.hasOption("help")) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("schemaTool", cmdLineOptions); - return; - } - - if (line.hasOption("dbType")) { - dbType = line.getOptionValue("dbType"); - if ((!dbType.equalsIgnoreCase(HiveSchemaHelper.DB_DERBY) && - !dbType.equalsIgnoreCase(HiveSchemaHelper.DB_HIVE) && - !dbType.equalsIgnoreCase(HiveSchemaHelper.DB_MSSQL) && - !dbType.equalsIgnoreCase(HiveSchemaHelper.DB_MYSQL) && - !dbType.equalsIgnoreCase(HiveSchemaHelper.DB_POSTGRACE) && !dbType - .equalsIgnoreCase(HiveSchemaHelper.DB_ORACLE))) { - System.err.println("Unsupported dbType " + dbType); - printAndExit(cmdLineOptions); - } - } else { - System.err.println("no dbType supplied"); - printAndExit(cmdLineOptions); - } - - if (line.hasOption("metaDbType")) { - metaDbType = line.getOptionValue("metaDbType"); - - if (!dbType.equals(HiveSchemaHelper.DB_HIVE)) { - System.err.println("metaDbType only supported for dbType = hive"); - printAndExit(cmdLineOptions); - } - - if (!metaDbType.equalsIgnoreCase(HiveSchemaHelper.DB_DERBY) && - !metaDbType.equalsIgnoreCase(HiveSchemaHelper.DB_MSSQL) && - !metaDbType.equalsIgnoreCase(HiveSchemaHelper.DB_MYSQL) && - !metaDbType.equalsIgnoreCase(HiveSchemaHelper.DB_POSTGRACE) && - !metaDbType.equalsIgnoreCase(HiveSchemaHelper.DB_ORACLE)) { - System.err.println("Unsupported metaDbType " + metaDbType); - printAndExit(cmdLineOptions); - } - } else if (dbType.equalsIgnoreCase(HiveSchemaHelper.DB_HIVE)) { - System.err.println("no metaDbType supplied"); - printAndExit(cmdLineOptions); - } - - - System.setProperty(HiveConf.ConfVars.METASTORE_SCHEMA_VERIFICATION.varname, "true"); - try { - HiveSchemaTool schemaTool = new HiveSchemaTool(dbType, metaDbType); - - if (line.hasOption("userName")) { - schemaTool.setUserName(line.getOptionValue("userName")); - } else { - schemaTool.setUserName( - schemaTool.getHiveConf().get(ConfVars.METASTORE_CONNECTION_USER_NAME.varname)); - } - if (line.hasOption("passWord")) { - 
schemaTool.setPassWord(line.getOptionValue("passWord")); - } else { - try { - schemaTool.setPassWord(ShimLoader.getHadoopShims().getPassword(schemaTool.getHiveConf(), - HiveConf.ConfVars.METASTOREPWD.varname)); - } catch (IOException err) { - throw new HiveMetaException("Error getting metastore password", err); - } - } - if (line.hasOption("url")) { - schemaTool.setUrl(line.getOptionValue("url")); - } - if (line.hasOption("driver")) { - schemaTool.setDriver(line.getOptionValue("driver")); - } - if (line.hasOption("dryRun")) { - schemaTool.setDryRun(true); - } - if (line.hasOption("verbose")) { - schemaTool.setVerbose(true); - } - if (line.hasOption("dbOpts")) { - schemaTool.setDbOpts(line.getOptionValue("dbOpts")); - } - if (line.hasOption("validate") && line.hasOption("servers")) { - schemaTool.setValidationServers(line.getOptionValue("servers")); - } - if (line.hasOption("info")) { - schemaTool.showInfo(); - } else if (line.hasOption("upgradeSchema")) { - schemaTool.doUpgrade(); - } else if (line.hasOption("upgradeSchemaFrom")) { - schemaVer = line.getOptionValue("upgradeSchemaFrom"); - schemaTool.doUpgrade(schemaVer); - } else if (line.hasOption("initSchema")) { - schemaTool.doInit(); - } else if (line.hasOption("initSchemaTo")) { - schemaVer = line.getOptionValue("initSchemaTo"); - schemaTool.doInit(schemaVer); - } else if (line.hasOption("validate")) { - schemaTool.doValidate(); - } else if (line.hasOption("createCatalog")) { - schemaTool.createCatalog(line.getOptionValue("createCatalog"), - line.getOptionValue("catalogLocation"), line.getOptionValue("catalogDescription"), - line.hasOption("ifNotExists")); - } else if (line.hasOption("moveDatabase")) { - schemaTool.moveDatabase(line.getOptionValue("fromCatalog"), - line.getOptionValue("toCatalog"), line.getOptionValue("moveDatabase")); - } else if (line.hasOption("moveTable")) { - schemaTool.moveTable(line.getOptionValue("fromCatalog"), line.getOptionValue("toCatalog"), - line.getOptionValue("fromDatabase"), line.getOptionValue("toDatabase"), - line.getOptionValue("moveTable")); - } else { - System.err.println("no valid option supplied"); - printAndExit(cmdLineOptions); - } - } catch (HiveMetaException e) { - System.err.println(e); - if (e.getCause() != null) { - Throwable t = e.getCause(); - System.err.println("Underlying cause: " - + t.getClass().getName() + " : " - + t.getMessage()); - if (e.getCause() instanceof SQLException) { - System.err.println("SQL Error code: " + ((SQLException)t).getErrorCode()); - } - } - if (line.hasOption("verbose")) { - e.printStackTrace(); - } else { - System.err.println("Use --verbose for detailed stacktrace."); - } - System.err.println("*** schemaTool failed ***"); - System.exit(1); - } - System.out.println("schemaTool completed"); - - } -} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaTool.java new file mode 100644 index 0000000..2f920dc --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaTool.java @@ -0,0 +1,397 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.beeline.schematool; + +import org.apache.commons.io.output.NullOutputStream; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.IMetaStoreSchemaInfo; +import org.apache.hadoop.hive.metastore.MetaStoreSchemaInfoFactory; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.MetaStoreConnectionInfo; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.NestedScriptParser; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hive.beeline.BeeLine; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintStream; +import java.net.URI; +import java.sql.Connection; +import java.sql.SQLException; + +public class HiveSchemaTool { + private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaTool.class.getName()); + + private final HiveConf hiveConf; + private final String dbType; + private final String metaDbType; + private final IMetaStoreSchemaInfo metaStoreSchemaInfo; + private final boolean needsQuotedIdentifier; + private String quoteCharacter; + + private String url = null; + private String driver = null; + private String userName = null; + private String passWord = null; + private boolean dryRun = false; + private boolean verbose = false; + private String dbOpts = null; + private URI[] validationServers = null; // The list of servers the database/partition/table can locate on + + private HiveSchemaTool(String dbType, String metaDbType) throws HiveMetaException { + this(System.getenv("HIVE_HOME"), new HiveConf(HiveSchemaTool.class), dbType, metaDbType); + } + + private HiveSchemaTool(String hiveHome, HiveConf hiveConf, String dbType, String metaDbType) + throws HiveMetaException { + if (hiveHome == null || hiveHome.isEmpty()) { + throw new HiveMetaException("No Hive home directory provided"); + } + this.hiveConf = hiveConf; + this.dbType = dbType; + this.metaDbType = metaDbType; + NestedScriptParser parser = getDbCommandParser(dbType, metaDbType); + this.needsQuotedIdentifier = parser.needsQuotedIdentifier(); + this.quoteCharacter = parser.getQuoteCharacter(); + this.metaStoreSchemaInfo = MetaStoreSchemaInfoFactory.get(hiveConf, hiveHome, dbType); + // If the dbType is "hive", this is setting up the information schema in Hive. + // We will set the default jdbc url and driver. 
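+ // (HiveSchemaHelper.EMBEDDED_HS2_URL points BeeLine at an embedded, in-process HiveServer2 rather than a remote server, so no standalone HiveServer2 is needed for this case.)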
+ // It is overriden by command line options if passed (-url and -driver + if (dbType.equalsIgnoreCase(HiveSchemaHelper.DB_HIVE)) { + url = HiveSchemaHelper.EMBEDDED_HS2_URL; + driver = HiveSchemaHelper.HIVE_JDBC_DRIVER; + } + } + + HiveConf getHiveConf() { + return hiveConf; + } + + String getDbType() { + return dbType; + } + + IMetaStoreSchemaInfo getMetaStoreSchemaInfo() { + return metaStoreSchemaInfo; + } + + private void setUrl(String url) { + this.url = url; + } + + private void setDriver(String driver) { + this.driver = driver; + } + + private void setUserName(String userName) { + this.userName = userName; + } + + private void setPassWord(String passWord) { + this.passWord = passWord; + } + + private void setDryRun(boolean dryRun) { + this.dryRun = dryRun; + } + + boolean isDryRun() { + return dryRun; + } + + private void setVerbose(boolean verbose) { + this.verbose = verbose; + } + + boolean isVerbose() { + return verbose; + } + + private void setDbOpts(String dbOpts) { + this.dbOpts = dbOpts; + } + + private void setValidationServers(String servers) { + if(StringUtils.isNotEmpty(servers)) { + String[] strServers = servers.split(","); + this.validationServers = new URI[strServers.length]; + for (int i = 0; i < validationServers.length; i++) { + validationServers[i] = new Path(strServers[i]).toUri(); + } + } + } + + URI[] getValidationServers() { + return validationServers; + } + + Connection getConnectionToMetastore(boolean printInfo) throws HiveMetaException { + return HiveSchemaHelper.getConnectionToMetastore(userName, passWord, url, driver, printInfo, hiveConf, + null); + } + + private NestedScriptParser getDbCommandParser(String dbType, String metaDbType) { + return HiveSchemaHelper.getDbCommandParser(dbType, dbOpts, userName, passWord, hiveConf, + metaDbType, false); + } + + // test the connection metastore using the config property + void testConnectionToMetastore() throws HiveMetaException { + Connection conn = getConnectionToMetastore(true); + try { + conn.close(); + } catch (SQLException e) { + throw new HiveMetaException("Failed to close metastore connection", e); + } + } + + /** + * check if the current schema version in metastore matches the Hive version + * @throws MetaException + */ + void verifySchemaVersion() throws HiveMetaException { + // don't check version if its a dry run + if (dryRun) { + return; + } + String newSchemaVersion = metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionInfo(false)); + // verify that the new version is added to schema + assertCompatibleVersion(metaStoreSchemaInfo.getHiveSchemaVersion(), newSchemaVersion); + } + + void assertCompatibleVersion(String hiveSchemaVersion, String dbSchemaVersion) + throws HiveMetaException { + if (!metaStoreSchemaInfo.isVersionCompatible(hiveSchemaVersion, dbSchemaVersion)) { + throw new HiveMetaException("Metastore schema version is not compatible. Hive Version: " + + hiveSchemaVersion + ", Database Schema Version: " + dbSchemaVersion); + } + } + + MetaStoreConnectionInfo getConnectionInfo(boolean printInfo) { + return new MetaStoreConnectionInfo(userName, passWord, url, driver, printInfo, hiveConf, + dbType, metaDbType); + } + + // Quote if the database requires it + String quote(String stmt) { + stmt = stmt.replace("", needsQuotedIdentifier ? quoteCharacter : ""); + stmt = stmt.replace("", quoteCharacter); + return stmt; + } + + /*** + * Run beeline with the given metastore script. Flatten the nested scripts + * into single file. 
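+ * The flattened SQL is written to a temporary file, wrapped with "!autocommit on" and "!closeall", and then executed through runBeeLine(String).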
+ */ + void runBeeLine(String scriptDir, String scriptFile) + throws IOException, HiveMetaException { + NestedScriptParser dbCommandParser = getDbCommandParser(dbType, metaDbType); + + // expand the nested script + // If the metaDbType is set, this is setting up the information + // schema in Hive. That specifically means that the sql commands need + // to be adjusted for the underlying RDBMS (correct quotation + // strings, etc). + String sqlCommands = dbCommandParser.buildCommand(scriptDir, scriptFile, metaDbType != null); + File tmpFile = File.createTempFile("schematool", ".sql"); + tmpFile.deleteOnExit(); + + // write out the buffer into a file. Add beeline commands for autocommit and close + FileWriter fstream = new FileWriter(tmpFile.getPath()); + BufferedWriter out = new BufferedWriter(fstream); + out.write("!autocommit on" + System.getProperty("line.separator")); + out.write(sqlCommands); + out.write("!closeall" + System.getProperty("line.separator")); + out.close(); + runBeeLine(tmpFile.getPath()); + } + + // Generate the beeline args per hive conf and execute the given script + void runBeeLine(String sqlScriptFile) throws IOException { + CommandBuilder builder = new CommandBuilder(hiveConf, url, driver, + userName, passWord, sqlScriptFile); + + // run the script using Beeline + try (BeeLine beeLine = new BeeLine()) { + if (!verbose) { + beeLine.setOutputStream(new PrintStream(new NullOutputStream())); + beeLine.getOpts().setSilent(true); + } + beeLine.getOpts().setAllowMultiLineCommand(false); + beeLine.getOpts().setIsolation("TRANSACTION_READ_COMMITTED"); + // We can be pretty sure that an entire line can be processed as a single command since + // we always add a line separator at the end while calling dbCommandParser.buildCommand. + beeLine.getOpts().setEntireLineAsCommand(true); + LOG.debug("Going to run command <" + builder.buildToLog() + ">"); + int status = beeLine.begin(builder.buildToRun(), null); + if (status != 0) { + throw new IOException("Schema script failed, errorcode " + status); + } + } + } + + static class CommandBuilder { + private final HiveConf hiveConf; + private final String userName; + private final String password; + private final String sqlScriptFile; + private final String driver; + private final String url; + + CommandBuilder(HiveConf hiveConf, String url, String driver, + String userName, String password, String sqlScriptFile) { + this.hiveConf = hiveConf; + this.userName = userName; + this.password = password; + this.url = url; + this.driver = driver; + this.sqlScriptFile = sqlScriptFile; + } + + String[] buildToRun() throws IOException { + return argsWith(password); + } + + String buildToLog() throws IOException { + logScript(); + return StringUtils.join(argsWith(BeeLine.PASSWD_MASK), " "); + } + + private String[] argsWith(String password) throws IOException { + return new String[] + { + "-u", url == null ? HiveSchemaHelper.getValidConfVar(MetastoreConf.ConfVars.CONNECT_URL_KEY, hiveConf) : url, + "-d", driver == null ? 
HiveSchemaHelper.getValidConfVar(MetastoreConf.ConfVars.CONNECTION_DRIVER, hiveConf) : driver, + "-n", userName, + "-p", password, + "-f", sqlScriptFile + }; + } + + private void logScript() throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Going to invoke file that contains:"); + try (BufferedReader reader = new BufferedReader(new FileReader(sqlScriptFile))) { + String line; + while ((line = reader.readLine()) != null) { + LOG.debug("script: " + line); + } + } + } + } + } + + public static void main(String[] args) { + HiveSchemaToolCommandLine line = new HiveSchemaToolCommandLine(args); + + System.setProperty(MetastoreConf.ConfVars.SCHEMA_VERIFICATION.getVarname(), "true"); + try { + HiveSchemaTool schemaTool = createSchemaTool(line); + + HiveSchemaToolTask task = null; + if (line.hasOption("info")) { + task = new HiveSchemaToolTaskInfo(); + } else if (line.hasOption("upgradeSchema") || line.hasOption("upgradeSchemaFrom")) { + task = new HiveSchemaToolTaskUpgrade(); + } else if (line.hasOption("initSchema") || line.hasOption("initSchemaTo")) { + task = new HiveSchemaToolTaskInit(); + } else if (line.hasOption("validate")) { + task = new HiveSchemaToolTaskValidate(); + } else if (line.hasOption("createCatalog")) { + task = new HiveSchemaToolTaskCreateCatalog(); + } else if (line.hasOption("moveDatabase")) { + task = new HiveSchemaToolTaskMoveDatabase(); + } else if (line.hasOption("moveTable")) { + task = new HiveSchemaToolTaskMoveTable(); + } + + task.setHiveSchemaTool(schemaTool); + task.setCommandLineArguments(line); + task.execute(); + + } catch (HiveMetaException e) { + System.err.println(e); + if (e.getCause() != null) { + Throwable t = e.getCause(); + System.err.println("Underlying cause: " + t.getClass().getName() + " : " + t.getMessage()); + if (e.getCause() instanceof SQLException) { + System.err.println("SQL Error code: " + ((SQLException)t).getErrorCode()); + } + } + if (line.hasOption("verbose")) { + e.printStackTrace(); + } else { + System.err.println("Use --verbose for detailed stacktrace."); + } + System.err.println("*** schemaTool failed ***"); + System.exit(1); + } + System.out.println("schemaTool completed"); + } + + private static HiveSchemaTool createSchemaTool(HiveSchemaToolCommandLine line) throws HiveMetaException { + HiveSchemaTool schemaTool = new HiveSchemaTool(line.getDbType(), line.getMetaDbType()); + + if (line.hasOption("userName")) { + schemaTool.setUserName(line.getOptionValue("userName")); + } else { + schemaTool.setUserName( + schemaTool.getHiveConf().get(MetastoreConf.ConfVars.CONNECTION_USER_NAME.getVarname())); + } + if (line.hasOption("passWord")) { + schemaTool.setPassWord(line.getOptionValue("passWord")); + } else { + try { + schemaTool.setPassWord(ShimLoader.getHadoopShims().getPassword(schemaTool.getHiveConf(), + MetastoreConf.ConfVars.PWD.getVarname())); + } catch (IOException err) { + throw new HiveMetaException("Error getting metastore password", err); + } + } + if (line.hasOption("url")) { + schemaTool.setUrl(line.getOptionValue("url")); + } + if (line.hasOption("driver")) { + schemaTool.setDriver(line.getOptionValue("driver")); + } + if (line.hasOption("dryRun")) { + schemaTool.setDryRun(true); + } + if (line.hasOption("verbose")) { + schemaTool.setVerbose(true); + } + if (line.hasOption("dbOpts")) { + schemaTool.setDbOpts(line.getOptionValue("dbOpts")); + } + if (line.hasOption("validate") && line.hasOption("servers")) { + schemaTool.setValidationServers(line.getOptionValue("servers")); + } + return schemaTool; + } +} diff 
--git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolCommandLine.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolCommandLine.java new file mode 100644 index 0000000..8b018a0 --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolCommandLine.java @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.beeline.schematool; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; + +import com.google.common.collect.ImmutableSet; + +import java.util.Set; + +class HiveSchemaToolCommandLine { + private static final Options cmdLineOptions = createOptions(); + + @SuppressWarnings("static-access") + private static Options createOptions() { + Option help = new Option("help", "print this message"); + Option infoOpt = new Option("info", "Show config and schema details"); + Option upgradeOpt = new Option("upgradeSchema", "Schema upgrade"); + Option upgradeFromOpt = OptionBuilder.withArgName("upgradeFrom").hasArg() + .withDescription("Schema upgrade from a version") + .create("upgradeSchemaFrom"); + Option initOpt = new Option("initSchema", "Schema initialization"); + Option initToOpt = OptionBuilder.withArgName("initTo").hasArg() + .withDescription("Schema initialization to a version") + .create("initSchemaTo"); + Option validateOpt = new Option("validate", "Validate the database"); + Option createCatalog = OptionBuilder + .hasArg() + .withDescription("Create a catalog, requires --catalogLocation parameter as well") + .create("createCatalog"); + Option moveDatabase = OptionBuilder + .hasArg() + .withDescription("Move a database between catalogs. Argument is the database name. " + + "Requires --fromCatalog and --toCatalog parameters as well") + .create("moveDatabase"); + Option moveTable = OptionBuilder + .hasArg() + .withDescription("Move a table to a different database. Argument is the table name. 
" + + "Requires --fromCatalog, --toCatalog, --fromDatabase, and --toDatabase " + + " parameters as well.") + .create("moveTable"); + + OptionGroup optGroup = new OptionGroup(); + optGroup.addOption(help).addOption(infoOpt).addOption(upgradeOpt).addOption(upgradeFromOpt) + .addOption(initOpt).addOption(initToOpt).addOption(validateOpt).addOption(createCatalog) + .addOption(moveDatabase).addOption(moveTable); + optGroup.setRequired(true); + + Option userNameOpt = OptionBuilder.withArgName("user") + .hasArgs() + .withDescription("Override config file user name") + .create("userName"); + Option passwdOpt = OptionBuilder.withArgName("password") + .hasArgs() + .withDescription("Override config file password") + .create("passWord"); + Option dbTypeOpt = OptionBuilder.withArgName("databaseType") + .hasArgs().withDescription("Metastore database type") + .create("dbType"); + Option metaDbTypeOpt = OptionBuilder.withArgName("metaDatabaseType") + .hasArgs().withDescription("Used only if upgrading the system catalog for hive") + .create("metaDbType"); + Option urlOpt = OptionBuilder.withArgName("url") + .hasArgs().withDescription("connection url to the database") + .create("url"); + Option driverOpt = OptionBuilder.withArgName("driver") + .hasArgs().withDescription("driver name for connection") + .create("driver"); + Option dbOpts = OptionBuilder.withArgName("databaseOpts") + .hasArgs().withDescription("Backend DB specific options") + .create("dbOpts"); + Option dryRunOpt = new Option("dryRun", "list SQL scripts (no execute)"); + Option verboseOpt = new Option("verbose", "only print SQL statements"); + Option serversOpt = OptionBuilder.withArgName("serverList") + .hasArgs().withDescription("a comma-separated list of servers used in location validation in the format of scheme://authority (e.g. hdfs://localhost:8000)") + .create("servers"); + Option catalogLocation = OptionBuilder + .hasArg() + .withDescription("Location of new catalog, required when adding a catalog") + .create("catalogLocation"); + Option catalogDescription = OptionBuilder + .hasArg() + .withDescription("Description of new catalog") + .create("catalogDescription"); + Option ifNotExists = OptionBuilder + .withDescription("If passed then it is not an error to create an existing catalog") + .create("ifNotExists"); + Option fromCatalog = OptionBuilder + .hasArg() + .withDescription("Catalog a moving database or table is coming from. This is " + + "required if you are moving a database or table.") + .create("fromCatalog"); + Option toCatalog = OptionBuilder + .hasArg() + .withDescription("Catalog a moving database or table is going to. This is " + + "required if you are moving a database or table.") + .create("toCatalog"); + Option fromDatabase = OptionBuilder + .hasArg() + .withDescription("Database a moving table is coming from. This is " + + "required if you are moving a table.") + .create("fromDatabase"); + Option toDatabase = OptionBuilder + .hasArg() + .withDescription("Database a moving table is going to. 
This is " + + "required if you are moving a table.") + .create("toDatabase"); + + Options cmdLineOptions = new Options(); + cmdLineOptions.addOption(help); + cmdLineOptions.addOptionGroup(optGroup); + cmdLineOptions.addOption(dbTypeOpt); + cmdLineOptions.addOption(metaDbTypeOpt); + cmdLineOptions.addOption(userNameOpt); + cmdLineOptions.addOption(passwdOpt); + cmdLineOptions.addOption(urlOpt); + cmdLineOptions.addOption(driverOpt); + cmdLineOptions.addOption(dbOpts); + cmdLineOptions.addOption(dryRunOpt); + cmdLineOptions.addOption(verboseOpt); + cmdLineOptions.addOption(serversOpt); + cmdLineOptions.addOption(catalogLocation); + cmdLineOptions.addOption(catalogDescription); + cmdLineOptions.addOption(ifNotExists); + cmdLineOptions.addOption(fromCatalog); + cmdLineOptions.addOption(toCatalog); + cmdLineOptions.addOption(fromDatabase); + cmdLineOptions.addOption(toDatabase); + + return cmdLineOptions; + } + + private final CommandLine cl; + private final String dbType; + private final String metaDbType; + + HiveSchemaToolCommandLine(String[] args) { + cl = getCommandLine(args); + if (cl.hasOption("help")) { + printAndExit(0); + } + + dbType = cl.getOptionValue("dbType"); + metaDbType = cl.getOptionValue("metaDbType"); + + validate(); + } + + private CommandLine getCommandLine(String[] args) { + try { + CommandLineParser parser = new GnuParser(); + return parser.parse(cmdLineOptions, args); + } catch (ParseException e) { + System.err.println("HiveSchemaTool:Parsing failed. Reason: " + e.getLocalizedMessage()); + printAndExit(1); + return null; + } + } + + private static final Set VALID_DB_TYPES = ImmutableSet.of(HiveSchemaHelper.DB_DERBY, + HiveSchemaHelper.DB_HIVE, HiveSchemaHelper.DB_MSSQL, HiveSchemaHelper.DB_MYSQL, + HiveSchemaHelper.DB_POSTGRACE, HiveSchemaHelper.DB_ORACLE); + + private static final Set VALID_META_DB_TYPES = ImmutableSet.of(HiveSchemaHelper.DB_DERBY, + HiveSchemaHelper.DB_MSSQL, HiveSchemaHelper.DB_MYSQL, HiveSchemaHelper.DB_POSTGRACE, + HiveSchemaHelper.DB_ORACLE); + + private void validate() { + if (dbType == null) { + System.err.println("no dbType supplied"); + printAndExit(1); + } + if (!VALID_DB_TYPES.contains(dbType)) { + System.err.println("Unsupported dbType " + dbType); + printAndExit(1); + } + + if (metaDbType != null) { + if (!dbType.equals(HiveSchemaHelper.DB_HIVE)) { + System.err.println("metaDbType only supported for dbType = hive"); + printAndExit(1); + } + if (!VALID_META_DB_TYPES.contains(metaDbType)) { + System.err.println("Unsupported metaDbType " + metaDbType); + printAndExit(1); + } + } else if (dbType.equalsIgnoreCase(HiveSchemaHelper.DB_HIVE)) { + System.err.println("no metaDbType supplied"); + printAndExit(1); + } + + if (cl.hasOption("createCatalog") && !cl.hasOption("catalogLocation")) { + System.err.println("catalogLocation must be set"); + printAndExit(1); + } + + if (!cl.hasOption("createCatalog") && + (cl.hasOption("catalogLocation") || cl.hasOption("catalogDescription") || cl.hasOption("ifNotExists"))) { + System.err.println("catalogLocation, catalogDescription and ifNotExists may be set only for createCatalog"); + printAndExit(1); + } + + if (cl.hasOption("moveDatabase") && + (!cl.hasOption("fromCatalog") || !cl.hasOption("toCatalog"))) { + System.err.println("fromCatalog and toCatalog must be set"); + printAndExit(1); + } + + if (cl.hasOption("moveTable") && + (!cl.hasOption("fromCatalog") || !cl.hasOption("toCatalog") || + !cl.hasOption("fromDatabase") || !cl.hasOption("toDatabase"))) { + System.err.println("fromCatalog, toCatalog, 
fromDatabase and toDatabase must be set"); + printAndExit(1); + } + + if ((!cl.hasOption("moveDatabase") && !cl.hasOption("moveTable")) && + (cl.hasOption("fromCatalog") || cl.hasOption("toCatalog"))) { + System.err.println("fromCatalog and toCatalog may be set only for moveDatabase and moveTable"); + printAndExit(1); + } + + if (!cl.hasOption("moveTable") && + (cl.hasOption("fromDatabase") || cl.hasOption("toDatabase"))) { + System.err.println("fromDatabase and toDatabase may be set only for moveTable"); + printAndExit(1); + } + } + + private void printAndExit(int exitValue) { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("schemaTool", cmdLineOptions); + System.exit(exitValue); + } + + String getDbType() { + return dbType; + } + + String getMetaDbType() { + return metaDbType; + } + + boolean hasOption(String opt) { + return cl.hasOption(opt); + } + + String getOptionValue(String opt) { + return cl.getOptionValue(opt); + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTask.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTask.java new file mode 100644 index 0000000..1bc4b39 --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTask.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.beeline.schematool; + +import org.apache.hadoop.hive.metastore.HiveMetaException; + +abstract class HiveSchemaToolTask { + protected HiveSchemaTool schemaTool; + + void setHiveSchemaTool(HiveSchemaTool schemaTool) { + this.schemaTool = schemaTool; + } + + abstract void setCommandLineArguments(HiveSchemaToolCommandLine cl); + + abstract void execute() throws HiveMetaException; +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskCreateCatalog.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskCreateCatalog.java new file mode 100644 index 0000000..0443534 --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskCreateCatalog.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.beeline.schematool; + +import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Print Hive version and schema version + */ +class HiveSchemaToolTaskCreateCatalog extends HiveSchemaToolTask { + private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaToolTaskCreateCatalog.class.getName()); + + private String catName; + private String location; + private String description; + private boolean ifNotExists; + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + catName = normalizeIdentifier(cl.getOptionValue("createCatalog")); + location = cl.getOptionValue("catalogLocation"); + description = cl.getOptionValue("catalogDescription"); + ifNotExists = cl.hasOption("ifNotExists"); + } + + @Override + void execute() throws HiveMetaException { + System.out.println("Create catalog " + catName + " at location " + location); + + Connection conn = schemaTool.getConnectionToMetastore(true); + boolean success = false; + try { + conn.setAutoCommit(false); + try (Statement stmt = conn.createStatement()) { + // If they set ifNotExists check for existence first, and bail if it exists. This is + // more reliable then attempting to parse the error message from the SQLException. + if (ifNotExists && catalogExists(stmt)) { + return; + } + + int catNum = getNextCatalogId(stmt); + addCatalog(conn, stmt, catNum); + success = true; + } + } catch (MetaException|SQLException e) { + throw new HiveMetaException("Failed to add catalog", e); + } finally { + try { + if (!success) conn.rollback(); + } catch (SQLException e) { + // Not really much we can do here. 
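+ // Only log the rollback failure; if the catalog insert failed, the HiveMetaException raised above still propagates to the caller.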
+ LOG.error("Failed to rollback, everything will probably go bad from here."); + } + } + } + + private static final String CATALOG_EXISTS_QUERY = + "select NAME " + + " from CTLGS " + + " where NAME = '%s'"; + + private boolean catalogExists(Statement stmt) throws SQLException { + String query = String.format(schemaTool.quote(CATALOG_EXISTS_QUERY), catName); + LOG.debug("Going to run " + query); + ResultSet rs = stmt.executeQuery(query); + if (rs.next()) { + System.out.println("Catalog " + catName + " already exists"); + return true; + } + + return false; + } + + private static final String NEXT_CATALOG_ID_QUERY = + "select max(CTLG_ID) " + + " from CTLGS"; + + private int getNextCatalogId(Statement stmt) throws SQLException, MetaException, HiveMetaException { + String query = schemaTool.quote(NEXT_CATALOG_ID_QUERY); + LOG.debug("Going to run " + query); + ResultSet rs = stmt.executeQuery(query); + if (!rs.next()) { + throw new HiveMetaException("No catalogs found, have you upgraded the database?"); + } + return rs.getInt(1) + 1; + } + + private static final String ADD_CATALOG_STMT = + "insert into CTLGS (CTLG_ID, NAME, DESC, LOCATION_URI) " + + " values (%d, '%s', '%s', '%s')"; + + private void addCatalog(Connection conn, Statement stmt, int catNum) throws SQLException { + String update = String.format(schemaTool.quote(ADD_CATALOG_STMT), catNum, catName, description, location); + LOG.debug("Going to run " + update); + stmt.execute(update); + conn.commit(); + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskInfo.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskInfo.java new file mode 100644 index 0000000..23919e0 --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskInfo.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.beeline.schematool; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.MetaStoreConnectionInfo; + +/** + * Print Hive version and schema version + */ +class HiveSchemaToolTaskInfo extends HiveSchemaToolTask { + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + // do nothing + } + + @Override + void execute() throws HiveMetaException { + String hiveVersion = schemaTool.getMetaStoreSchemaInfo().getHiveSchemaVersion(); + MetaStoreConnectionInfo connectionInfo = schemaTool.getConnectionInfo(true); + String dbVersion = schemaTool.getMetaStoreSchemaInfo().getMetaStoreSchemaVersion(connectionInfo); + + System.out.println("Hive distribution version:\t " + hiveVersion); + System.out.println("Metastore schema version:\t " + dbVersion); + + schemaTool.assertCompatibleVersion(hiveVersion, dbVersion); + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskInit.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskInit.java new file mode 100644 index 0000000..09ba83e --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskInit.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.beeline.schematool; + +import java.io.IOException; + +import org.apache.hadoop.hive.metastore.HiveMetaException; + +/** + * Initialize the metastore schema + */ +class HiveSchemaToolTaskInit extends HiveSchemaToolTask { + private String toVersion; + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + if (cl.hasOption("initSchemaTo")) { + this.toVersion = cl.getOptionValue("initSchemaTo"); + } + } + + private void ensureToVersion() throws HiveMetaException { + if (toVersion != null) { + return; + } + + // If null then current hive version is used + toVersion = schemaTool.getMetaStoreSchemaInfo().getHiveSchemaVersion(); + System.out.println("Initializing the schema to: " + toVersion); + } + + @Override + void execute() throws HiveMetaException { + ensureToVersion(); + + schemaTool.testConnectionToMetastore(); + System.out.println("Starting metastore schema initialization to " + toVersion); + + String initScriptDir = schemaTool.getMetaStoreSchemaInfo().getMetaStoreScriptDir(); + String initScriptFile = schemaTool.getMetaStoreSchemaInfo().generateInitFileName(toVersion); + + try { + System.out.println("Initialization script " + initScriptFile); + if (!schemaTool.isDryRun()) { + schemaTool.runBeeLine(initScriptDir, initScriptFile); + System.out.println("Initialization script completed"); + } + } catch (IOException e) { + throw new HiveMetaException("Schema initialization FAILED! 
Metastore state would be inconsistent!", e); + } + + schemaTool.verifySchemaVersion(); + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskMoveDatabase.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskMoveDatabase.java new file mode 100644 index 0000000..12ebbaf --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskMoveDatabase.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.beeline.schematool; + +import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; + +import java.sql.Connection; +import java.sql.SQLException; +import java.sql.Statement; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Print Hive version and schema version + */ +class HiveSchemaToolTaskMoveDatabase extends HiveSchemaToolTask { + private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaToolTaskMoveDatabase.class.getName()); + + private String fromCatName; + private String toCatName; + private String dbName; + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + fromCatName = normalizeIdentifier(cl.getOptionValue("fromCatalog")); + toCatName = normalizeIdentifier(cl.getOptionValue("toCatalog")); + dbName = normalizeIdentifier(cl.getOptionValue("moveDatabase")); + } + + @Override + void execute() throws HiveMetaException { + System.out.println(String.format("Moving database %s from catalog %s to catalog %s", + dbName, fromCatName, toCatName)); + Connection conn = schemaTool.getConnectionToMetastore(true); + boolean success = false; + try { + conn.setAutoCommit(false); + try (Statement stmt = conn.createStatement()) { + updateCatalogNameInTable(stmt, "DBS", "CTLG_NAME", "NAME", fromCatName, toCatName, dbName, false); + updateCatalogNameInTable(stmt, "TAB_COL_STATS", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); + updateCatalogNameInTable(stmt, "PART_COL_STATS", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); + updateCatalogNameInTable(stmt, "PARTITION_EVENTS", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); + updateCatalogNameInTable(stmt, "NOTIFICATION_LOG", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); + conn.commit(); + success = true; + } + } catch (SQLException e) { + throw new HiveMetaException("Failed to move database", e); + } finally { + try { + if (!success) conn.rollback(); + } catch (SQLException e) { + // Not really much we can do here. 
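+ // If the rollback itself fails, some of the CAT_NAME updates above may already have been applied; logging is all that can be done here.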
+ LOG.error("Failed to rollback, everything will probably go bad from here."); + } + } + } + + private void updateCatalogNameInTable(Statement stmt, String tableName, String catColName, + String dbColName, String fromCatName, + String toCatName, String dbName, boolean zeroUpdatesOk) + throws HiveMetaException, SQLException { + String update = schemaTool.quote( + "update " + tableName + " " + + " set " + catColName + " = '" + toCatName + "' " + + " where " + catColName + " = '" + fromCatName + "'" + + " and " + dbColName + " = '" + dbName + "'"); + LOG.debug("Going to run " + update); + int numUpdated = stmt.executeUpdate(update); + if (numUpdated != 1 && !(zeroUpdatesOk && numUpdated == 0)) { + throw new HiveMetaException("Failed to properly update the " + tableName + + " table. Expected to update 1 row but instead updated " + numUpdated); + } + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskMoveTable.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskMoveTable.java new file mode 100644 index 0000000..63e2d61 --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskMoveTable.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.beeline.schematool; + +import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Print Hive version and schema version + */ +class HiveSchemaToolTaskMoveTable extends HiveSchemaToolTask { + private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaToolTaskMoveTable.class.getName()); + + private String fromCat; + private String toCat; + private String fromDb; + private String toDb; + private String tableName; + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + fromCat = normalizeIdentifier(cl.getOptionValue("fromCatalog")); + toCat = normalizeIdentifier(cl.getOptionValue("toCatalog")); + fromDb = normalizeIdentifier(cl.getOptionValue("fromDatabase")); + toDb = normalizeIdentifier(cl.getOptionValue("toDatabase")); + tableName = normalizeIdentifier(cl.getOptionValue("moveTable")); + } + + @Override + void execute() throws HiveMetaException { + Connection conn = schemaTool.getConnectionToMetastore(true); + boolean success = false; + try { + conn.setAutoCommit(false); + try (Statement stmt = conn.createStatement()) { + updateTableId(stmt); + updateDbNameForTable(stmt, "TAB_COL_STATS", "TABLE_NAME", fromCat, toCat, fromDb, toDb, tableName); + updateDbNameForTable(stmt, "PART_COL_STATS", "TABLE_NAME", fromCat, toCat, fromDb, toDb, tableName); + updateDbNameForTable(stmt, "PARTITION_EVENTS", "TBL_NAME", fromCat, toCat, fromDb, toDb, tableName); + updateDbNameForTable(stmt, "NOTIFICATION_LOG", "TBL_NAME", fromCat, toCat, fromDb, toDb, tableName); + conn.commit(); + success = true; + } + } catch (SQLException se) { + throw new HiveMetaException("Failed to move table", se); + } finally { + try { + if (!success) conn.rollback(); + } catch (SQLException e) { + // Not really much we can do here. + LOG.error("Failed to rollback, everything will probably go bad from here."); + } + + } + } + + private static final String UPDATE_TABLE_ID_STMT = + "update TBLS " + + " set DB_ID = %d " + + " where DB_ID = %d " + + " and TBL_NAME = '%s'"; + + private void updateTableId(Statement stmt) throws SQLException, HiveMetaException { + // Find the old database id + long oldDbId = getDbId(stmt, fromDb, fromCat); + + // Find the new database id + long newDbId = getDbId(stmt, toDb, toCat); + + String update = String.format(schemaTool.quote(UPDATE_TABLE_ID_STMT), newDbId, oldDbId, tableName); + LOG.debug("Going to run " + update); + int numUpdated = stmt.executeUpdate(update); + if (numUpdated != 1) { + throw new HiveMetaException( + "Failed to properly update TBLS table. 
Expected to update " + + "1 row but instead updated " + numUpdated); + } + } + + private static final String DB_ID_QUERY = + "select DB_ID " + + " from DBS " + + " where NAME = '%s' " + + " and CTLG_NAME = '%s'"; + + private long getDbId(Statement stmt, String db, String catalog) throws SQLException, HiveMetaException { + String query = String.format(schemaTool.quote(DB_ID_QUERY), db, catalog); + LOG.debug("Going to run " + query); + ResultSet rs = stmt.executeQuery(query); + if (!rs.next()) { + throw new HiveMetaException("Unable to find database " + fromDb); + } + return rs.getLong(1); + } + + private void updateDbNameForTable(Statement stmt, String tableName, + String tableColumnName, String fromCat, String toCat, + String fromDb, String toDb, String hiveTblName) + throws HiveMetaException, SQLException { + String update = schemaTool.quote( + "update " + tableName + " " + + " set CAT_NAME = '" + toCat + "', " + + " DB_NAME = '" + toDb + "' " + + " where CAT_NAME = '" + fromCat + "' " + + " and DB_NAME = '" + fromDb + "' " + + " and " + tableColumnName + " = '" + hiveTblName + "'"); + + LOG.debug("Going to run " + update); + int numUpdated = stmt.executeUpdate(update); + if (numUpdated > 1 || numUpdated < 0) { + throw new HiveMetaException("Failed to properly update the " + tableName + + " table. Expected to update 1 row but instead updated " + numUpdated); + } + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskUpgrade.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskUpgrade.java new file mode 100644 index 0000000..e19084c --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskUpgrade.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.beeline.schematool; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.MetaStoreConnectionInfo; + +/** + * Perform metastore schema upgrade + */ +class HiveSchemaToolTaskUpgrade extends HiveSchemaToolTask { + private String fromVersion; + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + if (cl.hasOption("upgradeSchemaFrom")) { + this.fromVersion = cl.getOptionValue("upgradeSchemaFrom"); + } + } + + private void ensureFromVersion() throws HiveMetaException { + if (fromVersion != null) { + return; + } + + // If null, then read from the metastore + MetaStoreConnectionInfo connectionInfo = schemaTool.getConnectionInfo(false); + fromVersion = schemaTool.getMetaStoreSchemaInfo().getMetaStoreSchemaVersion(connectionInfo); + if (fromVersion == null || fromVersion.isEmpty()) { + throw new HiveMetaException("Schema version not stored in the metastore. " + + "Metastore schema is too old or corrupt. Try specifying the version manually"); + } + System.out.println("Upgrading from the version " + fromVersion); + } + + @Override + void execute() throws HiveMetaException { + ensureFromVersion(); + + if (schemaTool.getMetaStoreSchemaInfo().getHiveSchemaVersion().equals(fromVersion)) { + System.out.println("No schema upgrade required from version " + fromVersion); + return; + } + + // Find the list of scripts to execute for this upgrade + List upgradeScripts = schemaTool.getMetaStoreSchemaInfo().getUpgradeScripts(fromVersion); + schemaTool.testConnectionToMetastore(); + System.out.println("Starting upgrade metastore schema from version " + fromVersion + " to " + + schemaTool.getMetaStoreSchemaInfo().getHiveSchemaVersion()); + String scriptDir = schemaTool.getMetaStoreSchemaInfo().getMetaStoreScriptDir(); + try { + for (String scriptFile : upgradeScripts) { + System.out.println("Upgrade script " + scriptFile); + if (!schemaTool.isDryRun()) { + runPreUpgrade(scriptDir, scriptFile); + schemaTool.runBeeLine(scriptDir, scriptFile); + System.out.println("Completed " + scriptFile); + } + } + } catch (IOException e) { + throw new HiveMetaException("Upgrade FAILED! Metastore state would be inconsistent !!", e); + } + + // Revalidated the new version after upgrade + schemaTool.verifySchemaVersion(); + } + + /** + * Run pre-upgrade scripts corresponding to a given upgrade script, + * if any exist. The errors from pre-upgrade are ignored. + * Pre-upgrade scripts typically contain setup statements which + * may fail on some database versions and failure is ignorable. 
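+ * Pre-upgrade scripts are discovered by index (0, 1, ...) via IMetaStoreSchemaInfo.getPreUpgradeScriptName and run until the first missing file.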
+ * + * @param scriptDir upgrade script directory name + * @param scriptFile upgrade script file name + */ + private void runPreUpgrade(String scriptDir, String scriptFile) { + for (int i = 0;; i++) { + String preUpgradeScript = schemaTool.getMetaStoreSchemaInfo().getPreUpgradeScriptName(i, scriptFile); + File preUpgradeScriptFile = new File(scriptDir, preUpgradeScript); + if (!preUpgradeScriptFile.isFile()) { + break; + } + + try { + schemaTool.runBeeLine(scriptDir, preUpgradeScript); + System.out.println("Completed " + preUpgradeScript); + } catch (Exception e) { + // Ignore the pre-upgrade script errors + System.err.println("Warning in pre-upgrade script " + preUpgradeScript + ": " + e.getMessage()); + if (schemaTool.isVerbose()) { + e.printStackTrace(); + } + } + } + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskValidate.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskValidate.java new file mode 100644 index 0000000..d4fd878 --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskValidate.java @@ -0,0 +1,628 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.beeline.schematool; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.net.URI; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.SQLFeatureNotSupportedException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.MetaStoreConnectionInfo; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.NestedScriptParser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.ImmutableMap; + +/** + * Print Hive version and schema version + */ +class HiveSchemaToolTaskValidate extends HiveSchemaToolTask { + private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaToolTaskValidate.class.getName()); + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + // do nothing + } + + @Override + void execute() throws HiveMetaException { + System.out.println("Starting metastore validation\n"); + Connection conn = schemaTool.getConnectionToMetastore(false); + boolean success = true; + try { + success &= validateSchemaVersions(); + success &= validateSequences(conn); + success &= validateSchemaTables(conn); + success &= validateLocations(conn, schemaTool.getValidationServers()); + success &= validateColumnNullValues(conn); + } finally { + if (conn != null) { + try { + conn.close(); + } catch (SQLException e) { + throw new HiveMetaException("Failed to close metastore connection", e); + } + } + } + + System.out.print("Done with metastore validation: "); + if (!success) { + System.out.println("[FAIL]"); + System.exit(1); + } else { + System.out.println("[SUCCESS]"); + } + } + + boolean validateSchemaVersions() throws HiveMetaException { + System.out.println("Validating schema version"); + try { + String hiveSchemaVersion = schemaTool.getMetaStoreSchemaInfo().getHiveSchemaVersion(); + MetaStoreConnectionInfo connectionInfo = schemaTool.getConnectionInfo(false); + String newSchemaVersion = schemaTool.getMetaStoreSchemaInfo().getMetaStoreSchemaVersion(connectionInfo); + schemaTool.assertCompatibleVersion(hiveSchemaVersion, newSchemaVersion); + } catch (HiveMetaException hme) { + if (hme.getMessage().contains("Metastore schema version is not compatible") || + hme.getMessage().contains("Multiple versions were found in metastore") || + hme.getMessage().contains("Could not find version info in metastore VERSION table")) { + System.err.println(hme.getMessage()); + System.out.println("[FAIL]\n"); + return false; + } else { + throw hme; + } + } + System.out.println("[SUCCESS]\n"); + return true; + } + + private static final String QUERY_SEQ = + " select t.NEXT_VAL" + + " from SEQUENCE_TABLE t " + + " where t.SEQUENCE_NAME = ? 
" + + "order by t.SEQUENCE_NAME"; + + private static final String QUERY_MAX_ID = + "select max(%s)" + + " from %s"; + + boolean validateSequences(Connection conn) throws HiveMetaException { + Map> seqNameToTable = + new ImmutableMap.Builder>() + .put("MDatabase", Pair.of("DBS", "DB_ID")) + .put("MRole", Pair.of("ROLES", "ROLE_ID")) + .put("MGlobalPrivilege", Pair.of("GLOBAL_PRIVS", "USER_GRANT_ID")) + .put("MTable", Pair.of("TBLS","TBL_ID")) + .put("MStorageDescriptor", Pair.of("SDS", "SD_ID")) + .put("MSerDeInfo", Pair.of("SERDES", "SERDE_ID")) + .put("MColumnDescriptor", Pair.of("CDS", "CD_ID")) + .put("MTablePrivilege", Pair.of("TBL_PRIVS", "TBL_GRANT_ID")) + .put("MTableColumnStatistics", Pair.of("TAB_COL_STATS", "CS_ID")) + .put("MPartition", Pair.of("PARTITIONS", "PART_ID")) + .put("MPartitionColumnStatistics", Pair.of("PART_COL_STATS", "CS_ID")) + .put("MFunction", Pair.of("FUNCS", "FUNC_ID")) + .put("MIndex", Pair.of("IDXS", "INDEX_ID")) + .put("MStringList", Pair.of("SKEWED_STRING_LIST", "STRING_LIST_ID")) + .build(); + + System.out.println("Validating sequence number for SEQUENCE_TABLE"); + + boolean isValid = true; + try { + Statement stmt = conn.createStatement(); + for (String seqName : seqNameToTable.keySet()) { + String tableName = seqNameToTable.get(seqName).getLeft(); + String tableKey = seqNameToTable.get(seqName).getRight(); + String fullSequenceName = "org.apache.hadoop.hive.metastore.model." + seqName; + String seqQuery = schemaTool.quote(QUERY_SEQ); + String maxIdQuery = String.format(schemaTool.quote(QUERY_MAX_ID), tableKey, tableName); + + ResultSet res = stmt.executeQuery(maxIdQuery); + if (res.next()) { + long maxId = res.getLong(1); + if (maxId > 0) { + PreparedStatement stmtSeq = conn.prepareStatement(seqQuery); + stmtSeq.setString(1, fullSequenceName); + ResultSet resSeq = stmtSeq.executeQuery(); + if (!resSeq.next()) { + isValid = false; + System.err.println("Missing SEQUENCE_NAME " + seqName + " from SEQUENCE_TABLE"); + } else if (resSeq.getLong(1) < maxId) { + isValid = false; + System.err.println("NEXT_VAL for " + seqName + " in SEQUENCE_TABLE < max(" + tableKey + + ") in " + tableName); + } + } + } + } + + System.out.println(isValid ? "[SUCCESS]\n" :"[FAIL]\n"); + return isValid; + } catch (SQLException e) { + throw new HiveMetaException("Failed to validate sequence number for SEQUENCE_TABLE", e); + } + } + + boolean validateSchemaTables(Connection conn) throws HiveMetaException { + Connection hmsConn = schemaTool.getConnectionToMetastore(false); + + System.out.println("Validating metastore schema tables"); + String version = null; + try { + MetaStoreConnectionInfo connectionInfo = schemaTool.getConnectionInfo(false); + version = schemaTool.getMetaStoreSchemaInfo().getMetaStoreSchemaVersion(connectionInfo); + } catch (HiveMetaException he) { + System.err.println("Failed to determine schema version from Hive Metastore DB. 
" + he.getMessage()); + System.out.println("Failed in schema table validation."); + LOG.debug("Failed to determine schema version from Hive Metastore DB," + he.getMessage(), he); + return false; + } + + // re-open the hms connection + hmsConn = schemaTool.getConnectionToMetastore(false); + + LOG.debug("Validating tables in the schema for version " + version); + List dbTables = new ArrayList(); + ResultSet rs = null; + try { + String schema = null; + try { + schema = hmsConn.getSchema(); + } catch (SQLFeatureNotSupportedException e) { + LOG.debug("schema is not supported"); + } + + DatabaseMetaData metadata = conn.getMetaData(); + rs = metadata.getTables(null, schema, "%", new String[] {"TABLE"}); + + while (rs.next()) { + String table = rs.getString("TABLE_NAME"); + dbTables.add(table.toLowerCase()); + LOG.debug("Found table " + table + " in HMS dbstore"); + } + } catch (SQLException e) { + throw new HiveMetaException("Failed to retrieve schema tables from Hive Metastore DB," + + e.getMessage(), e); + } finally { + if (rs != null) { + try { + rs.close(); + } catch (SQLException e) { + throw new HiveMetaException("Failed to close resultset", e); + } + } + } + + // parse the schema file to determine the tables that are expected to exist + // we are using oracle schema because it is simpler to parse, no quotes or backticks etc + List schemaTables = new ArrayList(); + List subScripts = new ArrayList(); + + String baseDir = new File(schemaTool.getMetaStoreSchemaInfo().getMetaStoreScriptDir()).getParent(); + String schemaFile = new File(schemaTool.getMetaStoreSchemaInfo().getMetaStoreScriptDir(), + schemaTool.getMetaStoreSchemaInfo().generateInitFileName(version)).getPath(); + try { + LOG.debug("Parsing schema script " + schemaFile); + subScripts.addAll(findCreateTable(schemaFile, schemaTables)); + while (subScripts.size() > 0) { + schemaFile = baseDir + "/" + schemaTool.getDbType() + "/" + subScripts.remove(0); + LOG.debug("Parsing subscript " + schemaFile); + subScripts.addAll(findCreateTable(schemaFile, schemaTables)); + } + } catch (Exception e) { + System.err.println("Exception in parsing schema file. Cause:" + e.getMessage()); + System.out.println("Failed in schema table validation."); + return false; + } + + LOG.debug("Schema tables:[ " + Arrays.toString(schemaTables.toArray()) + " ]"); + LOG.debug("DB tables:[ " + Arrays.toString(dbTables.toArray()) + " ]"); + + // now diff the lists + schemaTables.removeAll(dbTables); + if (schemaTables.size() > 0) { + Collections.sort(schemaTables); + System.err.println("Table(s) [ " + Arrays.toString(schemaTables.toArray()) + " ] " + + "are missing from the metastore database schema."); + System.out.println("[FAIL]\n"); + return false; + } else { + System.out.println("[SUCCESS]\n"); + return true; + } + } + + private List findCreateTable(String path, List tableList) throws Exception { + if (!(new File(path)).exists()) { + throw new Exception(path + " does not exist. 
Potentially incorrect version in the metastore VERSION table"); + } + + List<String> subs = new ArrayList<String>(); + NestedScriptParser sp = HiveSchemaHelper.getDbCommandParser(schemaTool.getDbType(), false); + Pattern regexp = Pattern.compile("CREATE TABLE(\\s+IF NOT EXISTS)?\\s+(\\S+).*"); + + try (BufferedReader reader = new BufferedReader(new FileReader(path))) { + String line = null; + while ((line = reader.readLine()) != null) { + if (sp.isNestedScript(line)) { + String subScript = sp.getScriptName(line); + LOG.debug("Schema subscript " + subScript + " found"); + subs.add(subScript); + continue; + } + line = line.replaceAll("( )+", " "); //suppress multi-spaces + line = line.replaceAll("\\(", " "); + line = line.replaceAll("IF NOT EXISTS ", ""); + line = line.replaceAll("`",""); + line = line.replaceAll("'",""); + line = line.replaceAll("\"",""); + Matcher matcher = regexp.matcher(line); + + if (matcher.find()) { + String table = matcher.group(2); + if (schemaTool.getDbType().equals("derby")) + table = table.replaceAll("APP\\.",""); + tableList.add(table.toLowerCase()); + LOG.debug("Found table " + table + " in the schema"); + } + } + } catch (IOException ex) { + throw new Exception(ex.getMessage()); + } + + return subs; + } + + private boolean validateLocations(Connection conn, URI[] defaultServers) throws HiveMetaException { + System.out.println("Validating DFS locations"); + boolean rtn = true; + rtn &= checkMetaStoreDBLocation(conn, defaultServers); + rtn &= checkMetaStoreTableLocation(conn, defaultServers); + rtn &= checkMetaStorePartitionLocation(conn, defaultServers); + rtn &= checkMetaStoreSkewedColumnsLocation(conn, defaultServers); + System.out.println(rtn ? "[SUCCESS]\n" : "[FAIL]\n"); + return rtn; + } + + private static final String QUERY_DB_LOCATION = + " select dbt.DB_ID, " + + " dbt.NAME, " + + " dbt.DB_LOCATION_URI " + + " from DBS dbt " + + "order by dbt.DB_ID "; + + private boolean checkMetaStoreDBLocation(Connection conn, URI[] defaultServers) throws HiveMetaException { + String dbLocQuery = schemaTool.quote(QUERY_DB_LOCATION); + + int numOfInvalid = 0; + try (Statement stmt = conn.createStatement(); + ResultSet res = stmt.executeQuery(dbLocQuery)) { + while (res.next()) { + String locValue = res.getString(3); + String dbName = getNameOrID(res, 2, 1); + if (!checkLocation("Database " + dbName, locValue, defaultServers)) { + numOfInvalid++; + } + } + } catch (SQLException e) { + throw new HiveMetaException("Failed to get DB Location Info.", e); + } + return numOfInvalid == 0; + } + + private static final String TAB_ID_RANGE_QUERY = + "select max(TBL_ID), " + + " min(TBL_ID) " + + " from TBLS "; + + private static final String TAB_LOC_QUERY = + " select tbl.TBL_ID, " + + " tbl.TBL_NAME, " + + " sd.LOCATION, " + + " dbt.DB_ID, " + + " dbt.NAME " + + " from TBLS tbl " + + "inner join SDS sd on sd.SD_ID = tbl.SD_ID " + + "inner join DBS dbt on tbl.DB_ID = dbt.DB_ID " + + " where tbl.TBL_TYPE != '%s' " + + " and tbl.TBL_ID >= ? " + + " and tbl.TBL_ID <= ? 
" + + " order by tbl.TBL_ID "; + + private static final int TAB_LOC_CHECK_SIZE = 2000; + + private boolean checkMetaStoreTableLocation(Connection conn, URI[] defaultServers) + throws HiveMetaException { + String tabIDRangeQuery = schemaTool.quote(TAB_ID_RANGE_QUERY); + String tabLocQuery = String.format(schemaTool.quote(TAB_LOC_QUERY), TableType.VIRTUAL_VIEW); + + try { + long maxID = 0, minID = 0; + try (Statement stmt = conn.createStatement(); + ResultSet res = stmt.executeQuery(tabIDRangeQuery)) { + if (res.next()) { + maxID = res.getLong(1); + minID = res.getLong(2); + } + } + + int numOfInvalid = 0; + try (PreparedStatement pStmt = conn.prepareStatement(tabLocQuery)) { + while (minID <= maxID) { + pStmt.setLong(1, minID); + pStmt.setLong(2, minID + TAB_LOC_CHECK_SIZE); + try (ResultSet res = pStmt.executeQuery()) { + while (res.next()) { + String locValue = res.getString(3); + String entity = "Database " + getNameOrID(res, 5, 4) + ", Table " + getNameOrID(res, 2, 1); + if (!checkLocation(entity, locValue, defaultServers)) { + numOfInvalid++; + } + } + } + minID += TAB_LOC_CHECK_SIZE + 1; + } + } + + return numOfInvalid == 0; + } catch (SQLException e) { + throw new HiveMetaException("Failed to get Table Location Info.", e); + } + } + + private static final String QUERY_PART_ID_RANGE = + "select max(PART_ID)," + + " min(PART_ID)" + + " from PARTITIONS "; + + private static final String QUERY_PART_LOC = + " select pt.PART_ID, " + + " pt.PART_NAME, " + + " sd.LOCATION, " + + " tbl.TBL_ID, " + + " tbl.TBL_NAME, " + + " dbt.DB_ID, " + + " dbt.NAME " + + " from PARTITIONS pt " + + "inner join SDS sd on sd.SD_ID = pt.SD_ID " + + "inner join TBLS tbl on tbl.TBL_ID = pt.TBL_ID " + + "inner join DBS dbt on dbt.DB_ID = tbl.DB_ID " + + " where pt.PART_ID >= ? " + + " and pt.PART_ID <= ? 
" + + " order by tbl.TBL_ID "; + + private static final int PART_LOC_CHECK_SIZE = 2000; + + private boolean checkMetaStorePartitionLocation(Connection conn, URI[] defaultServers) + throws HiveMetaException { + String queryPartIDRange = schemaTool.quote(QUERY_PART_ID_RANGE); + String queryPartLoc = schemaTool.quote(QUERY_PART_LOC); + + try { + long maxID = 0, minID = 0; + try (Statement stmt = conn.createStatement(); + ResultSet res = stmt.executeQuery(queryPartIDRange)) { + if (res.next()) { + maxID = res.getLong(1); + minID = res.getLong(2); + } + } + + int numOfInvalid = 0; + try (PreparedStatement pStmt = conn.prepareStatement(queryPartLoc)) { + while (minID <= maxID) { + pStmt.setLong(1, minID); + pStmt.setLong(2, minID + PART_LOC_CHECK_SIZE); + try (ResultSet res = pStmt.executeQuery()) { + while (res.next()) { + String locValue = res.getString(3); + String entity = "Database " + getNameOrID(res, 7, 6) + ", Table " + getNameOrID(res, 5, 4) + + ", Partition " + getNameOrID(res, 2, 1); + if (!checkLocation(entity, locValue, defaultServers)) { + numOfInvalid++; + } + } + } + minID += PART_LOC_CHECK_SIZE + 1; + } + } + + return numOfInvalid == 0; + } catch (SQLException e) { + throw new HiveMetaException("Failed to get Partition Location Info.", e); + } + } + + private static final String QUERY_SKEWED_COL_ID_RANGE = + "select max(STRING_LIST_ID_KID), " + + " min(STRING_LIST_ID_KID) " + + " from SKEWED_COL_VALUE_LOC_MAP "; + + private static final String QUERY_SKEWED_COL_LOC = + " select t.TBL_NAME, " + + " t.TBL_ID, " + + " sk.STRING_LIST_ID_KID, " + + " sk.LOCATION, " + + " db.NAME, " + + " db.DB_ID " + + " from TBLS t " + + " join SDS s on s.SD_ID = t.SD_ID " + + " join DBS db on db.DB_ID = t.DB_ID " + + " join SKEWED_COL_VALUE_LOC_MAP sk on sk.SD_ID = s.SD_ID " + + " where sk.STRING_LIST_ID_KID >= ? " + + " and sk.STRING_LIST_ID_KID <= ? " + + "order by t.TBL_ID "; + + private static final int SKEWED_COL_LOC_CHECK_SIZE = 2000; + + private boolean checkMetaStoreSkewedColumnsLocation(Connection conn, URI[] defaultServers) + throws HiveMetaException { + String querySkewedColIDRange = schemaTool.quote(QUERY_SKEWED_COL_ID_RANGE); + String querySkewedColLoc = schemaTool.quote(QUERY_SKEWED_COL_LOC); + + try { + long maxID = 0, minID = 0; + try (Statement stmt = conn.createStatement(); + ResultSet res = stmt.executeQuery(querySkewedColIDRange)) { + if (res.next()) { + maxID = res.getLong(1); + minID = res.getLong(2); + } + } + + int numOfInvalid = 0; + try (PreparedStatement pStmt = conn.prepareStatement(querySkewedColLoc)) { + while (minID <= maxID) { + pStmt.setLong(1, minID); + pStmt.setLong(2, minID + SKEWED_COL_LOC_CHECK_SIZE); + try (ResultSet res = pStmt.executeQuery()) { + while (res.next()) { + String locValue = res.getString(4); + String entity = "Database " + getNameOrID(res, 5, 6) + ", Table " + getNameOrID(res, 1, 2) + + ", String list " + res.getString(3); + if (!checkLocation(entity, locValue, defaultServers)) { + numOfInvalid++; + } + } + } + minID += SKEWED_COL_LOC_CHECK_SIZE + 1; + } + } + + return numOfInvalid == 0; + } catch (SQLException e) { + throw new HiveMetaException("Failed to get skewed columns location info.", e); + } + } + + /** + * Check if the location is valid for the given entity + * @param entity the entity to represent a database, partition or table + * @param entityLocation the location + * @param defaultServers a list of the servers that the location needs to match. + * The location host needs to match one of the given servers. 
+ * If empty, no check is performed against the server list. + * @return true if the location is valid + */ + private boolean checkLocation(String entity, String entityLocation, URI[] defaultServers) { + boolean isValid = true; + + if (entityLocation == null) { + System.err.println(entity + ", Error: empty location"); + isValid = false; + } else { + try { + URI currentUri = new Path(entityLocation).toUri(); + String scheme = currentUri.getScheme(); + String path = currentUri.getPath(); + if (StringUtils.isEmpty(scheme)) { + System.err.println(entity + ", Location: " + entityLocation + ", Error: missing location scheme."); + isValid = false; + } else if (StringUtils.isEmpty(path)) { + System.err.println(entity + ", Location: " + entityLocation + ", Error: missing location path."); + isValid = false; + } else if (ArrayUtils.isNotEmpty(defaultServers) && currentUri.getAuthority() != null) { + String authority = currentUri.getAuthority(); + boolean matchServer = false; + for (URI server : defaultServers) { + if (StringUtils.equalsIgnoreCase(server.getScheme(), scheme) && + StringUtils.equalsIgnoreCase(server.getAuthority(), authority)) { + matchServer = true; + break; + } + } + if (!matchServer) { + System.err.println(entity + ", Location: " + entityLocation + ", Error: mismatched server."); + isValid = false; + } + } + + // if the path is just "/", report it but do not fail + if (isValid && StringUtils.containsOnly(path, "/")) { + System.err.println(entity + ", Location: " + entityLocation + ", Warn: location set to root, not a recommended config."); + } + } catch (Exception pe) { + System.err.println(entity + ", Error: invalid location - " + pe.getMessage()); + isValid = false; + } + } + + return isValid; + } + + private String getNameOrID(ResultSet res, int nameInx, int idInx) throws SQLException { + String itemName = res.getString(nameInx); + return (itemName == null || itemName.isEmpty()) ? "ID: " + res.getString(idInx) : "Name: " + itemName; + } + + private static final String QUERY_COLUMN_NULL_VALUES = + " select t.*" + + " from TBLS t" + + " where t.SD_ID IS NULL" + + " and (t.TBL_TYPE = '" + TableType.EXTERNAL_TABLE + "' or" + + " t.TBL_TYPE = '" + TableType.MANAGED_TABLE + "') " + + "order by t.TBL_ID "; + + private boolean validateColumnNullValues(Connection conn) throws HiveMetaException { + System.out.println("Validating columns for incorrect NULL values."); + + boolean isValid = true; + String queryColumnNullValues = schemaTool.quote(QUERY_COLUMN_NULL_VALUES); + + try (Statement stmt = conn.createStatement(); + ResultSet res = stmt.executeQuery(queryColumnNullValues)) { + while (res.next()) { + long tableId = res.getLong("TBL_ID"); + String tableName = res.getString("TBL_NAME"); + String tableType = res.getString("TBL_TYPE"); + isValid = false; + System.err.println("SD_ID in TBLS should not be NULL for Table Name=" + tableName + ", Table ID=" + tableId + ", Table Type=" + tableType); + } + + System.out.println(isValid ? 
"[SUCCESS]\n" : "[FAIL]\n"); + return isValid; + } catch(SQLException e) { + throw new HiveMetaException("Failed to validate columns for incorrect NULL values", e); + } + } +} diff --git a/beeline/src/test/org/apache/hive/beeline/schematool/TestHiveSchemaTool.java b/beeline/src/test/org/apache/hive/beeline/schematool/TestHiveSchemaTool.java new file mode 100644 index 0000000..8b477bd --- /dev/null +++ b/beeline/src/test/org/apache/hive/beeline/schematool/TestHiveSchemaTool.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.beeline.schematool; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.powermock.core.classloader.annotations.PowerMockIgnore; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.modules.junit4.PowerMockRunner; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.eq; +import static org.mockito.Matchers.same; +import static org.mockito.Mockito.when; +import static org.powermock.api.mockito.PowerMockito.mockStatic; +import static org.powermock.api.mockito.PowerMockito.verifyStatic; + +@RunWith(PowerMockRunner.class) +@PowerMockIgnore("javax.management.*") +@PrepareForTest({ HiveSchemaHelper.class, HiveSchemaTool.CommandBuilder.class }) +public class TestHiveSchemaTool { + + String scriptFile = System.getProperty("java.io.tmpdir") + File.separator + "someScript.sql"; + @Mock + private HiveConf hiveConf; + private HiveSchemaTool.CommandBuilder builder; + private String pasword = "reallySimplePassword"; + + @Before + public void setup() throws IOException { + mockStatic(HiveSchemaHelper.class); + when(HiveSchemaHelper + .getValidConfVar(eq(MetastoreConf.ConfVars.CONNECT_URL_KEY), same(hiveConf))) + .thenReturn("someURL"); + when(HiveSchemaHelper + .getValidConfVar(eq(MetastoreConf.ConfVars.CONNECTION_DRIVER), same(hiveConf))) + .thenReturn("someDriver"); + + File file = new File(scriptFile); + if (!file.exists()) { + file.createNewFile(); + } + builder = new HiveSchemaTool.CommandBuilder(hiveConf, null, null, "testUser", pasword, scriptFile); + } + + @After + public void globalAssert() throws IOException { + verifyStatic(); + HiveSchemaHelper.getValidConfVar(eq(MetastoreConf.ConfVars.CONNECT_URL_KEY), same(hiveConf)); + HiveSchemaHelper + .getValidConfVar(eq(MetastoreConf.ConfVars.CONNECTION_DRIVER), same(hiveConf)); + + new 
File(scriptFile).delete(); + } + + @Test + public void shouldReturnStrippedPassword() throws IOException { + assertFalse(builder.buildToLog().contains(password)); + } + + @Test + public void shouldReturnActualPassword() throws IOException { + String[] strings = builder.buildToRun(); + assertTrue(Arrays.asList(strings).contains(password)); + } +} diff --git a/bin/ext/schemaTool.sh b/bin/ext/schemaTool.sh index 94c56ef..e30bc11 100644 --- a/bin/ext/schemaTool.sh +++ b/bin/ext/schemaTool.sh @@ -18,12 +18,12 @@ export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} " schemaTool() { HIVE_OPTS='' - CLASS=org.apache.hive.beeline.HiveSchemaTool + CLASS=org.apache.hive.beeline.schematool.HiveSchemaTool execHiveCmd $CLASS "$@" } schemaTool_help () { HIVE_OPTS='' - CLASS=org.apache.hive.beeline.HiveSchemaTool + CLASS=org.apache.hive.beeline.schematool.HiveSchemaTool execHiveCmd $CLASS "--help" }
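For reference, a minimal invocation sketch of the relocated tool, assuming a configured metastore and the standard schematool options (-dbType, -validate, -info); the -validate path is what drives the sequence, schema-table, DFS-location, and NULL SD_ID checks added above:

    # run the metastore validators (sequences, schema tables, DFS locations, NULL SD_ID columns)
    $HIVE_HOME/bin/schematool -dbType mysql -validate

    # print the schema version that the table validation compares against
    $HIVE_HOME/bin/schematool -dbType mysql -info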