diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLine.java b/beeline/src/java/org/apache/hive/beeline/BeeLine.java index 860d36e..65e3dd6b 100644 --- a/beeline/src/java/org/apache/hive/beeline/BeeLine.java +++ b/beeline/src/java/org/apache/hive/beeline/BeeLine.java @@ -172,7 +172,7 @@ private static final String HIVE_VAR_PREFIX = "--hivevar"; private static final String HIVE_CONF_PREFIX = "--hiveconf"; private static final String PROP_FILE_PREFIX = "--property-file"; - static final String PASSWD_MASK = "[passwd stripped]"; + public static final String PASSWD_MASK = "[passwd stripped]"; private final Map formats = map(new Object[] { "vertical", new VerticalOutputFormat(this), diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java b/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java index 85052d9..3877b5c 100644 --- a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java +++ b/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java @@ -48,7 +48,7 @@ import jline.console.history.MemoryHistory; import org.apache.hadoop.hive.conf.HiveConf; -class BeeLineOpts implements Completer { +public class BeeLineOpts implements Completer { public static final int DEFAULT_MAX_WIDTH = 80; public static final int DEFAULT_MAX_HEIGHT = 80; public static final int DEFAULT_HEADER_INTERVAL = 100; diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java deleted file mode 100644 index 262eaa2..0000000 --- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java +++ /dev/null @@ -1,1563 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hive.beeline; - -import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.cli.OptionGroup; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.apache.commons.io.output.NullOutputStream; -import org.apache.commons.lang.ArrayUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaException; -import org.apache.hadoop.hive.metastore.IMetaStoreSchemaInfo; -import org.apache.hadoop.hive.metastore.MetaStoreSchemaInfoFactory; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf; -import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; -import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.MetaStoreConnectionInfo; -import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.NestedScriptParser; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.ImmutableMap; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintStream; -import java.net.URI; -import java.sql.Connection; -import java.sql.DatabaseMetaData; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.SQLFeatureNotSupportedException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; - -public class HiveSchemaTool { - private String userName = null; - private String passWord = null; - private boolean dryRun = false; - private boolean verbose = false; - private String dbOpts = null; - private String url = null; - private String driver = null; - private URI[] validationServers = null; // The list of servers the database/partition/table can locate on - private final HiveConf hiveConf; - private final String dbType; - private final String metaDbType; - private final IMetaStoreSchemaInfo metaStoreSchemaInfo; - private boolean needsQuotedIdentifier; - private String quoteCharacter; - - static final private Logger LOG = LoggerFactory.getLogger(HiveSchemaTool.class.getName()); - - public HiveSchemaTool(String dbType, String metaDbType) throws HiveMetaException { - this(System.getenv("HIVE_HOME"), new HiveConf(HiveSchemaTool.class), dbType, metaDbType); - } - - public HiveSchemaTool(String hiveHome, HiveConf hiveConf, String dbType, String metaDbType) - throws HiveMetaException { - if (hiveHome == null || hiveHome.isEmpty()) { - throw new HiveMetaException("No Hive home directory provided"); - } - this.hiveConf = hiveConf; - this.dbType = dbType; - this.metaDbType = metaDbType; - 
NestedScriptParser parser = getDbCommandParser(dbType, metaDbType); - this.needsQuotedIdentifier = parser.needsQuotedIdentifier(); - this.quoteCharacter = parser.getQuoteCharacter(); - this.metaStoreSchemaInfo = MetaStoreSchemaInfoFactory.get(hiveConf, hiveHome, dbType); - // If the dbType is "hive", this is setting up the information schema in Hive. - // We will set the default jdbc url and driver. - // It is overriden by command line options if passed (-url and -driver - if (dbType.equalsIgnoreCase(HiveSchemaHelper.DB_HIVE)) { - url = HiveSchemaHelper.EMBEDDED_HS2_URL; - driver = HiveSchemaHelper.HIVE_JDBC_DRIVER; - } - } - - public HiveConf getHiveConf() { - return hiveConf; - } - - public void setUrl(String url) { - this.url = url; - } - - public void setDriver(String driver) { - this.driver = driver; - } - - public void setUserName(String userName) { - this.userName = userName; - } - - public void setPassWord(String passWord) { - this.passWord = passWord; - } - - public void setDryRun(boolean dryRun) { - this.dryRun = dryRun; - } - - public void setVerbose(boolean verbose) { - this.verbose = verbose; - } - - public void setDbOpts(String dbOpts) { - this.dbOpts = dbOpts; - } - - public void setValidationServers(String servers) { - if(StringUtils.isNotEmpty(servers)) { - String[] strServers = servers.split(","); - this.validationServers = new URI[strServers.length]; - for (int i = 0; i < validationServers.length; i++) { - validationServers[i] = new Path(strServers[i]).toUri(); - } - } - } - - private static void printAndExit(Options cmdLineOptions) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("schemaTool", cmdLineOptions); - System.exit(1); - } - - Connection getConnectionToMetastore(boolean printInfo) throws HiveMetaException { - return HiveSchemaHelper.getConnectionToMetastore(userName, passWord, url, driver, printInfo, hiveConf, - null); - } - - private NestedScriptParser getDbCommandParser(String dbType, String metaDbType) { - return HiveSchemaHelper.getDbCommandParser(dbType, dbOpts, userName, passWord, hiveConf, - metaDbType, false); - } - - /*** - * Print Hive version and schema version - * @throws MetaException - */ - public void showInfo() throws HiveMetaException { - String hiveVersion = metaStoreSchemaInfo.getHiveSchemaVersion(); - String dbVersion = metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionInfo(true)); - System.out.println("Hive distribution version:\t " + hiveVersion); - System.out.println("Metastore schema version:\t " + dbVersion); - assertCompatibleVersion(hiveVersion, dbVersion); - } - - boolean validateLocations(Connection conn, URI[] defaultServers) throws HiveMetaException { - System.out.println("Validating DFS locations"); - boolean rtn; - rtn = checkMetaStoreDBLocation(conn, defaultServers); - rtn = checkMetaStoreTableLocation(conn, defaultServers) && rtn; - rtn = checkMetaStorePartitionLocation(conn, defaultServers) && rtn; - rtn = checkMetaStoreSkewedColumnsLocation(conn, defaultServers) && rtn; - System.out.println((rtn ? "Succeeded" : "Failed") + " in DFS location validation."); - return rtn; - } - - private String getNameOrID(ResultSet res, int nameInx, int idInx) throws SQLException { - String itemName = res.getString(nameInx); - return (itemName == null || itemName.isEmpty()) ? 
"ID: " + res.getString(idInx) : "Name: " + itemName; - } - - private boolean checkMetaStoreDBLocation(Connection conn, URI[] defaultServers) - throws HiveMetaException { - String dbLoc; - boolean isValid = true; - int numOfInvalid = 0; - if (needsQuotedIdentifier) { - dbLoc = "select dbt.\"DB_ID\", dbt.\"NAME\", dbt.\"DB_LOCATION_URI\" from \"DBS\" dbt order by dbt.\"DB_ID\" "; - } else { - dbLoc = "select dbt.DB_ID, dbt.NAME, dbt.DB_LOCATION_URI from DBS dbt order by dbt.DB_ID"; - } - - try(Statement stmt = conn.createStatement(); - ResultSet res = stmt.executeQuery(dbLoc)) { - while (res.next()) { - String locValue = res.getString(3); - String dbName = getNameOrID(res,2,1); - if (!checkLocation("Database " + dbName, locValue, defaultServers)) { - numOfInvalid++; - } - } - } catch (SQLException e) { - throw new HiveMetaException("Failed to get DB Location Info.", e); - } - if (numOfInvalid > 0) { - isValid = false; - } - return isValid; - } - - private boolean checkMetaStoreTableLocation(Connection conn, URI[] defaultServers) - throws HiveMetaException { - String tabLoc, tabIDRange; - boolean isValid = true; - int numOfInvalid = 0; - if (needsQuotedIdentifier) { - tabIDRange = "select max(\"TBL_ID\"), min(\"TBL_ID\") from \"TBLS\" "; - } else { - tabIDRange = "select max(TBL_ID), min(TBL_ID) from TBLS"; - } - - if (needsQuotedIdentifier) { - tabLoc = "select tbl.\"TBL_ID\", tbl.\"TBL_NAME\", sd.\"LOCATION\", dbt.\"DB_ID\", dbt.\"NAME\" from \"TBLS\" tbl inner join " + - "\"SDS\" sd on tbl.\"SD_ID\" = sd.\"SD_ID\" and tbl.\"TBL_TYPE\" != '" + TableType.VIRTUAL_VIEW + - "' and tbl.\"TBL_ID\" >= ? and tbl.\"TBL_ID\"<= ? " + "inner join \"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" order by tbl.\"TBL_ID\" "; - } else { - tabLoc = "select tbl.TBL_ID, tbl.TBL_NAME, sd.LOCATION, dbt.DB_ID, dbt.NAME from TBLS tbl join SDS sd on tbl.SD_ID = sd.SD_ID and tbl.TBL_TYPE !='" - + TableType.VIRTUAL_VIEW + "' and tbl.TBL_ID >= ? and tbl.TBL_ID <= ? 
inner join DBS dbt on tbl.DB_ID = dbt.DB_ID order by tbl.TBL_ID"; - } - - long maxID = 0, minID = 0; - long rtnSize = 2000; - - try { - Statement stmt = conn.createStatement(); - ResultSet res = stmt.executeQuery(tabIDRange); - if (res.next()) { - maxID = res.getLong(1); - minID = res.getLong(2); - } - res.close(); - stmt.close(); - PreparedStatement pStmt = conn.prepareStatement(tabLoc); - while (minID <= maxID) { - pStmt.setLong(1, minID); - pStmt.setLong(2, minID + rtnSize); - res = pStmt.executeQuery(); - while (res.next()) { - String locValue = res.getString(3); - String entity = "Database " + getNameOrID(res, 5, 4) + - ", Table " + getNameOrID(res,2,1); - if (!checkLocation(entity, locValue, defaultServers)) { - numOfInvalid++; - } - } - res.close(); - minID += rtnSize + 1; - - } - pStmt.close(); - - } catch (SQLException e) { - throw new HiveMetaException("Failed to get Table Location Info.", e); - } - if (numOfInvalid > 0) { - isValid = false; - } - return isValid; - } - - private boolean checkMetaStorePartitionLocation(Connection conn, URI[] defaultServers) - throws HiveMetaException { - String partLoc, partIDRange; - boolean isValid = true; - int numOfInvalid = 0; - if (needsQuotedIdentifier) { - partIDRange = "select max(\"PART_ID\"), min(\"PART_ID\") from \"PARTITIONS\" "; - } else { - partIDRange = "select max(PART_ID), min(PART_ID) from PARTITIONS"; - } - - if (needsQuotedIdentifier) { - partLoc = "select pt.\"PART_ID\", pt.\"PART_NAME\", sd.\"LOCATION\", tbl.\"TBL_ID\", tbl.\"TBL_NAME\",dbt.\"DB_ID\", dbt.\"NAME\" from \"PARTITIONS\" pt " - + "inner join \"SDS\" sd on pt.\"SD_ID\" = sd.\"SD_ID\" and pt.\"PART_ID\" >= ? and pt.\"PART_ID\"<= ? " - + " inner join \"TBLS\" tbl on pt.\"TBL_ID\" = tbl.\"TBL_ID\" inner join " - + "\"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" order by tbl.\"TBL_ID\" "; - } else { - partLoc = "select pt.PART_ID, pt.PART_NAME, sd.LOCATION, tbl.TBL_ID, tbl.TBL_NAME, dbt.DB_ID, dbt.NAME from PARTITIONS pt " - + "inner join SDS sd on pt.SD_ID = sd.SD_ID and pt.PART_ID >= ? and pt.PART_ID <= ? 
" - + "inner join TBLS tbl on tbl.TBL_ID = pt.TBL_ID inner join DBS dbt on tbl.DB_ID = dbt.DB_ID order by tbl.TBL_ID "; - } - - long maxID = 0, minID = 0; - long rtnSize = 2000; - - try { - Statement stmt = conn.createStatement(); - ResultSet res = stmt.executeQuery(partIDRange); - if (res.next()) { - maxID = res.getLong(1); - minID = res.getLong(2); - } - res.close(); - stmt.close(); - PreparedStatement pStmt = conn.prepareStatement(partLoc); - while (minID <= maxID) { - pStmt.setLong(1, minID); - pStmt.setLong(2, minID + rtnSize); - res = pStmt.executeQuery(); - while (res.next()) { - String locValue = res.getString(3); - String entity = "Database " + getNameOrID(res,7,6) + - ", Table " + getNameOrID(res,5,4) + - ", Partition " + getNameOrID(res,2,1); - if (!checkLocation(entity, locValue, defaultServers)) { - numOfInvalid++; - } - } - res.close(); - minID += rtnSize + 1; - } - pStmt.close(); - } catch (SQLException e) { - throw new HiveMetaException("Failed to get Partition Location Info.", e); - } - if (numOfInvalid > 0) { - isValid = false; - } - return isValid; - } - - private boolean checkMetaStoreSkewedColumnsLocation(Connection conn, URI[] defaultServers) - throws HiveMetaException { - String skewedColLoc, skewedColIDRange; - boolean isValid = true; - int numOfInvalid = 0; - if (needsQuotedIdentifier) { - skewedColIDRange = "select max(\"STRING_LIST_ID_KID\"), min(\"STRING_LIST_ID_KID\") from \"SKEWED_COL_VALUE_LOC_MAP\" "; - } else { - skewedColIDRange = "select max(STRING_LIST_ID_KID), min(STRING_LIST_ID_KID) from SKEWED_COL_VALUE_LOC_MAP"; - } - - if (needsQuotedIdentifier) { - skewedColLoc = "select t.\"TBL_NAME\", t.\"TBL_ID\", sk.\"STRING_LIST_ID_KID\", sk.\"LOCATION\", db.\"NAME\", db.\"DB_ID\" " - + " from \"TBLS\" t, \"SDS\" s, \"DBS\" db, \"SKEWED_COL_VALUE_LOC_MAP\" sk " - + "where sk.\"SD_ID\" = s.\"SD_ID\" and s.\"SD_ID\" = t.\"SD_ID\" and t.\"DB_ID\" = db.\"DB_ID\" and " - + "sk.\"STRING_LIST_ID_KID\" >= ? and sk.\"STRING_LIST_ID_KID\" <= ? order by t.\"TBL_ID\" "; - } else { - skewedColLoc = "select t.TBL_NAME, t.TBL_ID, sk.STRING_LIST_ID_KID, sk.LOCATION, db.NAME, db.DB_ID from TBLS t, SDS s, DBS db, SKEWED_COL_VALUE_LOC_MAP sk " - + "where sk.SD_ID = s.SD_ID and s.SD_ID = t.SD_ID and t.DB_ID = db.DB_ID and sk.STRING_LIST_ID_KID >= ? and sk.STRING_LIST_ID_KID <= ? 
order by t.TBL_ID "; - } - - long maxID = 0, minID = 0; - long rtnSize = 2000; - - try { - Statement stmt = conn.createStatement(); - ResultSet res = stmt.executeQuery(skewedColIDRange); - if (res.next()) { - maxID = res.getLong(1); - minID = res.getLong(2); - } - res.close(); - stmt.close(); - PreparedStatement pStmt = conn.prepareStatement(skewedColLoc); - while (minID <= maxID) { - pStmt.setLong(1, minID); - pStmt.setLong(2, minID + rtnSize); - res = pStmt.executeQuery(); - while (res.next()) { - String locValue = res.getString(4); - String entity = "Database " + getNameOrID(res,5,6) + - ", Table " + getNameOrID(res,1,2) + - ", String list " + res.getString(3); - if (!checkLocation(entity, locValue, defaultServers)) { - numOfInvalid++; - } - } - res.close(); - minID += rtnSize + 1; - } - pStmt.close(); - } catch (SQLException e) { - throw new HiveMetaException("Failed to get skewed columns location info.", e); - } - if (numOfInvalid > 0) { - isValid = false; - } - return isValid; - } - - /** - * Check if the location is valid for the given entity - * @param entity the entity to represent a database, partition or table - * @param entityLocation the location - * @param defaultServers a list of the servers that the location needs to match. - * The location host needs to match one of the given servers. - * If empty, then no check against such list. - * @return true if the location is valid - */ - private boolean checkLocation( - String entity, - String entityLocation, - URI[] defaultServers) { - boolean isValid = true; - if (entityLocation == null) { - System.err.println(entity + ", Error: empty location"); - isValid = false; - } else { - try { - URI currentUri = new Path(entityLocation).toUri(); - String scheme = currentUri.getScheme(); - String path = currentUri.getPath(); - if (StringUtils.isEmpty(scheme)) { - System.err.println(entity + ", Location: "+ entityLocation + ", Error: missing location scheme."); - isValid = false; - } else if (StringUtils.isEmpty(path)) { - System.err.println(entity + ", Location: "+ entityLocation + ", Error: missing location path."); - isValid = false; - } else if (ArrayUtils.isNotEmpty(defaultServers) && currentUri.getAuthority() != null) { - String authority = currentUri.getAuthority(); - boolean matchServer = false; - for(URI server : defaultServers) { - if (StringUtils.equalsIgnoreCase(server.getScheme(), scheme) && - StringUtils.equalsIgnoreCase(server.getAuthority(), authority)) { - matchServer = true; - break; - } - } - if (!matchServer) { - System.err.println(entity + ", Location: " + entityLocation + ", Error: mismatched server."); - isValid = false; - } - } - - // if there is no path element other than "/", report it but not fail - if (isValid && StringUtils.containsOnly(path, "/")) { - System.err.println(entity + ", Location: "+ entityLocation + ", Warn: location set to root, not a recommended config."); - } - } catch (Exception pe) { - System.err.println(entity + ", Error: invalid location - " + pe.getMessage()); - isValid =false; - } - } - - return isValid; - } - - // test the connection metastore using the config property - private void testConnectionToMetastore() throws HiveMetaException { - Connection conn = getConnectionToMetastore(true); - try { - conn.close(); - } catch (SQLException e) { - throw new HiveMetaException("Failed to close metastore connection", e); - } - } - - - /** - * check if the current schema version in metastore matches the Hive version - * @throws MetaException - */ - public void verifySchemaVersion() throws 
HiveMetaException { - // don't check version if its a dry run - if (dryRun) { - return; - } - String newSchemaVersion = metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionInfo(false)); - // verify that the new version is added to schema - assertCompatibleVersion(metaStoreSchemaInfo.getHiveSchemaVersion(), newSchemaVersion); - } - - private void assertCompatibleVersion(String hiveSchemaVersion, String dbSchemaVersion) - throws HiveMetaException { - if (!metaStoreSchemaInfo.isVersionCompatible(hiveSchemaVersion, dbSchemaVersion)) { - throw new HiveMetaException("Metastore schema version is not compatible. Hive Version: " - + hiveSchemaVersion + ", Database Schema Version: " + dbSchemaVersion); - } - } - - /** - * Perform metastore schema upgrade. extract the current schema version from metastore - * @throws MetaException - */ - public void doUpgrade() throws HiveMetaException { - String fromVersion = - metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionInfo(false)); - if (fromVersion == null || fromVersion.isEmpty()) { - throw new HiveMetaException("Schema version not stored in the metastore. " + - "Metastore schema is too old or corrupt. Try specifying the version manually"); - } - doUpgrade(fromVersion); - } - - private MetaStoreConnectionInfo getConnectionInfo(boolean printInfo) { - return new MetaStoreConnectionInfo(userName, passWord, url, driver, printInfo, hiveConf, - dbType, metaDbType); - } - /** - * Perform metastore schema upgrade - * - * @param fromSchemaVer - * Existing version of the metastore. If null, then read from the metastore - * @throws MetaException - */ - public void doUpgrade(String fromSchemaVer) throws HiveMetaException { - if (metaStoreSchemaInfo.getHiveSchemaVersion().equals(fromSchemaVer)) { - System.out.println("No schema upgrade required from version " + fromSchemaVer); - return; - } - // Find the list of scripts to execute for this upgrade - List upgradeScripts = - metaStoreSchemaInfo.getUpgradeScripts(fromSchemaVer); - testConnectionToMetastore(); - System.out.println("Starting upgrade metastore schema from version " + - fromSchemaVer + " to " + metaStoreSchemaInfo.getHiveSchemaVersion()); - String scriptDir = metaStoreSchemaInfo.getMetaStoreScriptDir(); - try { - for (String scriptFile : upgradeScripts) { - System.out.println("Upgrade script " + scriptFile); - if (!dryRun) { - runPreUpgrade(scriptDir, scriptFile); - runBeeLine(scriptDir, scriptFile); - System.out.println("Completed " + scriptFile); - } - } - } catch (IOException eIO) { - throw new HiveMetaException( - "Upgrade FAILED! 
Metastore state would be inconsistent !!", eIO); - } - - // Revalidated the new version after upgrade - verifySchemaVersion(); - } - - /** - * Initialize the metastore schema to current version - * - * @throws MetaException - */ - public void doInit() throws HiveMetaException { - doInit(metaStoreSchemaInfo.getHiveSchemaVersion()); - - // Revalidated the new version after upgrade - verifySchemaVersion(); - } - - /** - * Initialize the metastore schema - * - * @param toVersion - * If null then current hive version is used - * @throws MetaException - */ - public void doInit(String toVersion) throws HiveMetaException { - testConnectionToMetastore(); - System.out.println("Starting metastore schema initialization to " + toVersion); - - String initScriptDir = metaStoreSchemaInfo.getMetaStoreScriptDir(); - String initScriptFile = metaStoreSchemaInfo.generateInitFileName(toVersion); - - try { - System.out.println("Initialization script " + initScriptFile); - if (!dryRun) { - runBeeLine(initScriptDir, initScriptFile); - System.out.println("Initialization script completed"); - } - } catch (IOException e) { - throw new HiveMetaException("Schema initialization FAILED!" + - " Metastore state would be inconsistent !!", e); - } - } - - public void doValidate() throws HiveMetaException { - System.out.println("Starting metastore validation\n"); - Connection conn = getConnectionToMetastore(false); - boolean success = true; - try { - if (validateSchemaVersions()) { - System.out.println("[SUCCESS]\n"); - } else { - success = false; - System.out.println("[FAIL]\n"); - } - if (validateSequences(conn)) { - System.out.println("[SUCCESS]\n"); - } else { - success = false; - System.out.println("[FAIL]\n"); - } - if (validateSchemaTables(conn)) { - System.out.println("[SUCCESS]\n"); - } else { - success = false; - System.out.println("[FAIL]\n"); - } - if (validateLocations(conn, this.validationServers)) { - System.out.println("[SUCCESS]\n"); - } else { - System.out.println("[WARN]\n"); - } - if (validateColumnNullValues(conn)) { - System.out.println("[SUCCESS]\n"); - } else { - System.out.println("[WARN]\n"); - } - } finally { - if (conn != null) { - try { - conn.close(); - } catch (SQLException e) { - throw new HiveMetaException("Failed to close metastore connection", e); - } - } - } - - System.out.print("Done with metastore validation: "); - if (!success) { - System.out.println("[FAIL]"); - System.exit(1); - } else { - System.out.println("[SUCCESS]"); - } - } - - boolean validateSequences(Connection conn) throws HiveMetaException { - Map> seqNameToTable = - new ImmutableMap.Builder>() - .put("MDatabase", Pair.of("DBS", "DB_ID")) - .put("MRole", Pair.of("ROLES", "ROLE_ID")) - .put("MGlobalPrivilege", Pair.of("GLOBAL_PRIVS", "USER_GRANT_ID")) - .put("MTable", Pair.of("TBLS","TBL_ID")) - .put("MStorageDescriptor", Pair.of("SDS", "SD_ID")) - .put("MSerDeInfo", Pair.of("SERDES", "SERDE_ID")) - .put("MColumnDescriptor", Pair.of("CDS", "CD_ID")) - .put("MTablePrivilege", Pair.of("TBL_PRIVS", "TBL_GRANT_ID")) - .put("MTableColumnStatistics", Pair.of("TAB_COL_STATS", "CS_ID")) - .put("MPartition", Pair.of("PARTITIONS", "PART_ID")) - .put("MPartitionColumnStatistics", Pair.of("PART_COL_STATS", "CS_ID")) - .put("MFunction", Pair.of("FUNCS", "FUNC_ID")) - .put("MIndex", Pair.of("IDXS", "INDEX_ID")) - .put("MStringList", Pair.of("SKEWED_STRING_LIST", "STRING_LIST_ID")) - .build(); - - System.out.println("Validating sequence number for SEQUENCE_TABLE"); - - boolean isValid = true; - try { - Statement stmt = 
conn.createStatement(); - for (String seqName : seqNameToTable.keySet()) { - String tableName = seqNameToTable.get(seqName).getLeft(); - String tableKey = seqNameToTable.get(seqName).getRight(); - String fullSequenceName = "org.apache.hadoop.hive.metastore.model." + seqName; - String seqQuery = needsQuotedIdentifier ? - ("select t.\"NEXT_VAL\" from \"SEQUENCE_TABLE\" t WHERE t.\"SEQUENCE_NAME\"=? order by t.\"SEQUENCE_NAME\" ") - : ("select t.NEXT_VAL from SEQUENCE_TABLE t WHERE t.SEQUENCE_NAME=? order by t.SEQUENCE_NAME "); - String maxIdQuery = needsQuotedIdentifier ? - ("select max(\"" + tableKey + "\") from \"" + tableName + "\"") - : ("select max(" + tableKey + ") from " + tableName); - - ResultSet res = stmt.executeQuery(maxIdQuery); - if (res.next()) { - long maxId = res.getLong(1); - if (maxId > 0) { - PreparedStatement pStmt = conn.prepareStatement(seqQuery); - pStmt.setString(1, fullSequenceName); - ResultSet resSeq = pStmt.executeQuery(); - if (!resSeq.next()) { - isValid = false; - System.err.println("Missing SEQUENCE_NAME " + seqName + " from SEQUENCE_TABLE"); - } else if (resSeq.getLong(1) < maxId) { - isValid = false; - System.err.println("NEXT_VAL for " + seqName + " in SEQUENCE_TABLE < max(" + - tableKey + ") in " + tableName); - } - } - } - } - - System.out.println((isValid ? "Succeeded" :"Failed") + " in sequence number validation for SEQUENCE_TABLE."); - return isValid; - } catch(SQLException e) { - throw new HiveMetaException("Failed to validate sequence number for SEQUENCE_TABLE", e); - } - } - - boolean validateSchemaVersions() throws HiveMetaException { - System.out.println("Validating schema version"); - try { - String newSchemaVersion = metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionInfo(false)); - assertCompatibleVersion(metaStoreSchemaInfo.getHiveSchemaVersion(), newSchemaVersion); - } catch (HiveMetaException hme) { - if (hme.getMessage().contains("Metastore schema version is not compatible") - || hme.getMessage().contains("Multiple versions were found in metastore") - || hme.getMessage().contains("Could not find version info in metastore VERSION table")) { - System.err.println(hme.getMessage()); - System.out.println("Failed in schema version validation."); - return false; - } else { - throw hme; - } - } - System.out.println("Succeeded in schema version validation."); - return true; - } - - boolean validateSchemaTables(Connection conn) throws HiveMetaException { - String version = null; - ResultSet rs = null; - DatabaseMetaData metadata = null; - List dbTables = new ArrayList(); - List schemaTables = new ArrayList(); - List subScripts = new ArrayList(); - Connection hmsConn = getConnectionToMetastore(false); - - System.out.println("Validating metastore schema tables"); - try { - version = metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionInfo(false)); - } catch (HiveMetaException he) { - System.err.println("Failed to determine schema version from Hive Metastore DB. 
" + he.getMessage()); - System.out.println("Failed in schema table validation."); - LOG.debug("Failed to determine schema version from Hive Metastore DB," + he.getMessage()); - return false; - } - - // re-open the hms connection - hmsConn = getConnectionToMetastore(false); - - LOG.debug("Validating tables in the schema for version " + version); - try { - String schema = null; - try { - schema = hmsConn.getSchema(); - } catch (SQLFeatureNotSupportedException e) { - LOG.debug("schema is not supported"); - } - - metadata = conn.getMetaData(); - String[] types = {"TABLE"}; - rs = metadata.getTables(null, schema, "%", types); - String table = null; - - while (rs.next()) { - table = rs.getString("TABLE_NAME"); - dbTables.add(table.toLowerCase()); - LOG.debug("Found table " + table + " in HMS dbstore"); - } - } catch (SQLException e) { - throw new HiveMetaException("Failed to retrieve schema tables from Hive Metastore DB", e); - } finally { - if (rs != null) { - try { - rs.close(); - } catch (SQLException e) { - throw new HiveMetaException("Failed to close resultset", e); - } - } - } - - // parse the schema file to determine the tables that are expected to exist - // we are using oracle schema because it is simpler to parse, no quotes or backticks etc - String baseDir = new File(metaStoreSchemaInfo.getMetaStoreScriptDir()).getParent(); - String schemaFile = new File(metaStoreSchemaInfo.getMetaStoreScriptDir(), - metaStoreSchemaInfo.generateInitFileName(version)).getPath(); - try { - LOG.debug("Parsing schema script " + schemaFile); - subScripts.addAll(findCreateTable(schemaFile, schemaTables)); - while (subScripts.size() > 0) { - schemaFile = baseDir + "/" + dbType + "/" + subScripts.remove(0); - LOG.debug("Parsing subscript " + schemaFile); - subScripts.addAll(findCreateTable(schemaFile, schemaTables)); - } - } catch (Exception e) { - System.err.println("Exception in parsing schema file. Cause:" + e.getMessage()); - System.out.println("Failed in schema table validation."); - return false; - } - - LOG.debug("Schema tables:[ " + Arrays.toString(schemaTables.toArray()) + " ]"); - LOG.debug("DB tables:[ " + Arrays.toString(dbTables.toArray()) + " ]"); - // now diff the lists - schemaTables.removeAll(dbTables); - if (schemaTables.size() > 0) { - Collections.sort(schemaTables); - System.err.println("Table(s) [ " + Arrays.toString(schemaTables.toArray()) - + " ] are missing from the metastore database schema."); - System.out.println("Failed in schema table validation."); - return false; - } else { - System.out.println("Succeeded in schema table validation."); - return true; - } - } - - private List findCreateTable(String path, List tableList) - throws Exception { - NestedScriptParser sp = HiveSchemaHelper.getDbCommandParser(dbType, false); - Matcher matcher = null; - Pattern regexp = null; - List subs = new ArrayList(); - int groupNo = 2; - - regexp = Pattern.compile("CREATE TABLE(\\s+IF NOT EXISTS)?\\s+(\\S+).*"); - - if (!(new File(path)).exists()) { - throw new Exception(path + " does not exist. 
Potentially incorrect version in the metastore VERSION table"); - } - - try ( - BufferedReader reader = new BufferedReader(new FileReader(path)); - ){ - String line = null; - while ((line = reader.readLine()) != null) { - if (sp.isNestedScript(line)) { - String subScript = null; - subScript = sp.getScriptName(line); - LOG.debug("Schema subscript " + subScript + " found"); - subs.add(subScript); - continue; - } - line = line.replaceAll("( )+", " "); //suppress multi-spaces - line = line.replaceAll("\\(", " "); - line = line.replaceAll("IF NOT EXISTS ", ""); - line = line.replaceAll("`",""); - line = line.replaceAll("'",""); - line = line.replaceAll("\"",""); - matcher = regexp.matcher(line); - - if (matcher.find()) { - String table = matcher.group(groupNo); - if (dbType.equals("derby")) - table = table.replaceAll("APP\\.",""); - tableList.add(table.toLowerCase()); - LOG.debug("Found table " + table + " in the schema"); - } - } - } catch (IOException ex){ - throw new Exception(ex.getMessage()); - } - - return subs; - } - - boolean validateColumnNullValues(Connection conn) throws HiveMetaException { - System.out.println("Validating columns for incorrect NULL values."); - boolean isValid = true; - try { - Statement stmt = conn.createStatement(); - String tblQuery = needsQuotedIdentifier ? - ("select t.* from \"TBLS\" t WHERE t.\"SD_ID\" IS NULL and (t.\"TBL_TYPE\"='" + TableType.EXTERNAL_TABLE + "' or t.\"TBL_TYPE\"='" + TableType.MANAGED_TABLE + "') order by t.\"TBL_ID\" ") - : ("select t.* from TBLS t WHERE t.SD_ID IS NULL and (t.TBL_TYPE='" + TableType.EXTERNAL_TABLE + "' or t.TBL_TYPE='" + TableType.MANAGED_TABLE + "') order by t.TBL_ID "); - - ResultSet res = stmt.executeQuery(tblQuery); - while (res.next()) { - long tableId = res.getLong("TBL_ID"); - String tableName = res.getString("TBL_NAME"); - String tableType = res.getString("TBL_TYPE"); - isValid = false; - System.err.println("SD_ID in TBLS should not be NULL for Table Name=" + tableName + ", Table ID=" + tableId + ", Table Type=" + tableType); - } - - System.out.println((isValid ? "Succeeded" : "Failed") + " in column validation for incorrect NULL values."); - return isValid; - } catch(SQLException e) { - throw new HiveMetaException("Failed to validate columns for incorrect NULL values", e); - } - } - - @VisibleForTesting - void createCatalog(String catName, String location, String description, boolean ifNotExists) - throws HiveMetaException { - catName = normalizeIdentifier(catName); - System.out.println("Create catalog " + catName + " at location " + location); - - Connection conn = getConnectionToMetastore(true); - boolean success = false; - try { - conn.setAutoCommit(false); - try (Statement stmt = conn.createStatement()) { - // If they set ifNotExists check for existence first, and bail if it exists. This is - // more reliable then attempting to parse the error message from the SQLException. 
- if (ifNotExists) { - String query = "select " + quoteIf("NAME") + " from " + quoteIf("CTLGS") + - " where " + quoteIf("NAME") + " = '" + catName + "'"; - LOG.debug("Going to run " + query); - ResultSet rs = stmt.executeQuery(query); - if (rs.next()) { - System.out.println("Catalog " + catName + " already exists"); - return; - } - } - String query = "select max(" + quoteIf("CTLG_ID") + ") from " + quoteIf("CTLGS"); - LOG.debug("Going to run " + query); - ResultSet rs = stmt.executeQuery(query); - if (!rs.next()) { - throw new HiveMetaException("No catalogs found, have you upgraded the database?"); - } - int catNum = rs.getInt(1) + 1; - // We need to stay out of the way of any sequences used by the underlying database. - // Otherwise the next time the client tries to add a catalog we'll get an error. - // There should never be billions of catalogs, so we'll shift our sequence number up - // there to avoid clashes. - int floor = 1 << 30; - if (catNum < floor) catNum = floor; - - String update = "insert into " + quoteIf("CTLGS") + - "(" + quoteIf("CTLG_ID") + ", " + quoteIf("NAME") + ", " + quoteAlways("DESC") + ", " + quoteIf( "LOCATION_URI") + ") " + - " values (" + catNum + ", '" + catName + "', '" + description + "', '" + location + "')"; - LOG.debug("Going to run " + update); - stmt.execute(update); - conn.commit(); - success = true; - } - } catch (SQLException e) { - throw new HiveMetaException("Failed to add catalog", e); - } finally { - try { - if (!success) conn.rollback(); - } catch (SQLException e) { - // Not really much we can do here. - LOG.error("Failed to rollback, everything will probably go bad from here.", e); - } - } - } - - @VisibleForTesting - void alterCatalog(String catName, String location, String description) throws HiveMetaException { - if (location == null && description == null) { - throw new HiveMetaException("Asked to update catalog " + catName + - " but not given any changes to update"); - } - catName = normalizeIdentifier(catName); - System.out.println("Updating catalog " + catName); - - Connection conn = getConnectionToMetastore(true); - boolean success = false; - try { - conn.setAutoCommit(false); - try (Statement stmt = conn.createStatement()) { - StringBuilder update = new StringBuilder("update ") - .append(quoteIf("CTLGS")) - .append(" set "); - if (location != null) { - update.append(quoteIf("LOCATION_URI")) - .append(" = '") - .append(location) - .append("' "); - } - if (description != null) { - if (location != null) update.append(", "); - update.append(quoteAlways("DESC")) - .append(" = '") - .append(description) - .append("'"); - } - update.append(" where ") - .append(quoteIf("NAME")) - .append(" = '") - .append(catName) - .append("'"); - LOG.debug("Going to run " + update.toString()); - int count = stmt.executeUpdate(update.toString()); - if (count != 1) { - throw new HiveMetaException("Failed to find catalog " + catName + " to update"); - } - conn.commit(); - success = true; - } - } catch (SQLException e) { - throw new HiveMetaException("Failed to update catalog", e); - } finally { - try { - if (!success) conn.rollback(); - } catch (SQLException e) { - // Not really much we can do here. 
- LOG.error("Failed to rollback, everything will probably go bad from here.", e); - } - } - } - - @VisibleForTesting - void moveDatabase(String fromCatName, String toCatName, String dbName) throws HiveMetaException { - fromCatName = normalizeIdentifier(fromCatName); - toCatName = normalizeIdentifier(toCatName); - dbName = normalizeIdentifier(dbName); - System.out.println("Moving database " + dbName + " from catalog " + fromCatName + - " to catalog " + toCatName); - Connection conn = getConnectionToMetastore(true); - boolean success = false; - try { - conn.setAutoCommit(false); - try (Statement stmt = conn.createStatement()) { - updateCatalogNameInTable(stmt, "DBS", "CTLG_NAME", "NAME", fromCatName, toCatName, dbName, false); - updateCatalogNameInTable(stmt, "TAB_COL_STATS", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); - updateCatalogNameInTable(stmt, "PART_COL_STATS", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); - updateCatalogNameInTable(stmt, "PARTITION_EVENTS", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); - updateCatalogNameInTable(stmt, "NOTIFICATION_LOG", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); - conn.commit(); - success = true; - } - } catch (SQLException e) { - throw new HiveMetaException("Failed to move database", e); - } finally { - try { - if (!success) conn.rollback(); - } catch (SQLException e) { - // Not really much we can do here. - LOG.error("Failed to rollback, everything will probably go bad from here."); - } - } - } - - private void updateCatalogNameInTable(Statement stmt, String tableName, String catColName, - String dbColName, String fromCatName, - String toCatName, String dbName, boolean zeroUpdatesOk) - throws HiveMetaException, SQLException { - String update = "update " + quoteIf(tableName) + " " + - "set " + quoteIf(catColName) + " = '" + toCatName + "' " + - "where " + quoteIf(catColName) + " = '" + fromCatName + "' and " + quoteIf(dbColName) + " = '" + dbName + "'"; - LOG.debug("Going to run " + update); - int numUpdated = stmt.executeUpdate(update); - if (numUpdated != 1 && !(zeroUpdatesOk && numUpdated == 0)) { - throw new HiveMetaException("Failed to properly update the " + tableName + - " table. 
Expected to update 1 row but instead updated " + numUpdated); - } - } - - @VisibleForTesting - void moveTable(String fromCat, String toCat, String fromDb, String toDb, String tableName) - throws HiveMetaException { - fromCat = normalizeIdentifier(fromCat); - toCat = normalizeIdentifier(toCat); - fromDb = normalizeIdentifier(fromDb); - toDb = normalizeIdentifier(toDb); - tableName = normalizeIdentifier(tableName); - Connection conn = getConnectionToMetastore(true); - boolean success = false; - try { - conn.setAutoCommit(false); - try (Statement stmt = conn.createStatement()) { - // Find the old database id - String query = "select " + quoteIf("DB_ID") + - " from " + quoteIf("DBS") + - " where " + quoteIf("NAME") + " = '" + fromDb + "' " - + "and " + quoteIf("CTLG_NAME") + " = '" + fromCat + "'"; - LOG.debug("Going to run " + query); - ResultSet rs = stmt.executeQuery(query); - if (!rs.next()) { - throw new HiveMetaException("Unable to find database " + fromDb); - } - long oldDbId = rs.getLong(1); - - // Find the new database id - query = "select " + quoteIf("DB_ID") + - " from " + quoteIf("DBS") + - " where " + quoteIf("NAME") + " = '" + toDb + "' " - + "and " + quoteIf("CTLG_NAME") + " = '" + toCat + "'"; - LOG.debug("Going to run " + query); - rs = stmt.executeQuery(query); - if (!rs.next()) { - throw new HiveMetaException("Unable to find database " + toDb); - } - long newDbId = rs.getLong(1); - - String update = "update " + quoteIf("TBLS") + " " + - "set " + quoteIf("DB_ID") + " = " + newDbId + " " + - "where " + quoteIf("DB_ID") + " = " + oldDbId + - " and " + quoteIf("TBL_NAME") + " = '" + tableName + "'"; - LOG.debug("Going to run " + update); - int numUpdated = stmt.executeUpdate(update); - if (numUpdated != 1) { - throw new HiveMetaException( - "Failed to properly update TBLS table. Expected to update " + - "1 row but instead updated " + numUpdated); - } - updateDbNameForTable(stmt, "TAB_COL_STATS", "TABLE_NAME", fromCat, toCat, fromDb, toDb, tableName); - updateDbNameForTable(stmt, "PART_COL_STATS", "TABLE_NAME", fromCat, toCat, fromDb, toDb, tableName); - updateDbNameForTable(stmt, "PARTITION_EVENTS", "TBL_NAME", fromCat, toCat, fromDb, toDb, tableName); - updateDbNameForTable(stmt, "NOTIFICATION_LOG", "TBL_NAME", fromCat, toCat, fromDb, toDb, tableName); - conn.commit(); - success = true; - } - } catch (SQLException se) { - throw new HiveMetaException("Failed to move table", se); - } finally { - try { - if (!success) conn.rollback(); - } catch (SQLException e) { - // Not really much we can do here. - LOG.error("Failed to rollback, everything will probably go bad from here."); - } - - } - } - - private void updateDbNameForTable(Statement stmt, String tableName, - String tableColumnName, String fromCat, String toCat, - String fromDb, String toDb, String hiveTblName) - throws HiveMetaException, SQLException { - String update = "update " + quoteIf(tableName) + " " + - "set " + quoteIf("CAT_NAME") + " = '" + toCat + "', " + quoteIf("DB_NAME") + " = '" + toDb + "' " + - "where " + quoteIf("CAT_NAME") + " = '" + fromCat + "' " + - "and " + quoteIf("DB_NAME") + " = '" + fromDb + "' " + - "and " + quoteIf(tableColumnName) + " = '" + hiveTblName + "'"; - LOG.debug("Going to run " + update); - int numUpdated = stmt.executeUpdate(update); - if (numUpdated > 1 || numUpdated < 0) { - throw new HiveMetaException("Failed to properly update the " + tableName + - " table. 
Expected to update 1 row but instead updated " + numUpdated); - } - } - - // Quote if the database requires it - private String quoteIf(String identifier) { - return needsQuotedIdentifier ? quoteCharacter + identifier + quoteCharacter : identifier; - } - - // Quote always, for fields that mimic SQL keywords, like DESC - private String quoteAlways(String identifier) { - return quoteCharacter + identifier + quoteCharacter; - } - - /** - * Run pre-upgrade scripts corresponding to a given upgrade script, - * if any exist. The errors from pre-upgrade are ignored. - * Pre-upgrade scripts typically contain setup statements which - * may fail on some database versions and failure is ignorable. - * - * @param scriptDir upgrade script directory name - * @param scriptFile upgrade script file name - */ - private void runPreUpgrade(String scriptDir, String scriptFile) { - for (int i = 0;; i++) { - String preUpgradeScript = - metaStoreSchemaInfo.getPreUpgradeScriptName(i, scriptFile); - File preUpgradeScriptFile = new File(scriptDir, preUpgradeScript); - if (!preUpgradeScriptFile.isFile()) { - break; - } - - try { - runBeeLine(scriptDir, preUpgradeScript); - System.out.println("Completed " + preUpgradeScript); - } catch (Exception e) { - // Ignore the pre-upgrade script errors - System.err.println("Warning in pre-upgrade script " + preUpgradeScript + ": " - + e.getMessage()); - if (verbose) { - e.printStackTrace(); - } - } - } - } - - /*** - * Run beeline with the given metastore script. Flatten the nested scripts - * into single file. - */ - private void runBeeLine(String scriptDir, String scriptFile) - throws IOException, HiveMetaException { - NestedScriptParser dbCommandParser = getDbCommandParser(dbType, metaDbType); - - // expand the nested script - // If the metaDbType is set, this is setting up the information - // schema in Hive. That specifically means that the sql commands need - // to be adjusted for the underlying RDBMS (correct quotation - // strings, etc). - String sqlCommands = dbCommandParser.buildCommand(scriptDir, scriptFile, metaDbType != null); - File tmpFile = File.createTempFile("schematool", ".sql"); - tmpFile.deleteOnExit(); - - // write out the buffer into a file. Add beeline commands for autocommit and close - FileWriter fstream = new FileWriter(tmpFile.getPath()); - BufferedWriter out = new BufferedWriter(fstream); - out.write("!autocommit on" + System.getProperty("line.separator")); - out.write(sqlCommands); - out.write("!closeall" + System.getProperty("line.separator")); - out.close(); - runBeeLine(tmpFile.getPath()); - } - - // Generate the beeline args per hive conf and execute the given script - public void runBeeLine(String sqlScriptFile) throws IOException { - CommandBuilder builder = new CommandBuilder(hiveConf, url, driver, - userName, passWord, sqlScriptFile); - - // run the script using Beeline - try (BeeLine beeLine = new BeeLine()) { - if (!verbose) { - beeLine.setOutputStream(new PrintStream(new NullOutputStream())); - beeLine.getOpts().setSilent(true); - } - beeLine.getOpts().setAllowMultiLineCommand(false); - beeLine.getOpts().setIsolation("TRANSACTION_READ_COMMITTED"); - // We can be pretty sure that an entire line can be processed as a single command since - // we always add a line separator at the end while calling dbCommandParser.buildCommand. 
- beeLine.getOpts().setEntireLineAsCommand(true); - LOG.debug("Going to run command <" + builder.buildToLog() + ">"); - int status = beeLine.begin(builder.buildToRun(), null); - if (status != 0) { - throw new IOException("Schema script failed, errorcode " + status); - } - } - } - - static class CommandBuilder { - private final HiveConf hiveConf; - private final String userName; - private final String password; - private final String sqlScriptFile; - private final String driver; - private final String url; - - CommandBuilder(HiveConf hiveConf, String url, String driver, - String userName, String password, String sqlScriptFile) { - this.hiveConf = hiveConf; - this.userName = userName; - this.password = password; - this.url = url; - this.driver = driver; - this.sqlScriptFile = sqlScriptFile; - } - - String[] buildToRun() throws IOException { - return argsWith(password); - } - - String buildToLog() throws IOException { - logScript(); - return StringUtils.join(argsWith(BeeLine.PASSWD_MASK), " "); - } - - private String[] argsWith(String password) throws IOException { - return new String[] - { - "-u", url == null ? HiveSchemaHelper.getValidConfVar(MetastoreConf.ConfVars.CONNECT_URL_KEY, hiveConf) : url, - "-d", driver == null ? HiveSchemaHelper.getValidConfVar(MetastoreConf.ConfVars.CONNECTION_DRIVER, hiveConf) : driver, - "-n", userName, - "-p", password, - "-f", sqlScriptFile - }; - } - - private void logScript() throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("Going to invoke file that contains:"); - try (BufferedReader reader = new BufferedReader(new FileReader(sqlScriptFile))) { - String line; - while ((line = reader.readLine()) != null) { - LOG.debug("script: " + line); - } - } - } - } - } - - // Create the required command line options - @SuppressWarnings("static-access") - private static void initOptions(Options cmdLineOptions) { - Option help = new Option("help", "print this message"); - Option upgradeOpt = new Option("upgradeSchema", "Schema upgrade"); - Option upgradeFromOpt = OptionBuilder.withArgName("upgradeFrom").hasArg(). - withDescription("Schema upgrade from a version"). - create("upgradeSchemaFrom"); - Option initOpt = new Option("initSchema", "Schema initialization"); - Option initToOpt = OptionBuilder.withArgName("initTo").hasArg(). - withDescription("Schema initialization to a version"). - create("initSchemaTo"); - Option infoOpt = new Option("info", "Show config and schema details"); - Option validateOpt = new Option("validate", "Validate the database"); - Option createCatalog = OptionBuilder - .hasArg() - .withDescription("Create a catalog, requires --catalogLocation parameter as well") - .create("createCatalog"); - Option alterCatalog = OptionBuilder - .hasArg() - .withDescription("Alter a catalog, requires --catalogLocation and/or --catalogDescription parameter as well") - .create("alterCatalog"); - Option moveDatabase = OptionBuilder - .hasArg() - .withDescription("Move a database between catalogs. Argument is the database name. " + - "Requires --fromCatalog and --toCatalog parameters as well") - .create("moveDatabase"); - Option moveTable = OptionBuilder - .hasArg() - .withDescription("Move a table to a different database. Argument is the table name. 
" + - "Requires --fromCatalog, --toCatalog, --fromDatabase, and --toDatabase " + - " parameters as well.") - .create("moveTable"); - - OptionGroup optGroup = new OptionGroup(); - optGroup.addOption(upgradeOpt) - .addOption(initOpt) - .addOption(help) - .addOption(upgradeFromOpt) - .addOption(initToOpt) - .addOption(infoOpt) - .addOption(validateOpt) - .addOption(createCatalog) - .addOption(alterCatalog) - .addOption(moveDatabase) - .addOption(moveTable); - optGroup.setRequired(true); - - Option userNameOpt = OptionBuilder.withArgName("user") - .hasArgs() - .withDescription("Override config file user name") - .create("userName"); - Option passwdOpt = OptionBuilder.withArgName("password") - .hasArgs() - .withDescription("Override config file password") - .create("passWord"); - Option dbTypeOpt = OptionBuilder.withArgName("databaseType") - .hasArgs().withDescription("Metastore database type") - .create("dbType"); - Option metaDbTypeOpt = OptionBuilder.withArgName("metaDatabaseType") - .hasArgs().withDescription("Used only if upgrading the system catalog for hive") - .create("metaDbType"); - Option urlOpt = OptionBuilder.withArgName("url") - .hasArgs().withDescription("connection url to the database") - .create("url"); - Option driverOpt = OptionBuilder.withArgName("driver") - .hasArgs().withDescription("driver name for connection") - .create("driver"); - Option dbOpts = OptionBuilder.withArgName("databaseOpts") - .hasArgs().withDescription("Backend DB specific options") - .create("dbOpts"); - Option dryRunOpt = new Option("dryRun", "list SQL scripts (no execute)"); - Option verboseOpt = new Option("verbose", "only print SQL statements"); - Option serversOpt = OptionBuilder.withArgName("serverList") - .hasArgs().withDescription("a comma-separated list of servers used in location validation in the format of scheme://authority (e.g. hdfs://localhost:8000)") - .create("servers"); - Option catalogLocation = OptionBuilder - .hasArg() - .withDescription("Location of new catalog, required when adding a catalog") - .create("catalogLocation"); - Option catalogDescription = OptionBuilder - .hasArg() - .withDescription("Description of new catalog") - .create("catalogDescription"); - Option ifNotExists = OptionBuilder - .withDescription("If passed then it is not an error to create an existing catalog") - .create("ifNotExists"); - Option toCatalog = OptionBuilder - .hasArg() - .withDescription("Catalog a moving database or table is going to. This is " + - "required if you are moving a database or table.") - .create("toCatalog"); - Option fromCatalog = OptionBuilder - .hasArg() - .withDescription("Catalog a moving database or table is coming from. This is " + - "required if you are moving a database or table.") - .create("fromCatalog"); - Option toDatabase = OptionBuilder - .hasArg() - .withDescription("Database a moving table is going to. This is " + - "required if you are moving a table.") - .create("toDatabase"); - Option fromDatabase = OptionBuilder - .hasArg() - .withDescription("Database a moving table is coming from. 
This is " + - "required if you are moving a table.") - .create("fromDatabase"); - cmdLineOptions.addOption(help); - cmdLineOptions.addOption(dryRunOpt); - cmdLineOptions.addOption(userNameOpt); - cmdLineOptions.addOption(passwdOpt); - cmdLineOptions.addOption(dbTypeOpt); - cmdLineOptions.addOption(verboseOpt); - cmdLineOptions.addOption(metaDbTypeOpt); - cmdLineOptions.addOption(urlOpt); - cmdLineOptions.addOption(driverOpt); - cmdLineOptions.addOption(dbOpts); - cmdLineOptions.addOption(serversOpt); - cmdLineOptions.addOption(catalogLocation); - cmdLineOptions.addOption(catalogDescription); - cmdLineOptions.addOption(ifNotExists); - cmdLineOptions.addOption(toCatalog); - cmdLineOptions.addOption(fromCatalog); - cmdLineOptions.addOption(toDatabase); - cmdLineOptions.addOption(fromDatabase); - cmdLineOptions.addOptionGroup(optGroup); - } - - public static void main(String[] args) { - CommandLineParser parser = new GnuParser(); - CommandLine line = null; - String dbType = null; - String metaDbType = null; - String schemaVer = null; - Options cmdLineOptions = new Options(); - - // Argument handling - initOptions(cmdLineOptions); - try { - line = parser.parse(cmdLineOptions, args); - } catch (ParseException e) { - System.err.println("HiveSchemaTool:Parsing failed. Reason: " + e.getLocalizedMessage()); - printAndExit(cmdLineOptions); - } - - if (line.hasOption("help")) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("schemaTool", cmdLineOptions); - return; - } - - if (line.hasOption("dbType")) { - dbType = line.getOptionValue("dbType"); - if ((!dbType.equalsIgnoreCase(HiveSchemaHelper.DB_DERBY) && - !dbType.equalsIgnoreCase(HiveSchemaHelper.DB_HIVE) && - !dbType.equalsIgnoreCase(HiveSchemaHelper.DB_MSSQL) && - !dbType.equalsIgnoreCase(HiveSchemaHelper.DB_MYSQL) && - !dbType.equalsIgnoreCase(HiveSchemaHelper.DB_POSTGRACE) && !dbType - .equalsIgnoreCase(HiveSchemaHelper.DB_ORACLE))) { - System.err.println("Unsupported dbType " + dbType); - printAndExit(cmdLineOptions); - } - } else { - System.err.println("no dbType supplied"); - printAndExit(cmdLineOptions); - } - - if (line.hasOption("metaDbType")) { - metaDbType = line.getOptionValue("metaDbType"); - - if (!dbType.equals(HiveSchemaHelper.DB_HIVE)) { - System.err.println("metaDbType only supported for dbType = hive"); - printAndExit(cmdLineOptions); - } - - if (!metaDbType.equalsIgnoreCase(HiveSchemaHelper.DB_DERBY) && - !metaDbType.equalsIgnoreCase(HiveSchemaHelper.DB_MSSQL) && - !metaDbType.equalsIgnoreCase(HiveSchemaHelper.DB_MYSQL) && - !metaDbType.equalsIgnoreCase(HiveSchemaHelper.DB_POSTGRACE) && - !metaDbType.equalsIgnoreCase(HiveSchemaHelper.DB_ORACLE)) { - System.err.println("Unsupported metaDbType " + metaDbType); - printAndExit(cmdLineOptions); - } - } else if (dbType.equalsIgnoreCase(HiveSchemaHelper.DB_HIVE)) { - System.err.println("no metaDbType supplied"); - printAndExit(cmdLineOptions); - } - - - System.setProperty(HiveConf.ConfVars.METASTORE_SCHEMA_VERIFICATION.varname, "true"); - try { - HiveSchemaTool schemaTool = new HiveSchemaTool(dbType, metaDbType); - - if (line.hasOption("userName")) { - schemaTool.setUserName(line.getOptionValue("userName")); - } else { - schemaTool.setUserName( - schemaTool.getHiveConf().get(ConfVars.METASTORE_CONNECTION_USER_NAME.varname)); - } - if (line.hasOption("passWord")) { - schemaTool.setPassWord(line.getOptionValue("passWord")); - } else { - try { - schemaTool.setPassWord(ShimLoader.getHadoopShims().getPassword(schemaTool.getHiveConf(), - 
HiveConf.ConfVars.METASTOREPWD.varname)); - } catch (IOException err) { - throw new HiveMetaException("Error getting metastore password", err); - } - } - if (line.hasOption("url")) { - schemaTool.setUrl(line.getOptionValue("url")); - } - if (line.hasOption("driver")) { - schemaTool.setDriver(line.getOptionValue("driver")); - } - if (line.hasOption("dryRun")) { - schemaTool.setDryRun(true); - } - if (line.hasOption("verbose")) { - schemaTool.setVerbose(true); - } - if (line.hasOption("dbOpts")) { - schemaTool.setDbOpts(line.getOptionValue("dbOpts")); - } - if (line.hasOption("validate") && line.hasOption("servers")) { - schemaTool.setValidationServers(line.getOptionValue("servers")); - } - if (line.hasOption("info")) { - schemaTool.showInfo(); - } else if (line.hasOption("upgradeSchema")) { - schemaTool.doUpgrade(); - } else if (line.hasOption("upgradeSchemaFrom")) { - schemaVer = line.getOptionValue("upgradeSchemaFrom"); - schemaTool.doUpgrade(schemaVer); - } else if (line.hasOption("initSchema")) { - schemaTool.doInit(); - } else if (line.hasOption("initSchemaTo")) { - schemaVer = line.getOptionValue("initSchemaTo"); - schemaTool.doInit(schemaVer); - } else if (line.hasOption("validate")) { - schemaTool.doValidate(); - } else if (line.hasOption("createCatalog")) { - schemaTool.createCatalog(line.getOptionValue("createCatalog"), - line.getOptionValue("catalogLocation"), line.getOptionValue("catalogDescription"), - line.hasOption("ifNotExists")); - } else if (line.hasOption("alterCatalog")) { - schemaTool.alterCatalog(line.getOptionValue("alterCatalog"), - line.getOptionValue("catalogLocation"), line.getOptionValue("catalogDescription")); - } else if (line.hasOption("moveDatabase")) { - schemaTool.moveDatabase(line.getOptionValue("fromCatalog"), - line.getOptionValue("toCatalog"), line.getOptionValue("moveDatabase")); - } else if (line.hasOption("moveTable")) { - schemaTool.moveTable(line.getOptionValue("fromCatalog"), line.getOptionValue("toCatalog"), - line.getOptionValue("fromDatabase"), line.getOptionValue("toDatabase"), - line.getOptionValue("moveTable")); - } else { - System.err.println("no valid option supplied"); - printAndExit(cmdLineOptions); - } - } catch (HiveMetaException e) { - System.err.println(e); - if (e.getCause() != null) { - Throwable t = e.getCause(); - System.err.println("Underlying cause: " - + t.getClass().getName() + " : " - + t.getMessage()); - if (e.getCause() instanceof SQLException) { - System.err.println("SQL Error code: " + ((SQLException)t).getErrorCode()); - } - } - if (line.hasOption("verbose")) { - e.printStackTrace(); - } else { - System.err.println("Use --verbose for detailed stacktrace."); - } - System.err.println("*** schemaTool failed ***"); - System.exit(1); - } - System.out.println("schemaTool completed"); - - } -} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaTool.java new file mode 100644 index 0000000..39f276a --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaTool.java @@ -0,0 +1,412 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.beeline.schematool; + +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.output.NullOutputStream; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.IMetaStoreSchemaInfo; +import org.apache.hadoop.hive.metastore.MetaStoreSchemaInfoFactory; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.MetaStoreConnectionInfo; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.NestedScriptParser; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hive.beeline.BeeLine; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintStream; +import java.net.URI; +import java.sql.Connection; +import java.sql.SQLException; + +public class HiveSchemaTool { + private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaTool.class.getName()); + + private final HiveConf hiveConf; + private final String dbType; + private final String metaDbType; + private final IMetaStoreSchemaInfo metaStoreSchemaInfo; + private final boolean needsQuotedIdentifier; + private String quoteCharacter; + + private String url = null; + private String driver = null; + private String userName = null; + private String passWord = null; + private boolean dryRun = false; + private boolean verbose = false; + private String dbOpts = null; + private URI[] validationServers = null; // The list of servers the database/partition/table can locate on + + private HiveSchemaTool(String dbType, String metaDbType) throws HiveMetaException { + this(System.getenv("HIVE_HOME"), new HiveConf(HiveSchemaTool.class), dbType, metaDbType); + } + + @VisibleForTesting + public HiveSchemaTool(String hiveHome, HiveConf hiveConf, String dbType, String metaDbType) + throws HiveMetaException { + if (hiveHome == null || hiveHome.isEmpty()) { + throw new HiveMetaException("No Hive home directory provided"); + } + this.hiveConf = hiveConf; + this.dbType = dbType; + this.metaDbType = metaDbType; + NestedScriptParser parser = getDbCommandParser(dbType, metaDbType); + this.needsQuotedIdentifier = parser.needsQuotedIdentifier(); + this.quoteCharacter = parser.getQuoteCharacter(); + this.metaStoreSchemaInfo = MetaStoreSchemaInfoFactory.get(hiveConf, hiveHome, dbType); + // If the dbType is "hive", this is setting up the information schema in Hive. + // We will set the default jdbc url and driver. 
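+ // The defaults below are HiveSchemaHelper.EMBEDDED_HS2_URL and HiveSchemaHelper.HIVE_JDBC_DRIVER, i.e. an embedded HiveServer2 reached through the Hive JDBC driver.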
+ // It is overriden by command line options if passed (-url and -driver + if (dbType.equalsIgnoreCase(HiveSchemaHelper.DB_HIVE)) { + url = HiveSchemaHelper.EMBEDDED_HS2_URL; + driver = HiveSchemaHelper.HIVE_JDBC_DRIVER; + } + } + + HiveConf getHiveConf() { + return hiveConf; + } + + String getDbType() { + return dbType; + } + + IMetaStoreSchemaInfo getMetaStoreSchemaInfo() { + return metaStoreSchemaInfo; + } + + private void setUrl(String url) { + this.url = url; + } + + private void setDriver(String driver) { + this.driver = driver; + } + + @VisibleForTesting + public void setUserName(String userName) { + this.userName = userName; + } + + @VisibleForTesting + public void setPassWord(String passWord) { + this.passWord = passWord; + } + + @VisibleForTesting + public void setDryRun(boolean dryRun) { + this.dryRun = dryRun; + } + + boolean isDryRun() { + return dryRun; + } + + @VisibleForTesting + public void setVerbose(boolean verbose) { + this.verbose = verbose; + } + + boolean isVerbose() { + return verbose; + } + + private void setDbOpts(String dbOpts) { + this.dbOpts = dbOpts; + } + + private void setValidationServers(String servers) { + if(StringUtils.isNotEmpty(servers)) { + String[] strServers = servers.split(","); + this.validationServers = new URI[strServers.length]; + for (int i = 0; i < validationServers.length; i++) { + validationServers[i] = new Path(strServers[i]).toUri(); + } + } + } + + URI[] getValidationServers() { + return validationServers; + } + + Connection getConnectionToMetastore(boolean printInfo) throws HiveMetaException { + return HiveSchemaHelper.getConnectionToMetastore(userName, passWord, url, driver, printInfo, hiveConf, + null); + } + + private NestedScriptParser getDbCommandParser(String dbType, String metaDbType) { + return HiveSchemaHelper.getDbCommandParser(dbType, dbOpts, userName, passWord, hiveConf, + metaDbType, false); + } + + // test the connection metastore using the config property + void testConnectionToMetastore() throws HiveMetaException { + Connection conn = getConnectionToMetastore(true); + try { + conn.close(); + } catch (SQLException e) { + throw new HiveMetaException("Failed to close metastore connection", e); + } + } + + /** + * check if the current schema version in metastore matches the Hive version + * @throws MetaException + */ + void verifySchemaVersion() throws HiveMetaException { + // don't check version if its a dry run + if (dryRun) { + return; + } + String newSchemaVersion = metaStoreSchemaInfo.getMetaStoreSchemaVersion(getConnectionInfo(false)); + // verify that the new version is added to schema + assertCompatibleVersion(metaStoreSchemaInfo.getHiveSchemaVersion(), newSchemaVersion); + } + + void assertCompatibleVersion(String hiveSchemaVersion, String dbSchemaVersion) + throws HiveMetaException { + if (!metaStoreSchemaInfo.isVersionCompatible(hiveSchemaVersion, dbSchemaVersion)) { + throw new HiveMetaException("Metastore schema version is not compatible. Hive Version: " + + hiveSchemaVersion + ", Database Schema Version: " + dbSchemaVersion); + } + } + + MetaStoreConnectionInfo getConnectionInfo(boolean printInfo) { + return new MetaStoreConnectionInfo(userName, passWord, url, driver, printInfo, hiveConf, + dbType, metaDbType); + } + + // Quote if the database requires it + String quote(String stmt) { + stmt = stmt.replace("", needsQuotedIdentifier ? quoteCharacter : ""); + stmt = stmt.replace("", quoteCharacter); + return stmt; + } + + /*** + * Run beeline with the given metastore script. 
Flatten the nested scripts + * into single file. + */ + void runBeeLine(String scriptDir, String scriptFile) + throws IOException, HiveMetaException { + NestedScriptParser dbCommandParser = getDbCommandParser(dbType, metaDbType); + + // expand the nested script + // If the metaDbType is set, this is setting up the information + // schema in Hive. That specifically means that the sql commands need + // to be adjusted for the underlying RDBMS (correct quotation + // strings, etc). + String sqlCommands = dbCommandParser.buildCommand(scriptDir, scriptFile, metaDbType != null); + File tmpFile = File.createTempFile("schematool", ".sql"); + tmpFile.deleteOnExit(); + + // write out the buffer into a file. Add beeline commands for autocommit and close + FileWriter fstream = new FileWriter(tmpFile.getPath()); + BufferedWriter out = new BufferedWriter(fstream); + out.write("!autocommit on" + System.getProperty("line.separator")); + out.write(sqlCommands); + out.write("!closeall" + System.getProperty("line.separator")); + out.close(); + runBeeLine(tmpFile.getPath()); + } + + // Generate the beeline args per hive conf and execute the given script + void runBeeLine(String sqlScriptFile) throws IOException { + CommandBuilder builder = new CommandBuilder(hiveConf, url, driver, + userName, passWord, sqlScriptFile); + + // run the script using Beeline + try (BeeLine beeLine = new BeeLine()) { + if (!verbose) { + beeLine.setOutputStream(new PrintStream(new NullOutputStream())); + beeLine.getOpts().setSilent(true); + } + beeLine.getOpts().setAllowMultiLineCommand(false); + beeLine.getOpts().setIsolation("TRANSACTION_READ_COMMITTED"); + // We can be pretty sure that an entire line can be processed as a single command since + // we always add a line separator at the end while calling dbCommandParser.buildCommand. + beeLine.getOpts().setEntireLineAsCommand(true); + LOG.debug("Going to run command <" + builder.buildToLog() + ">"); + int status = beeLine.begin(builder.buildToRun(), null); + if (status != 0) { + throw new IOException("Schema script failed, errorcode " + status); + } + } + } + + static class CommandBuilder { + private final HiveConf hiveConf; + private final String userName; + private final String password; + private final String sqlScriptFile; + private final String driver; + private final String url; + + CommandBuilder(HiveConf hiveConf, String url, String driver, + String userName, String password, String sqlScriptFile) { + this.hiveConf = hiveConf; + this.userName = userName; + this.password = password; + this.url = url; + this.driver = driver; + this.sqlScriptFile = sqlScriptFile; + } + + String[] buildToRun() throws IOException { + return argsWith(password); + } + + String buildToLog() throws IOException { + logScript(); + return StringUtils.join(argsWith(BeeLine.PASSWD_MASK), " "); + } + + private String[] argsWith(String password) throws IOException { + return new String[] + { + "-u", url == null ? HiveSchemaHelper.getValidConfVar(MetastoreConf.ConfVars.CONNECT_URL_KEY, hiveConf) : url, + "-d", driver == null ? 
HiveSchemaHelper.getValidConfVar(MetastoreConf.ConfVars.CONNECTION_DRIVER, hiveConf) : driver, + "-n", userName, + "-p", password, + "-f", sqlScriptFile + }; + } + + private void logScript() throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Going to invoke file that contains:"); + try (BufferedReader reader = new BufferedReader(new FileReader(sqlScriptFile))) { + String line; + while ((line = reader.readLine()) != null) { + LOG.debug("script: " + line); + } + } + } + } + } + + public static void main(String[] args) { + HiveSchemaToolCommandLine line = null; + try { + line = new HiveSchemaToolCommandLine(args); + } catch (ParseException e) { + System.exit(1); + } + + System.setProperty(MetastoreConf.ConfVars.SCHEMA_VERIFICATION.getVarname(), "true"); + try { + HiveSchemaTool schemaTool = createSchemaTool(line); + + HiveSchemaToolTask task = null; + if (line.hasOption("info")) { + task = new HiveSchemaToolTaskInfo(); + } else if (line.hasOption("upgradeSchema") || line.hasOption("upgradeSchemaFrom")) { + task = new HiveSchemaToolTaskUpgrade(); + } else if (line.hasOption("initSchema") || line.hasOption("initSchemaTo")) { + task = new HiveSchemaToolTaskInit(); + } else if (line.hasOption("validate")) { + task = new HiveSchemaToolTaskValidate(); + } else if (line.hasOption("createCatalog")) { + task = new HiveSchemaToolTaskCreateCatalog(); + } else if (line.hasOption("alterCatalog")) { + task = new HiveSchemaToolTaskAlterCatalog(); + } else if (line.hasOption("moveDatabase")) { + task = new HiveSchemaToolTaskMoveDatabase(); + } else if (line.hasOption("moveTable")) { + task = new HiveSchemaToolTaskMoveTable(); + } + + task.setHiveSchemaTool(schemaTool); + task.setCommandLineArguments(line); + task.execute(); + + } catch (HiveMetaException e) { + System.err.println(e); + if (e.getCause() != null) { + Throwable t = e.getCause(); + System.err.println("Underlying cause: " + t.getClass().getName() + " : " + t.getMessage()); + if (e.getCause() instanceof SQLException) { + System.err.println("SQL Error code: " + ((SQLException)t).getErrorCode()); + } + } + if (line.hasOption("verbose")) { + e.printStackTrace(); + } else { + System.err.println("Use --verbose for detailed stacktrace."); + } + System.err.println("*** schemaTool failed ***"); + System.exit(1); + } + System.out.println("schemaTool completed"); + } + + private static HiveSchemaTool createSchemaTool(HiveSchemaToolCommandLine line) throws HiveMetaException { + HiveSchemaTool schemaTool = new HiveSchemaTool(line.getDbType(), line.getMetaDbType()); + + if (line.hasOption("userName")) { + schemaTool.setUserName(line.getOptionValue("userName")); + } else { + schemaTool.setUserName( + schemaTool.getHiveConf().get(MetastoreConf.ConfVars.CONNECTION_USER_NAME.getVarname())); + } + if (line.hasOption("passWord")) { + schemaTool.setPassWord(line.getOptionValue("passWord")); + } else { + try { + schemaTool.setPassWord(ShimLoader.getHadoopShims().getPassword(schemaTool.getHiveConf(), + MetastoreConf.ConfVars.PWD.getVarname())); + } catch (IOException err) { + throw new HiveMetaException("Error getting metastore password", err); + } + } + if (line.hasOption("url")) { + schemaTool.setUrl(line.getOptionValue("url")); + } + if (line.hasOption("driver")) { + schemaTool.setDriver(line.getOptionValue("driver")); + } + if (line.hasOption("dryRun")) { + schemaTool.setDryRun(true); + } + if (line.hasOption("verbose")) { + schemaTool.setVerbose(true); + } + if (line.hasOption("dbOpts")) { + schemaTool.setDbOpts(line.getOptionValue("dbOpts")); + } + 
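+ // The servers list is only consumed by the validate task: setValidationServers() splits the comma-separated value into URIs that HiveSchemaToolTaskValidate later checks database, table and partition locations against.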
if (line.hasOption("validate") && line.hasOption("servers")) { + schemaTool.setValidationServers(line.getOptionValue("servers")); + } + return schemaTool; + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolCommandLine.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolCommandLine.java new file mode 100644 index 0000000..24b66ee --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolCommandLine.java @@ -0,0 +1,285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.beeline.schematool; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; + +import com.google.common.collect.ImmutableSet; + +import java.util.Set; + +class HiveSchemaToolCommandLine { + private final Options cmdLineOptions = createOptions(); + + @SuppressWarnings("static-access") + private Options createOptions() { + Option help = new Option("help", "print this message"); + Option infoOpt = new Option("info", "Show config and schema details"); + Option upgradeOpt = new Option("upgradeSchema", "Schema upgrade"); + Option upgradeFromOpt = OptionBuilder.withArgName("upgradeFrom").hasArg() + .withDescription("Schema upgrade from a version") + .create("upgradeSchemaFrom"); + Option initOpt = new Option("initSchema", "Schema initialization"); + Option initToOpt = OptionBuilder.withArgName("initTo").hasArg() + .withDescription("Schema initialization to a version") + .create("initSchemaTo"); + Option validateOpt = new Option("validate", "Validate the database"); + Option createCatalog = OptionBuilder + .hasArg() + .withDescription("Create a catalog, requires --catalogLocation parameter as well") + .create("createCatalog"); + Option alterCatalog = OptionBuilder + .hasArg() + .withDescription("Alter a catalog, requires --catalogLocation and/or --catalogDescription parameter as well") + .create("alterCatalog"); + Option moveDatabase = OptionBuilder + .hasArg() + .withDescription("Move a database between catalogs. Argument is the database name. " + + "Requires --fromCatalog and --toCatalog parameters as well") + .create("moveDatabase"); + Option moveTable = OptionBuilder + .hasArg() + .withDescription("Move a table to a different database. Argument is the table name. 
" + + "Requires --fromCatalog, --toCatalog, --fromDatabase, and --toDatabase " + + " parameters as well.") + .create("moveTable"); + + OptionGroup optGroup = new OptionGroup(); + optGroup + .addOption(help) + .addOption(infoOpt) + .addOption(upgradeOpt) + .addOption(upgradeFromOpt) + .addOption(initOpt) + .addOption(initToOpt) + .addOption(validateOpt) + .addOption(createCatalog) + .addOption(alterCatalog) + .addOption(moveDatabase) + .addOption(moveTable); + optGroup.setRequired(true); + + Option userNameOpt = OptionBuilder.withArgName("user") + .hasArgs() + .withDescription("Override config file user name") + .create("userName"); + Option passwdOpt = OptionBuilder.withArgName("password") + .hasArgs() + .withDescription("Override config file password") + .create("passWord"); + Option dbTypeOpt = OptionBuilder.withArgName("databaseType") + .hasArgs().withDescription("Metastore database type").isRequired() + .create("dbType"); + Option metaDbTypeOpt = OptionBuilder.withArgName("metaDatabaseType") + .hasArgs().withDescription("Used only if upgrading the system catalog for hive") + .create("metaDbType"); + Option urlOpt = OptionBuilder.withArgName("url") + .hasArgs().withDescription("connection url to the database") + .create("url"); + Option driverOpt = OptionBuilder.withArgName("driver") + .hasArgs().withDescription("driver name for connection") + .create("driver"); + Option dbOpts = OptionBuilder.withArgName("databaseOpts") + .hasArgs().withDescription("Backend DB specific options") + .create("dbOpts"); + Option dryRunOpt = new Option("dryRun", "list SQL scripts (no execute)"); + Option verboseOpt = new Option("verbose", "only print SQL statements"); + Option serversOpt = OptionBuilder.withArgName("serverList") + .hasArgs().withDescription("a comma-separated list of servers used in location validation in the format of scheme://authority (e.g. hdfs://localhost:8000)") + .create("servers"); + Option catalogLocation = OptionBuilder + .hasArg() + .withDescription("Location of new catalog, required when adding a catalog") + .create("catalogLocation"); + Option catalogDescription = OptionBuilder + .hasArg() + .withDescription("Description of new catalog") + .create("catalogDescription"); + Option ifNotExists = OptionBuilder + .withDescription("If passed then it is not an error to create an existing catalog") + .create("ifNotExists"); + Option fromCatalog = OptionBuilder + .hasArg() + .withDescription("Catalog a moving database or table is coming from. This is " + + "required if you are moving a database or table.") + .create("fromCatalog"); + Option toCatalog = OptionBuilder + .hasArg() + .withDescription("Catalog a moving database or table is going to. This is " + + "required if you are moving a database or table.") + .create("toCatalog"); + Option fromDatabase = OptionBuilder + .hasArg() + .withDescription("Database a moving table is coming from. This is " + + "required if you are moving a table.") + .create("fromDatabase"); + Option toDatabase = OptionBuilder + .hasArg() + .withDescription("Database a moving table is going to. 
This is " + + "required if you are moving a table.") + .create("toDatabase"); + + Options cmdLineOptions = new Options(); + cmdLineOptions.addOption(help); + cmdLineOptions.addOptionGroup(optGroup); + cmdLineOptions.addOption(dbTypeOpt); + cmdLineOptions.addOption(metaDbTypeOpt); + cmdLineOptions.addOption(userNameOpt); + cmdLineOptions.addOption(passwdOpt); + cmdLineOptions.addOption(urlOpt); + cmdLineOptions.addOption(driverOpt); + cmdLineOptions.addOption(dbOpts); + cmdLineOptions.addOption(dryRunOpt); + cmdLineOptions.addOption(verboseOpt); + cmdLineOptions.addOption(serversOpt); + cmdLineOptions.addOption(catalogLocation); + cmdLineOptions.addOption(catalogDescription); + cmdLineOptions.addOption(ifNotExists); + cmdLineOptions.addOption(fromCatalog); + cmdLineOptions.addOption(toCatalog); + cmdLineOptions.addOption(fromDatabase); + cmdLineOptions.addOption(toDatabase); + + return cmdLineOptions; + } + + private final CommandLine cl; + private final String dbType; + private final String metaDbType; + + HiveSchemaToolCommandLine(String[] args) throws ParseException { + cl = getCommandLine(args); + if (cl.hasOption("help")) { + printAndExit(null); + } + + dbType = cl.getOptionValue("dbType"); + metaDbType = cl.getOptionValue("metaDbType"); + + validate(); + } + + private CommandLine getCommandLine(String[] args) throws ParseException { + try { + CommandLineParser parser = new GnuParser(); + return parser.parse(cmdLineOptions, args); + } catch (ParseException e) { + printAndExit("HiveSchemaTool:Parsing failed. Reason: " + e.getLocalizedMessage()); + return null; + } + } + + private static final Set VALID_DB_TYPES = ImmutableSet.of(HiveSchemaHelper.DB_DERBY, + HiveSchemaHelper.DB_HIVE, HiveSchemaHelper.DB_MSSQL, HiveSchemaHelper.DB_MYSQL, + HiveSchemaHelper.DB_POSTGRACE, HiveSchemaHelper.DB_ORACLE); + + private static final Set VALID_META_DB_TYPES = ImmutableSet.of(HiveSchemaHelper.DB_DERBY, + HiveSchemaHelper.DB_MSSQL, HiveSchemaHelper.DB_MYSQL, HiveSchemaHelper.DB_POSTGRACE, + HiveSchemaHelper.DB_ORACLE); + + private void validate() throws ParseException { + if (!VALID_DB_TYPES.contains(dbType)) { + printAndExit("Unsupported dbType " + dbType); + } + + if (metaDbType != null) { + if (!dbType.equals(HiveSchemaHelper.DB_HIVE)) { + printAndExit("metaDbType may only be set if dbType is hive"); + } + if (!VALID_META_DB_TYPES.contains(metaDbType)) { + printAndExit("Unsupported metaDbType " + metaDbType); + } + } else if (dbType.equalsIgnoreCase(HiveSchemaHelper.DB_HIVE)) { + System.err.println(); + printAndExit("metaDbType must be set if dbType is hive"); + } + + if ((cl.hasOption("createCatalog")) && !cl.hasOption("catalogLocation")) { + System.err.println(); + printAndExit("catalogLocation must be set for createCatalog"); + } + + if (!cl.hasOption("createCatalog") && !cl.hasOption("alterCatalog") && + (cl.hasOption("catalogLocation") || cl.hasOption("catalogDescription"))) { + printAndExit("catalogLocation and catalogDescription may be set only for createCatalog and alterCatalog"); + } + + if (!cl.hasOption("createCatalog") && cl.hasOption("ifNotExists")) { + printAndExit("ifNotExists may be set only for createCatalog"); + } + + if (cl.hasOption("moveDatabase") && + (!cl.hasOption("fromCatalog") || !cl.hasOption("toCatalog"))) { + printAndExit("fromCatalog and toCatalog must be set for moveDatabase"); + } + + if (cl.hasOption("moveTable") && + (!cl.hasOption("fromCatalog") || !cl.hasOption("toCatalog") || + !cl.hasOption("fromDatabase") || !cl.hasOption("toDatabase"))) { + 
printAndExit("fromCatalog, toCatalog, fromDatabase and toDatabase must be set for moveTable"); + } + + if ((!cl.hasOption("moveDatabase") && !cl.hasOption("moveTable")) && + (cl.hasOption("fromCatalog") || cl.hasOption("toCatalog"))) { + printAndExit("fromCatalog and toCatalog may be set only for moveDatabase and moveTable"); + } + + if (!cl.hasOption("moveTable") && + (cl.hasOption("fromDatabase") || cl.hasOption("toDatabase"))) { + printAndExit("fromDatabase and toDatabase may be set only for moveTable"); + } + } + + private void printAndExit(String reason) throws ParseException { + if (reason != null) { + System.err.println(reason); + } + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("schemaTool", cmdLineOptions); + if (reason != null) { + throw new ParseException(reason); + } else { + System.exit(0); + } + } + + String getDbType() { + return dbType; + } + + String getMetaDbType() { + return metaDbType; + } + + boolean hasOption(String opt) { + return cl.hasOption(opt); + } + + String getOptionValue(String opt) { + return cl.getOptionValue(opt); + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTask.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTask.java new file mode 100644 index 0000000..1bc4b39 --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTask.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.beeline.schematool; + +import org.apache.hadoop.hive.metastore.HiveMetaException; + +abstract class HiveSchemaToolTask { + protected HiveSchemaTool schemaTool; + + void setHiveSchemaTool(HiveSchemaTool schemaTool) { + this.schemaTool = schemaTool; + } + + abstract void setCommandLineArguments(HiveSchemaToolCommandLine cl); + + abstract void execute() throws HiveMetaException; +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskAlterCatalog.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskAlterCatalog.java new file mode 100644 index 0000000..3cd209e --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskAlterCatalog.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.beeline.schematool; + +import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; + +import java.sql.Connection; +import java.sql.SQLException; +import java.sql.Statement; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Print Hive version and schema version + */ +class HiveSchemaToolTaskAlterCatalog extends HiveSchemaToolTask { + private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaToolTaskAlterCatalog.class.getName()); + + private String catName; + private String location; + private String description; + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + catName = normalizeIdentifier(cl.getOptionValue("alterCatalog")); + location = cl.getOptionValue("catalogLocation"); + description = cl.getOptionValue("catalogDescription"); + } + + private static final String UPDATE_CATALOG_STMT = + "update CTLGS " + + " set LOCATION_URI = %s, " + + " DESC = %s " + + " where NAME = '%s'"; + + @Override + void execute() throws HiveMetaException { + if (location == null && description == null) { + throw new HiveMetaException("Asked to update catalog " + catName + " but not given any changes to update"); + } + System.out.println("Updating catalog " + catName); + + Connection conn = schemaTool.getConnectionToMetastore(true); + boolean success = false; + try { + conn.setAutoCommit(false); + try (Statement stmt = conn.createStatement()) { + String update = String.format(schemaTool.quote(UPDATE_CATALOG_STMT), + location == null ? schemaTool.quote("LOCATION_URI") : "'" + location + "'", + description == null ? schemaTool.quote("DESC") : "'" + description + "'", + catName + ); + LOG.debug("Going to run " + update.toString()); + int count = stmt.executeUpdate(update.toString()); + if (count != 1) { + throw new HiveMetaException("Failed to find catalog " + catName + " to update"); + } + conn.commit(); + success = true; + } + } catch (SQLException e) { + throw new HiveMetaException("Failed to update catalog", e); + } finally { + try { + if (!success) conn.rollback(); + } catch (SQLException e) { + // Not really much we can do here. + LOG.error("Failed to rollback, everything will probably go bad from here.", e); + } + } + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskCreateCatalog.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskCreateCatalog.java new file mode 100644 index 0000000..a3d252f --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskCreateCatalog.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.beeline.schematool; + +import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Print Hive version and schema version + */ +class HiveSchemaToolTaskCreateCatalog extends HiveSchemaToolTask { + private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaToolTaskCreateCatalog.class.getName()); + + private String catName; + private String location; + private String description; + private boolean ifNotExists; + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + catName = normalizeIdentifier(cl.getOptionValue("createCatalog")); + location = cl.getOptionValue("catalogLocation"); + description = cl.getOptionValue("catalogDescription"); + ifNotExists = cl.hasOption("ifNotExists"); + } + + @Override + void execute() throws HiveMetaException { + System.out.println("Create catalog " + catName + " at location " + location); + + Connection conn = schemaTool.getConnectionToMetastore(true); + boolean success = false; + try { + conn.setAutoCommit(false); + try (Statement stmt = conn.createStatement()) { + // If they set ifNotExists check for existence first, and bail if it exists. This is + // more reliable then attempting to parse the error message from the SQLException. + if (ifNotExists && catalogExists(stmt)) { + return; + } + + int catNum = getNextCatalogId(stmt); + addCatalog(conn, stmt, catNum); + success = true; + } + } catch (SQLException e) { + throw new HiveMetaException("Failed to add catalog", e); + } finally { + try { + if (!success) conn.rollback(); + } catch (SQLException e) { + // Not really much we can do here. + LOG.error("Failed to rollback, everything will probably go bad from here.", e); + } + } + } + + private static final String CATALOG_EXISTS_QUERY = + "select NAME " + + " from CTLGS " + + " where NAME = '%s'"; + + private boolean catalogExists(Statement stmt) throws SQLException { + String query = String.format(schemaTool.quote(CATALOG_EXISTS_QUERY), catName); + LOG.debug("Going to run " + query); + ResultSet rs = stmt.executeQuery(query); + if (rs.next()) { + System.out.println("Catalog " + catName + " already exists"); + return true; + } + + return false; + } + + private static final String NEXT_CATALOG_ID_QUERY = + "select max(CTLG_ID) " + + " from CTLGS"; + + private int getNextCatalogId(Statement stmt) throws SQLException, HiveMetaException { + String query = schemaTool.quote(NEXT_CATALOG_ID_QUERY); + LOG.debug("Going to run " + query); + ResultSet rs = stmt.executeQuery(query); + if (!rs.next()) { + throw new HiveMetaException("No catalogs found, have you upgraded the database?"); + } + int nextId = rs.getInt(1) + 1; + // We need to stay out of the way of any sequences used by the underlying database. + // Otherwise the next time the client tries to add a catalog we'll get an error. 
+ // There should never be billions of catalogs, so we'll shift our sequence number up + // there to avoid clashes. + int floor = 1 << 30; + return Math.max(nextId, floor); + } + + private static final String ADD_CATALOG_STMT = + "insert into CTLGS (CTLG_ID, NAME, DESC, LOCATION_URI) " + + " values (%d, '%s', '%s', '%s')"; + + private void addCatalog(Connection conn, Statement stmt, int catNum) throws SQLException { + String update = String.format(schemaTool.quote(ADD_CATALOG_STMT), catNum, catName, description, location); + LOG.debug("Going to run " + update); + stmt.execute(update); + conn.commit(); + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskInfo.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskInfo.java new file mode 100644 index 0000000..23919e0 --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskInfo.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.beeline.schematool; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.MetaStoreConnectionInfo; + +/** + * Print Hive version and schema version + */ +class HiveSchemaToolTaskInfo extends HiveSchemaToolTask { + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + // do nothing + } + + @Override + void execute() throws HiveMetaException { + String hiveVersion = schemaTool.getMetaStoreSchemaInfo().getHiveSchemaVersion(); + MetaStoreConnectionInfo connectionInfo = schemaTool.getConnectionInfo(true); + String dbVersion = schemaTool.getMetaStoreSchemaInfo().getMetaStoreSchemaVersion(connectionInfo); + + System.out.println("Hive distribution version:\t " + hiveVersion); + System.out.println("Metastore schema version:\t " + dbVersion); + + schemaTool.assertCompatibleVersion(hiveVersion, dbVersion); + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskInit.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskInit.java new file mode 100644 index 0000000..d15492a --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskInit.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.beeline.schematool; + +import java.io.IOException; + +import org.apache.hadoop.hive.metastore.HiveMetaException; + +/** + * Initialize the metastore schema + */ +class HiveSchemaToolTaskInit extends HiveSchemaToolTask { + private boolean validate = true; + private String toVersion; + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + if (cl.hasOption("initSchemaTo")) { + this.toVersion = cl.getOptionValue("initSchemaTo"); + this.validate = false; + } + } + + private void ensureToVersion() throws HiveMetaException { + if (toVersion != null) { + return; + } + + // If null then current hive version is used + toVersion = schemaTool.getMetaStoreSchemaInfo().getHiveSchemaVersion(); + System.out.println("Initializing the schema to: " + toVersion); + } + + @Override + void execute() throws HiveMetaException { + ensureToVersion(); + + schemaTool.testConnectionToMetastore(); + System.out.println("Starting metastore schema initialization to " + toVersion); + + String initScriptDir = schemaTool.getMetaStoreSchemaInfo().getMetaStoreScriptDir(); + String initScriptFile = schemaTool.getMetaStoreSchemaInfo().generateInitFileName(toVersion); + + try { + System.out.println("Initialization script " + initScriptFile); + if (!schemaTool.isDryRun()) { + schemaTool.runBeeLine(initScriptDir, initScriptFile); + System.out.println("Initialization script completed"); + } + } catch (IOException e) { + throw new HiveMetaException("Schema initialization FAILED! Metastore state would be inconsistent!", e); + } + + if (validate) { + schemaTool.verifySchemaVersion(); + } + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskMoveDatabase.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskMoveDatabase.java new file mode 100644 index 0000000..12ebbaf --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskMoveDatabase.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.beeline.schematool; + +import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; + +import java.sql.Connection; +import java.sql.SQLException; +import java.sql.Statement; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Print Hive version and schema version + */ +class HiveSchemaToolTaskMoveDatabase extends HiveSchemaToolTask { + private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaToolTaskMoveDatabase.class.getName()); + + private String fromCatName; + private String toCatName; + private String dbName; + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + fromCatName = normalizeIdentifier(cl.getOptionValue("fromCatalog")); + toCatName = normalizeIdentifier(cl.getOptionValue("toCatalog")); + dbName = normalizeIdentifier(cl.getOptionValue("moveDatabase")); + } + + @Override + void execute() throws HiveMetaException { + System.out.println(String.format("Moving database %s from catalog %s to catalog %s", + dbName, fromCatName, toCatName)); + Connection conn = schemaTool.getConnectionToMetastore(true); + boolean success = false; + try { + conn.setAutoCommit(false); + try (Statement stmt = conn.createStatement()) { + updateCatalogNameInTable(stmt, "DBS", "CTLG_NAME", "NAME", fromCatName, toCatName, dbName, false); + updateCatalogNameInTable(stmt, "TAB_COL_STATS", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); + updateCatalogNameInTable(stmt, "PART_COL_STATS", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); + updateCatalogNameInTable(stmt, "PARTITION_EVENTS", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); + updateCatalogNameInTable(stmt, "NOTIFICATION_LOG", "CAT_NAME", "DB_NAME", fromCatName, toCatName, dbName, true); + conn.commit(); + success = true; + } + } catch (SQLException e) { + throw new HiveMetaException("Failed to move database", e); + } finally { + try { + if (!success) conn.rollback(); + } catch (SQLException e) { + // Not really much we can do here. + LOG.error("Failed to rollback, everything will probably go bad from here."); + } + } + } + + private void updateCatalogNameInTable(Statement stmt, String tableName, String catColName, + String dbColName, String fromCatName, + String toCatName, String dbName, boolean zeroUpdatesOk) + throws HiveMetaException, SQLException { + String update = schemaTool.quote( + "update " + tableName + " " + + " set " + catColName + " = '" + toCatName + "' " + + " where " + catColName + " = '" + fromCatName + "'" + + " and " + dbColName + " = '" + dbName + "'"); + LOG.debug("Going to run " + update); + int numUpdated = stmt.executeUpdate(update); + if (numUpdated != 1 && !(zeroUpdatesOk && numUpdated == 0)) { + throw new HiveMetaException("Failed to properly update the " + tableName + + " table. Expected to update 1 row but instead updated " + numUpdated); + } + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskMoveTable.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskMoveTable.java new file mode 100644 index 0000000..63e2d61 --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskMoveTable.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.beeline.schematool; + +import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Print Hive version and schema version + */ +class HiveSchemaToolTaskMoveTable extends HiveSchemaToolTask { + private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaToolTaskMoveTable.class.getName()); + + private String fromCat; + private String toCat; + private String fromDb; + private String toDb; + private String tableName; + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + fromCat = normalizeIdentifier(cl.getOptionValue("fromCatalog")); + toCat = normalizeIdentifier(cl.getOptionValue("toCatalog")); + fromDb = normalizeIdentifier(cl.getOptionValue("fromDatabase")); + toDb = normalizeIdentifier(cl.getOptionValue("toDatabase")); + tableName = normalizeIdentifier(cl.getOptionValue("moveTable")); + } + + @Override + void execute() throws HiveMetaException { + Connection conn = schemaTool.getConnectionToMetastore(true); + boolean success = false; + try { + conn.setAutoCommit(false); + try (Statement stmt = conn.createStatement()) { + updateTableId(stmt); + updateDbNameForTable(stmt, "TAB_COL_STATS", "TABLE_NAME", fromCat, toCat, fromDb, toDb, tableName); + updateDbNameForTable(stmt, "PART_COL_STATS", "TABLE_NAME", fromCat, toCat, fromDb, toDb, tableName); + updateDbNameForTable(stmt, "PARTITION_EVENTS", "TBL_NAME", fromCat, toCat, fromDb, toDb, tableName); + updateDbNameForTable(stmt, "NOTIFICATION_LOG", "TBL_NAME", fromCat, toCat, fromDb, toDb, tableName); + conn.commit(); + success = true; + } + } catch (SQLException se) { + throw new HiveMetaException("Failed to move table", se); + } finally { + try { + if (!success) conn.rollback(); + } catch (SQLException e) { + // Not really much we can do here. + LOG.error("Failed to rollback, everything will probably go bad from here."); + } + + } + } + + private static final String UPDATE_TABLE_ID_STMT = + "update TBLS " + + " set DB_ID = %d " + + " where DB_ID = %d " + + " and TBL_NAME = '%s'"; + + private void updateTableId(Statement stmt) throws SQLException, HiveMetaException { + // Find the old database id + long oldDbId = getDbId(stmt, fromDb, fromCat); + + // Find the new database id + long newDbId = getDbId(stmt, toDb, toCat); + + String update = String.format(schemaTool.quote(UPDATE_TABLE_ID_STMT), newDbId, oldDbId, tableName); + LOG.debug("Going to run " + update); + int numUpdated = stmt.executeUpdate(update); + if (numUpdated != 1) { + throw new HiveMetaException( + "Failed to properly update TBLS table. 
Expected to update " + + "1 row but instead updated " + numUpdated); + } + } + + private static final String DB_ID_QUERY = + "select DB_ID " + + " from DBS " + + " where NAME = '%s' " + + " and CTLG_NAME = '%s'"; + + private long getDbId(Statement stmt, String db, String catalog) throws SQLException, HiveMetaException { + String query = String.format(schemaTool.quote(DB_ID_QUERY), db, catalog); + LOG.debug("Going to run " + query); + ResultSet rs = stmt.executeQuery(query); + if (!rs.next()) { + throw new HiveMetaException("Unable to find database " + fromDb); + } + return rs.getLong(1); + } + + private void updateDbNameForTable(Statement stmt, String tableName, + String tableColumnName, String fromCat, String toCat, + String fromDb, String toDb, String hiveTblName) + throws HiveMetaException, SQLException { + String update = schemaTool.quote( + "update " + tableName + " " + + " set CAT_NAME = '" + toCat + "', " + + " DB_NAME = '" + toDb + "' " + + " where CAT_NAME = '" + fromCat + "' " + + " and DB_NAME = '" + fromDb + "' " + + " and " + tableColumnName + " = '" + hiveTblName + "'"); + + LOG.debug("Going to run " + update); + int numUpdated = stmt.executeUpdate(update); + if (numUpdated > 1 || numUpdated < 0) { + throw new HiveMetaException("Failed to properly update the " + tableName + + " table. Expected to update 1 row but instead updated " + numUpdated); + } + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskUpgrade.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskUpgrade.java new file mode 100644 index 0000000..e19084c --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskUpgrade.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.beeline.schematool; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.MetaStoreConnectionInfo; + +/** + * Perform metastore schema upgrade + */ +class HiveSchemaToolTaskUpgrade extends HiveSchemaToolTask { + private String fromVersion; + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + if (cl.hasOption("upgradeSchemaFrom")) { + this.fromVersion = cl.getOptionValue("upgradeSchemaFrom"); + } + } + + private void ensureFromVersion() throws HiveMetaException { + if (fromVersion != null) { + return; + } + + // If null, then read from the metastore + MetaStoreConnectionInfo connectionInfo = schemaTool.getConnectionInfo(false); + fromVersion = schemaTool.getMetaStoreSchemaInfo().getMetaStoreSchemaVersion(connectionInfo); + if (fromVersion == null || fromVersion.isEmpty()) { + throw new HiveMetaException("Schema version not stored in the metastore. " + + "Metastore schema is too old or corrupt. Try specifying the version manually"); + } + System.out.println("Upgrading from the version " + fromVersion); + } + + @Override + void execute() throws HiveMetaException { + ensureFromVersion(); + + if (schemaTool.getMetaStoreSchemaInfo().getHiveSchemaVersion().equals(fromVersion)) { + System.out.println("No schema upgrade required from version " + fromVersion); + return; + } + + // Find the list of scripts to execute for this upgrade + List upgradeScripts = schemaTool.getMetaStoreSchemaInfo().getUpgradeScripts(fromVersion); + schemaTool.testConnectionToMetastore(); + System.out.println("Starting upgrade metastore schema from version " + fromVersion + " to " + + schemaTool.getMetaStoreSchemaInfo().getHiveSchemaVersion()); + String scriptDir = schemaTool.getMetaStoreSchemaInfo().getMetaStoreScriptDir(); + try { + for (String scriptFile : upgradeScripts) { + System.out.println("Upgrade script " + scriptFile); + if (!schemaTool.isDryRun()) { + runPreUpgrade(scriptDir, scriptFile); + schemaTool.runBeeLine(scriptDir, scriptFile); + System.out.println("Completed " + scriptFile); + } + } + } catch (IOException e) { + throw new HiveMetaException("Upgrade FAILED! Metastore state would be inconsistent !!", e); + } + + // Revalidated the new version after upgrade + schemaTool.verifySchemaVersion(); + } + + /** + * Run pre-upgrade scripts corresponding to a given upgrade script, + * if any exist. The errors from pre-upgrade are ignored. + * Pre-upgrade scripts typically contain setup statements which + * may fail on some database versions and failure is ignorable. 
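+ * Pre-upgrade scripts are looked up by index through IMetaStoreSchemaInfo.getPreUpgradeScriptName(i, scriptFile) and run in order until no matching file is found.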
+ * + * @param scriptDir upgrade script directory name + * @param scriptFile upgrade script file name + */ + private void runPreUpgrade(String scriptDir, String scriptFile) { + for (int i = 0;; i++) { + String preUpgradeScript = schemaTool.getMetaStoreSchemaInfo().getPreUpgradeScriptName(i, scriptFile); + File preUpgradeScriptFile = new File(scriptDir, preUpgradeScript); + if (!preUpgradeScriptFile.isFile()) { + break; + } + + try { + schemaTool.runBeeLine(scriptDir, preUpgradeScript); + System.out.println("Completed " + preUpgradeScript); + } catch (Exception e) { + // Ignore the pre-upgrade script errors + System.err.println("Warning in pre-upgrade script " + preUpgradeScript + ": " + e.getMessage()); + if (schemaTool.isVerbose()) { + e.printStackTrace(); + } + } + } + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskValidate.java b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskValidate.java new file mode 100644 index 0000000..79e8a84 --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/schematool/HiveSchemaToolTaskValidate.java @@ -0,0 +1,634 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.beeline.schematool; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.net.URI; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.SQLFeatureNotSupportedException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.MetaStoreConnectionInfo; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.NestedScriptParser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; + +/** + * Print Hive version and schema version + */ +class HiveSchemaToolTaskValidate extends HiveSchemaToolTask { + private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaToolTaskValidate.class.getName()); + + @Override + void setCommandLineArguments(HiveSchemaToolCommandLine cl) { + // do nothing + } + + @Override + void execute() throws HiveMetaException { + System.out.println("Starting metastore validation\n"); + Connection conn = schemaTool.getConnectionToMetastore(false); + boolean success = true; + try { + success &= validateSchemaVersions(); + success &= validateSequences(conn); + success &= validateSchemaTables(conn); + success &= validateLocations(conn, schemaTool.getValidationServers()); + success &= validateColumnNullValues(conn); + } finally { + if (conn != null) { + try { + conn.close(); + } catch (SQLException e) { + throw new HiveMetaException("Failed to close metastore connection", e); + } + } + } + + System.out.print("Done with metastore validation: "); + if (!success) { + System.out.println("[FAIL]"); + System.exit(1); + } else { + System.out.println("[SUCCESS]"); + } + } + + boolean validateSchemaVersions() throws HiveMetaException { + System.out.println("Validating schema version"); + try { + String hiveSchemaVersion = schemaTool.getMetaStoreSchemaInfo().getHiveSchemaVersion(); + MetaStoreConnectionInfo connectionInfo = schemaTool.getConnectionInfo(false); + String newSchemaVersion = schemaTool.getMetaStoreSchemaInfo().getMetaStoreSchemaVersion(connectionInfo); + schemaTool.assertCompatibleVersion(hiveSchemaVersion, newSchemaVersion); + } catch (HiveMetaException hme) { + if (hme.getMessage().contains("Metastore schema version is not compatible") || + hme.getMessage().contains("Multiple versions were found in metastore") || + hme.getMessage().contains("Could not find version info in metastore VERSION table")) { + System.err.println(hme.getMessage()); + System.out.println("[FAIL]\n"); + return false; + } else { + throw hme; + } + } + System.out.println("[SUCCESS]\n"); + return true; + } + + private static final String QUERY_SEQ = + " select t.NEXT_VAL" + + " from SEQUENCE_TABLE t " + + " where t.SEQUENCE_NAME = ? 
" + + "order by t.SEQUENCE_NAME"; + + private static final String QUERY_MAX_ID = + "select max(%s)" + + " from %s"; + + @VisibleForTesting + boolean validateSequences(Connection conn) throws HiveMetaException { + Map> seqNameToTable = + new ImmutableMap.Builder>() + .put("MDatabase", Pair.of("DBS", "DB_ID")) + .put("MRole", Pair.of("ROLES", "ROLE_ID")) + .put("MGlobalPrivilege", Pair.of("GLOBAL_PRIVS", "USER_GRANT_ID")) + .put("MTable", Pair.of("TBLS","TBL_ID")) + .put("MStorageDescriptor", Pair.of("SDS", "SD_ID")) + .put("MSerDeInfo", Pair.of("SERDES", "SERDE_ID")) + .put("MColumnDescriptor", Pair.of("CDS", "CD_ID")) + .put("MTablePrivilege", Pair.of("TBL_PRIVS", "TBL_GRANT_ID")) + .put("MTableColumnStatistics", Pair.of("TAB_COL_STATS", "CS_ID")) + .put("MPartition", Pair.of("PARTITIONS", "PART_ID")) + .put("MPartitionColumnStatistics", Pair.of("PART_COL_STATS", "CS_ID")) + .put("MFunction", Pair.of("FUNCS", "FUNC_ID")) + .put("MIndex", Pair.of("IDXS", "INDEX_ID")) + .put("MStringList", Pair.of("SKEWED_STRING_LIST", "STRING_LIST_ID")) + .build(); + + System.out.println("Validating sequence number for SEQUENCE_TABLE"); + + boolean isValid = true; + try { + Statement stmt = conn.createStatement(); + for (String seqName : seqNameToTable.keySet()) { + String tableName = seqNameToTable.get(seqName).getLeft(); + String tableKey = seqNameToTable.get(seqName).getRight(); + String fullSequenceName = "org.apache.hadoop.hive.metastore.model." + seqName; + String seqQuery = schemaTool.quote(QUERY_SEQ); + String maxIdQuery = String.format(schemaTool.quote(QUERY_MAX_ID), tableKey, tableName); + + ResultSet res = stmt.executeQuery(maxIdQuery); + if (res.next()) { + long maxId = res.getLong(1); + if (maxId > 0) { + PreparedStatement stmtSeq = conn.prepareStatement(seqQuery); + stmtSeq.setString(1, fullSequenceName); + ResultSet resSeq = stmtSeq.executeQuery(); + if (!resSeq.next()) { + isValid = false; + System.err.println("Missing SEQUENCE_NAME " + seqName + " from SEQUENCE_TABLE"); + } else if (resSeq.getLong(1) < maxId) { + isValid = false; + System.err.println("NEXT_VAL for " + seqName + " in SEQUENCE_TABLE < max(" + tableKey + + ") in " + tableName); + } + } + } + } + + System.out.println(isValid ? "[SUCCESS]\n" :"[FAIL]\n"); + return isValid; + } catch (SQLException e) { + throw new HiveMetaException("Failed to validate sequence number for SEQUENCE_TABLE", e); + } + } + + @VisibleForTesting + boolean validateSchemaTables(Connection conn) throws HiveMetaException { + Connection hmsConn = schemaTool.getConnectionToMetastore(false); + + System.out.println("Validating metastore schema tables"); + String version = null; + try { + MetaStoreConnectionInfo connectionInfo = schemaTool.getConnectionInfo(false); + version = schemaTool.getMetaStoreSchemaInfo().getMetaStoreSchemaVersion(connectionInfo); + } catch (HiveMetaException he) { + System.err.println("Failed to determine schema version from Hive Metastore DB. 
" + he.getMessage()); + System.out.println("Failed in schema table validation."); + LOG.debug("Failed to determine schema version from Hive Metastore DB," + he.getMessage(), he); + return false; + } + + // re-open the hms connection + hmsConn = schemaTool.getConnectionToMetastore(false); + + LOG.debug("Validating tables in the schema for version " + version); + List dbTables = new ArrayList(); + ResultSet rs = null; + try { + String schema = null; + try { + schema = hmsConn.getSchema(); + } catch (SQLFeatureNotSupportedException e) { + LOG.debug("schema is not supported"); + } + + DatabaseMetaData metadata = conn.getMetaData(); + rs = metadata.getTables(null, schema, "%", new String[] {"TABLE"}); + + while (rs.next()) { + String table = rs.getString("TABLE_NAME"); + dbTables.add(table.toLowerCase()); + LOG.debug("Found table " + table + " in HMS dbstore"); + } + } catch (SQLException e) { + throw new HiveMetaException("Failed to retrieve schema tables from Hive Metastore DB," + + e.getMessage(), e); + } finally { + if (rs != null) { + try { + rs.close(); + } catch (SQLException e) { + throw new HiveMetaException("Failed to close resultset", e); + } + } + } + + // parse the schema file to determine the tables that are expected to exist + // we are using oracle schema because it is simpler to parse, no quotes or backticks etc + List schemaTables = new ArrayList(); + List subScripts = new ArrayList(); + + String baseDir = new File(schemaTool.getMetaStoreSchemaInfo().getMetaStoreScriptDir()).getParent(); + String schemaFile = new File(schemaTool.getMetaStoreSchemaInfo().getMetaStoreScriptDir(), + schemaTool.getMetaStoreSchemaInfo().generateInitFileName(version)).getPath(); + try { + LOG.debug("Parsing schema script " + schemaFile); + subScripts.addAll(findCreateTable(schemaFile, schemaTables)); + while (subScripts.size() > 0) { + schemaFile = baseDir + "/" + schemaTool.getDbType() + "/" + subScripts.remove(0); + LOG.debug("Parsing subscript " + schemaFile); + subScripts.addAll(findCreateTable(schemaFile, schemaTables)); + } + } catch (Exception e) { + System.err.println("Exception in parsing schema file. Cause:" + e.getMessage()); + System.out.println("Failed in schema table validation."); + return false; + } + + LOG.debug("Schema tables:[ " + Arrays.toString(schemaTables.toArray()) + " ]"); + LOG.debug("DB tables:[ " + Arrays.toString(dbTables.toArray()) + " ]"); + + // now diff the lists + schemaTables.removeAll(dbTables); + if (schemaTables.size() > 0) { + Collections.sort(schemaTables); + System.err.println("Table(s) [ " + Arrays.toString(schemaTables.toArray()) + " ] " + + "are missing from the metastore database schema."); + System.out.println("[FAIL]\n"); + return false; + } else { + System.out.println("[SUCCESS]\n"); + return true; + } + } + + @VisibleForTesting + List findCreateTable(String path, List tableList) throws Exception { + if (!(new File(path)).exists()) { + throw new Exception(path + " does not exist. 
Potentially incorrect version in the metastore VERSION table"); + } + + List<String> subs = new ArrayList<>(); + NestedScriptParser sp = HiveSchemaHelper.getDbCommandParser(schemaTool.getDbType(), false); + Pattern regexp = Pattern.compile("CREATE TABLE(\\s+IF NOT EXISTS)?\\s+(\\S+).*"); + + try (BufferedReader reader = new BufferedReader(new FileReader(path)); ) { + String line = null; + while ((line = reader.readLine()) != null) { + if (sp.isNestedScript(line)) { + String subScript = sp.getScriptName(line); + LOG.debug("Schema subscript " + subScript + " found"); + subs.add(subScript); + continue; + } + line = line.replaceAll("( )+", " "); //suppress multi-spaces + line = line.replaceAll("\\(", " "); + line = line.replaceAll("IF NOT EXISTS ", ""); + line = line.replaceAll("`",""); + line = line.replaceAll("'",""); + line = line.replaceAll("\"",""); + Matcher matcher = regexp.matcher(line); + + if (matcher.find()) { + String table = matcher.group(2); + if (schemaTool.getDbType().equals("derby")) + table = table.replaceAll("APP\\.",""); + tableList.add(table.toLowerCase()); + LOG.debug("Found table " + table + " in the schema"); + } + } + } catch (IOException ex){ + throw new Exception(ex.getMessage()); + } + + return subs; + } + + @VisibleForTesting + boolean validateLocations(Connection conn, URI[] defaultServers) throws HiveMetaException { + System.out.println("Validating DFS locations"); + boolean rtn = true; + rtn &= checkMetaStoreDBLocation(conn, defaultServers); + rtn &= checkMetaStoreTableLocation(conn, defaultServers); + rtn &= checkMetaStorePartitionLocation(conn, defaultServers); + rtn &= checkMetaStoreSkewedColumnsLocation(conn, defaultServers); + System.out.println(rtn ? "[SUCCESS]\n" : "[FAIL]\n"); + return rtn; + } + + private static final String QUERY_DB_LOCATION = + " select dbt.DB_ID, " + + " dbt.NAME, " + + " dbt.DB_LOCATION_URI " + + " from DBS dbt " + + "order by dbt.DB_ID "; + + private boolean checkMetaStoreDBLocation(Connection conn, URI[] defaultServers) throws HiveMetaException { + String dbLocQuery = schemaTool.quote(QUERY_DB_LOCATION); + + int numOfInvalid = 0; + try (Statement stmt = conn.createStatement(); + ResultSet res = stmt.executeQuery(dbLocQuery)) { + while (res.next()) { + String locValue = res.getString(3); + String dbName = getNameOrID(res, 2, 1); + if (!checkLocation("Database " + dbName, locValue, defaultServers)) { + numOfInvalid++; + } + } + } catch (SQLException e) { + throw new HiveMetaException("Failed to get DB Location Info.", e); + } + return numOfInvalid == 0; + } + + private static final String TAB_ID_RANGE_QUERY = + "select max(TBL_ID), " + + " min(TBL_ID) " + + " from TBLS "; + + private static final String TAB_LOC_QUERY = + " select tbl.TBL_ID, " + + " tbl.TBL_NAME, " + + " sd.LOCATION, " + + " dbt.DB_ID, " + + " dbt.NAME " + + " from TBLS tbl " + + "inner join SDS sd on sd.SD_ID = tbl.SD_ID " + + "inner join DBS dbt on tbl.DB_ID = dbt.DB_ID " + + " where tbl.TBL_TYPE != '%s' " + + " and tbl.TBL_ID >= ? " + + " and tbl.TBL_ID <= ? 
" + + " order by tbl.TBL_ID "; + + private static final int TAB_LOC_CHECK_SIZE = 2000; + + private boolean checkMetaStoreTableLocation(Connection conn, URI[] defaultServers) + throws HiveMetaException { + String tabIDRangeQuery = schemaTool.quote(TAB_ID_RANGE_QUERY); + String tabLocQuery = String.format(schemaTool.quote(TAB_LOC_QUERY), TableType.VIRTUAL_VIEW); + + try { + long maxID = 0, minID = 0; + try (Statement stmt = conn.createStatement(); + ResultSet res = stmt.executeQuery(tabIDRangeQuery)) { + if (res.next()) { + maxID = res.getLong(1); + minID = res.getLong(2); + } + } + + int numOfInvalid = 0; + try (PreparedStatement pStmt = conn.prepareStatement(tabLocQuery)) { + while (minID <= maxID) { + pStmt.setLong(1, minID); + pStmt.setLong(2, minID + TAB_LOC_CHECK_SIZE); + try (ResultSet res = pStmt.executeQuery()) { + while (res.next()) { + String locValue = res.getString(3); + String entity = "Database " + getNameOrID(res, 5, 4) + ", Table " + getNameOrID(res, 2, 1); + if (!checkLocation(entity, locValue, defaultServers)) { + numOfInvalid++; + } + } + } + minID += TAB_LOC_CHECK_SIZE + 1; + } + } + + return numOfInvalid == 0; + } catch (SQLException e) { + throw new HiveMetaException("Failed to get Table Location Info.", e); + } + } + + private static final String QUERY_PART_ID_RANGE = + "select max(PART_ID)," + + " min(PART_ID)" + + " from PARTITIONS "; + + private static final String QUERY_PART_LOC = + " select pt.PART_ID, " + + " pt.PART_NAME, " + + " sd.LOCATION, " + + " tbl.TBL_ID, " + + " tbl.TBL_NAME, " + + " dbt.DB_ID, " + + " dbt.NAME " + + " from PARTITIONS pt " + + "inner join SDS sd on sd.SD_ID = pt.SD_ID " + + "inner join TBLS tbl on tbl.TBL_ID = pt.TBL_ID " + + "inner join DBS dbt on dbt.DB_ID = tbl.DB_ID " + + " where pt.PART_ID >= ? " + + " and pt.PART_ID <= ? 
" + + " order by tbl.TBL_ID "; + + private static final int PART_LOC_CHECK_SIZE = 2000; + + private boolean checkMetaStorePartitionLocation(Connection conn, URI[] defaultServers) + throws HiveMetaException { + String queryPartIDRange = schemaTool.quote(QUERY_PART_ID_RANGE); + String queryPartLoc = schemaTool.quote(QUERY_PART_LOC); + + try { + long maxID = 0, minID = 0; + try (Statement stmt = conn.createStatement(); + ResultSet res = stmt.executeQuery(queryPartIDRange)) { + if (res.next()) { + maxID = res.getLong(1); + minID = res.getLong(2); + } + } + + int numOfInvalid = 0; + try (PreparedStatement pStmt = conn.prepareStatement(queryPartLoc)) { + while (minID <= maxID) { + pStmt.setLong(1, minID); + pStmt.setLong(2, minID + PART_LOC_CHECK_SIZE); + try (ResultSet res = pStmt.executeQuery()) { + while (res.next()) { + String locValue = res.getString(3); + String entity = "Database " + getNameOrID(res, 7, 6) + ", Table " + getNameOrID(res, 5, 4) + + ", Partition " + getNameOrID(res, 2, 1); + if (!checkLocation(entity, locValue, defaultServers)) { + numOfInvalid++; + } + } + } + minID += PART_LOC_CHECK_SIZE + 1; + } + } + + return numOfInvalid == 0; + } catch (SQLException e) { + throw new HiveMetaException("Failed to get Partition Location Info.", e); + } + } + + private static final String QUERY_SKEWED_COL_ID_RANGE = + "select max(STRING_LIST_ID_KID), " + + " min(STRING_LIST_ID_KID) " + + " from SKEWED_COL_VALUE_LOC_MAP "; + + private static final String QUERY_SKEWED_COL_LOC = + " select t.TBL_NAME, " + + " t.TBL_ID, " + + " sk.STRING_LIST_ID_KID, " + + " sk.LOCATION, " + + " db.NAME, " + + " db.DB_ID " + + " from TBLS t " + + " join SDS s on s.SD_ID = t.SD_ID " + + " join DBS db on db.DB_ID = t.DB_ID " + + " join SKEWED_COL_VALUE_LOC_MAP sk on sk.SD_ID = s.SD_ID " + + " where sk.STRING_LIST_ID_KID >= ? " + + " and sk.STRING_LIST_ID_KID <= ? " + + "order by t.TBL_ID "; + + private static final int SKEWED_COL_LOC_CHECK_SIZE = 2000; + + private boolean checkMetaStoreSkewedColumnsLocation(Connection conn, URI[] defaultServers) + throws HiveMetaException { + String querySkewedColIDRange = schemaTool.quote(QUERY_SKEWED_COL_ID_RANGE); + String querySkewedColLoc = schemaTool.quote(QUERY_SKEWED_COL_LOC); + + try { + long maxID = 0, minID = 0; + try (Statement stmt = conn.createStatement(); + ResultSet res = stmt.executeQuery(querySkewedColIDRange)) { + if (res.next()) { + maxID = res.getLong(1); + minID = res.getLong(2); + } + } + + int numOfInvalid = 0; + try (PreparedStatement pStmt = conn.prepareStatement(querySkewedColLoc)) { + while (minID <= maxID) { + pStmt.setLong(1, minID); + pStmt.setLong(2, minID + SKEWED_COL_LOC_CHECK_SIZE); + try (ResultSet res = pStmt.executeQuery()) { + while (res.next()) { + String locValue = res.getString(4); + String entity = "Database " + getNameOrID(res, 5, 6) + ", Table " + getNameOrID(res, 1, 2) + + ", String list " + res.getString(3); + if (!checkLocation(entity, locValue, defaultServers)) { + numOfInvalid++; + } + } + } + minID += SKEWED_COL_LOC_CHECK_SIZE + 1; + } + } + + return numOfInvalid == 0; + } catch (SQLException e) { + throw new HiveMetaException("Failed to get skewed columns location info.", e); + } + } + + /** + * Check if the location is valid for the given entity + * @param entity the entity to represent a database, partition or table + * @param entityLocation the location + * @param defaultServers a list of the servers that the location needs to match. + * The location host needs to match one of the given servers. 
+ * If empty, then no check against such list. + * @return true if the location is valid + */ + private boolean checkLocation(String entity, String entityLocation, URI[] defaultServers) { + boolean isValid = true; + + if (entityLocation == null) { + System.err.println(entity + ", Error: empty location"); + isValid = false; + } else { + try { + URI currentUri = new Path(entityLocation).toUri(); + String scheme = currentUri.getScheme(); + String path = currentUri.getPath(); + if (StringUtils.isEmpty(scheme)) { + System.err.println(entity + ", Location: "+ entityLocation + ", Error: missing location scheme."); + isValid = false; + } else if (StringUtils.isEmpty(path)) { + System.err.println(entity + ", Location: "+ entityLocation + ", Error: missing location path."); + isValid = false; + } else if (ArrayUtils.isNotEmpty(defaultServers) && currentUri.getAuthority() != null) { + String authority = currentUri.getAuthority(); + boolean matchServer = false; + for(URI server : defaultServers) { + if (StringUtils.equalsIgnoreCase(server.getScheme(), scheme) && + StringUtils.equalsIgnoreCase(server.getAuthority(), authority)) { + matchServer = true; + break; + } + } + if (!matchServer) { + System.err.println(entity + ", Location: " + entityLocation + ", Error: mismatched server."); + isValid = false; + } + } + + // if there is no path element other than "/", report it but not fail + if (isValid && StringUtils.containsOnly(path, "/")) { + System.err.println(entity + ", Location: "+ entityLocation + ", Warn: location set to root, not a recommended config."); + } + } catch (Exception pe) { + System.err.println(entity + ", Error: invalid location - " + pe.getMessage()); + isValid =false; + } + } + + return isValid; + } + + private String getNameOrID(ResultSet res, int nameInx, int idInx) throws SQLException { + String itemName = res.getString(nameInx); + return (itemName == null || itemName.isEmpty()) ? "ID: " + res.getString(idInx) : "Name: " + itemName; + } + + private static final String QUERY_COLUMN_NULL_VALUES = + " select t.*" + + " from TBLS t" + + " where t.SD_ID IS NULL" + + " and (t.TBL_TYPE = '" + TableType.EXTERNAL_TABLE + "' or" + + " t.TBL_TYPE = '" + TableType.MANAGED_TABLE + "') " + + "order by t.TBL_ID "; + + @VisibleForTesting + boolean validateColumnNullValues(Connection conn) throws HiveMetaException { + System.out.println("Validating columns for incorrect NULL values."); + + boolean isValid = true; + String queryColumnNullValues = schemaTool.quote(QUERY_COLUMN_NULL_VALUES); + + try (Statement stmt = conn.createStatement(); + ResultSet res = stmt.executeQuery(queryColumnNullValues)) { + while (res.next()) { + long tableId = res.getLong("TBL_ID"); + String tableName = res.getString("TBL_NAME"); + String tableType = res.getString("TBL_TYPE"); + isValid = false; + System.err.println("SD_ID in TBLS should not be NULL for Table Name=" + tableName + ", Table ID=" + tableId + ", Table Type=" + tableType); + } + + System.out.println(isValid ? 
"[SUCCESS]\n" : "[FAIL]\n"); + return isValid; + } catch(SQLException e) { + throw new HiveMetaException("Failed to validate columns for incorrect NULL values", e); + } + } +} diff --git a/beeline/src/test/org/apache/hive/beeline/TestHiveSchemaTool.java b/beeline/src/test/org/apache/hive/beeline/TestHiveSchemaTool.java deleted file mode 100644 index 3d5f086..0000000 --- a/beeline/src/test/org/apache/hive/beeline/TestHiveSchemaTool.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hive.beeline; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf; -import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.powermock.core.classloader.annotations.PowerMockIgnore; -import org.powermock.core.classloader.annotations.PrepareForTest; -import org.powermock.modules.junit4.PowerMockRunner; - -import java.io.File; -import java.io.IOException; -import java.util.Arrays; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.eq; -import static org.mockito.Matchers.same; -import static org.mockito.Mockito.when; -import static org.powermock.api.mockito.PowerMockito.mockStatic; -import static org.powermock.api.mockito.PowerMockito.verifyStatic; - -@RunWith(PowerMockRunner.class) -@PowerMockIgnore("javax.management.*") -@PrepareForTest({ HiveSchemaHelper.class, HiveSchemaTool.CommandBuilder.class }) -public class TestHiveSchemaTool { - - String scriptFile = System.getProperty("java.io.tmpdir") + File.separator + "someScript.sql"; - @Mock - private HiveConf hiveConf; - private HiveSchemaTool.CommandBuilder builder; - private String pasword = "reallySimplePassword"; - - @Before - public void setup() throws IOException { - mockStatic(HiveSchemaHelper.class); - when(HiveSchemaHelper - .getValidConfVar(eq(MetastoreConf.ConfVars.CONNECT_URL_KEY), same(hiveConf))) - .thenReturn("someURL"); - when(HiveSchemaHelper - .getValidConfVar(eq(MetastoreConf.ConfVars.CONNECTION_DRIVER), same(hiveConf))) - .thenReturn("someDriver"); - - File file = new File(scriptFile); - if (!file.exists()) { - file.createNewFile(); - } - builder = new HiveSchemaTool.CommandBuilder(hiveConf, null, null, "testUser", pasword, scriptFile); - } - - @After - public void globalAssert() throws IOException { - verifyStatic(); - HiveSchemaHelper.getValidConfVar(eq(MetastoreConf.ConfVars.CONNECT_URL_KEY), same(hiveConf)); - HiveSchemaHelper - .getValidConfVar(eq(MetastoreConf.ConfVars.CONNECTION_DRIVER), same(hiveConf)); - - new File(scriptFile).delete(); - } - - @Test - 
public void shouldReturnStrippedPassword() throws IOException { - assertFalse(builder.buildToLog().contains(pasword)); - } - - @Test - public void shouldReturnActualPassword() throws IOException { - String[] strings = builder.buildToRun(); - assertTrue(Arrays.asList(strings).contains(pasword)); - } -} diff --git a/beeline/src/test/org/apache/hive/beeline/schematool/TestHiveSchemaTool.java b/beeline/src/test/org/apache/hive/beeline/schematool/TestHiveSchemaTool.java new file mode 100644 index 0000000..8b477bd --- /dev/null +++ b/beeline/src/test/org/apache/hive/beeline/schematool/TestHiveSchemaTool.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.beeline.schematool; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.powermock.core.classloader.annotations.PowerMockIgnore; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.modules.junit4.PowerMockRunner; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.eq; +import static org.mockito.Matchers.same; +import static org.mockito.Mockito.when; +import static org.powermock.api.mockito.PowerMockito.mockStatic; +import static org.powermock.api.mockito.PowerMockito.verifyStatic; + +@RunWith(PowerMockRunner.class) +@PowerMockIgnore("javax.management.*") +@PrepareForTest({ HiveSchemaHelper.class, HiveSchemaTool.CommandBuilder.class }) +public class TestHiveSchemaTool { + + String scriptFile = System.getProperty("java.io.tmpdir") + File.separator + "someScript.sql"; + @Mock + private HiveConf hiveConf; + private HiveSchemaTool.CommandBuilder builder; + private String pasword = "reallySimplePassword"; + + @Before + public void setup() throws IOException { + mockStatic(HiveSchemaHelper.class); + when(HiveSchemaHelper + .getValidConfVar(eq(MetastoreConf.ConfVars.CONNECT_URL_KEY), same(hiveConf))) + .thenReturn("someURL"); + when(HiveSchemaHelper + .getValidConfVar(eq(MetastoreConf.ConfVars.CONNECTION_DRIVER), same(hiveConf))) + .thenReturn("someDriver"); + + File file = new File(scriptFile); + if (!file.exists()) { + file.createNewFile(); + } + builder = new HiveSchemaTool.CommandBuilder(hiveConf, null, null, "testUser", pasword, scriptFile); + } + + @After + public void globalAssert() throws IOException { + verifyStatic(); + HiveSchemaHelper.getValidConfVar(eq(MetastoreConf.ConfVars.CONNECT_URL_KEY), 
same(hiveConf)); + HiveSchemaHelper + .getValidConfVar(eq(MetastoreConf.ConfVars.CONNECTION_DRIVER), same(hiveConf)); + + new File(scriptFile).delete(); + } + + @Test + public void shouldReturnStrippedPassword() throws IOException { + assertFalse(builder.buildToLog().contains(pasword)); + } + + @Test + public void shouldReturnActualPassword() throws IOException { + String[] strings = builder.buildToRun(); + assertTrue(Arrays.asList(strings).contains(pasword)); + } +} diff --git a/bin/ext/schemaTool.sh b/bin/ext/schemaTool.sh index 94c56ef..e30bc11 100644 --- a/bin/ext/schemaTool.sh +++ b/bin/ext/schemaTool.sh @@ -18,12 +18,12 @@ export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} " schemaTool() { HIVE_OPTS='' - CLASS=org.apache.hive.beeline.HiveSchemaTool + CLASS=org.apache.hive.beeline.schematool.HiveSchemaTool execHiveCmd $CLASS "$@" } schemaTool_help () { HIVE_OPTS='' - CLASS=org.apache.hive.beeline.HiveSchemaTool + CLASS=org.apache.hive.beeline.schematool.HiveSchemaTool execHiveCmd $CLASS "--help" } diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java deleted file mode 100644 index 3b22f15..0000000 --- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java +++ /dev/null @@ -1,801 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.beeline; - -import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.OutputStream; -import java.io.PrintStream; -import java.net.URI; -import java.sql.Connection; -import java.sql.DatabaseMetaData; -import java.sql.SQLException; -import java.util.Random; - -import junit.framework.TestCase; - -import org.apache.commons.dbcp.DelegatingConnection; -import org.apache.commons.io.FileUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaException; -import org.apache.hadoop.hive.metastore.IMetaStoreSchemaInfo; -import org.apache.hadoop.hive.metastore.MetaStoreSchemaInfoFactory; -import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; -import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.NestedScriptParser; -import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.PostgresCommandParser; -import org.apache.hadoop.hive.shims.ShimLoader; - -public class TestSchemaTool extends TestCase { - private HiveSchemaTool schemaTool; - private Connection conn; - private HiveConf hiveConf; - private String testMetastoreDB; - private PrintStream errStream; - private PrintStream outStream; - - @Override - protected void setUp() throws Exception { - super.setUp(); - testMetastoreDB = System.getProperty("java.io.tmpdir") + - File.separator + "test_metastore-" + new Random().nextInt(); - System.setProperty(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, - "jdbc:derby:" + testMetastoreDB + ";create=true"); - hiveConf = new HiveConf(this.getClass()); - schemaTool = new HiveSchemaTool( - System.getProperty("test.tmp.dir", "target/tmp"), hiveConf, "derby", null); - schemaTool.setUserName( - schemaTool.getHiveConf().get(HiveConf.ConfVars.METASTORE_CONNECTION_USER_NAME.varname)); - schemaTool.setPassWord(ShimLoader.getHadoopShims().getPassword(schemaTool.getHiveConf(), - HiveConf.ConfVars.METASTOREPWD.varname)); - System.setProperty("beeLine.system.exit", "true"); - errStream = System.err; - outStream = System.out; - conn = schemaTool.getConnectionToMetastore(false); - } - - @Override - protected void tearDown() throws Exception { - File metaStoreDir = new File(testMetastoreDB); - if (metaStoreDir.exists()) { - FileUtils.forceDeleteOnExit(metaStoreDir); - } - System.setOut(outStream); - System.setErr(errStream); - if (conn != null) { - conn.close(); - } - } - - /** - * Test the sequence validation functionality - * @throws Exception - */ - public void testValidateSequences() throws Exception { - schemaTool.doInit(); - - // Test empty database - boolean isValid = schemaTool.validateSequences(conn); - assertTrue(isValid); - - // Test valid case - String[] scripts = new String[] { - "insert into CTLGS values(99, 'test_cat_1', 'description', 'hdfs://myhost.com:8020/user/hive/warehouse/mydb')", - "insert into SEQUENCE_TABLE values('org.apache.hadoop.hive.metastore.model.MDatabase', 100)", - "insert into DBS values(99, 'test db1', 'hdfs:///tmp', 'db1', 'test', 'test', 'test_cat_1')" - }; - File scriptFile = generateTestScript(scripts); - schemaTool.runBeeLine(scriptFile.getPath()); - isValid = schemaTool.validateSequences(conn); - assertTrue(isValid); - - // Test invalid case - scripts = new String[] { - "delete from SEQUENCE_TABLE", - "delete from DBS", - "insert into SEQUENCE_TABLE values('org.apache.hadoop.hive.metastore.model.MDatabase', 100)", - "insert into DBS 
values(102, 'test db1', 'hdfs:///tmp', 'db1', 'test', 'test', 'test_cat_1')" - }; - scriptFile = generateTestScript(scripts); - schemaTool.runBeeLine(scriptFile.getPath()); - isValid = schemaTool.validateSequences(conn); - assertFalse(isValid); - } - - /** - * Test to validate that all tables exist in the HMS metastore. - * @throws Exception - */ - public void testValidateSchemaTables() throws Exception { - schemaTool.doInit("2.0.0"); - - boolean isValid = (boolean)schemaTool.validateSchemaTables(conn); - assertTrue(isValid); - - // upgrade from 2.0.0 schema and re-validate - schemaTool.doUpgrade("2.0.0"); - isValid = (boolean)schemaTool.validateSchemaTables(conn); - assertTrue(isValid); - - // Simulate a missing table scenario by renaming a couple of tables - String[] scripts = new String[] { - "RENAME TABLE SEQUENCE_TABLE to SEQUENCE_TABLE_RENAMED", - "RENAME TABLE NUCLEUS_TABLES to NUCLEUS_TABLES_RENAMED" - }; - - File scriptFile = generateTestScript(scripts); - schemaTool.runBeeLine(scriptFile.getPath()); - isValid = schemaTool.validateSchemaTables(conn); - assertFalse(isValid); - - // Restored the renamed tables - scripts = new String[] { - "RENAME TABLE SEQUENCE_TABLE_RENAMED to SEQUENCE_TABLE", - "RENAME TABLE NUCLEUS_TABLES_RENAMED to NUCLEUS_TABLES" - }; - - scriptFile = generateTestScript(scripts); - schemaTool.runBeeLine(scriptFile.getPath()); - isValid = schemaTool.validateSchemaTables(conn); - assertTrue(isValid); - - // Check that an exception from getMetaData() is reported correctly - try { - // Make a Connection object that will throw an exception - BadMetaDataConnection bad = new BadMetaDataConnection(conn); - schemaTool.validateSchemaTables(bad); - fail("did not get expected exception"); - } catch (HiveMetaException hme) { - String message = hme.getMessage(); - assertTrue("Bad HiveMetaException message :" + message, - message.contains("Failed to retrieve schema tables from Hive Metastore DB")); - Throwable cause = hme.getCause(); - assertNotNull("HiveMetaException did not contain a cause", cause); - String causeMessage = cause.getMessage(); - assertTrue("Bad SQLException message: " + causeMessage, causeMessage.contains( - BadMetaDataConnection.FAILURE_TEXT)); - } - } - - /* - * Test the validation of incorrect NULL values in the tables - * @throws Exception - */ - public void testValidateNullValues() throws Exception { - schemaTool.doInit(); - - // Test empty database - boolean isValid = schemaTool.validateColumnNullValues(conn); - assertTrue(isValid); - - // Test valid case - createTestHiveTableSchemas(); - isValid = schemaTool.validateColumnNullValues(conn); - - // Test invalid case - String[] scripts = new String[] { - "update TBLS set SD_ID=null" - }; - File scriptFile = generateTestScript(scripts); - schemaTool.runBeeLine(scriptFile.getPath()); - isValid = schemaTool.validateColumnNullValues(conn); - assertFalse(isValid); - } - - /** - * Test dryrun of schema initialization - * @throws Exception - */ - public void testSchemaInitDryRun() throws Exception { - schemaTool.setDryRun(true); - schemaTool.doInit("0.7.0"); - schemaTool.setDryRun(false); - try { - schemaTool.verifySchemaVersion(); - } catch (HiveMetaException e) { - // The connection should fail since it the dry run - return; - } - fail("Dry run shouldn't create actual metastore"); - } - - /** - * Test dryrun of schema upgrade - * @throws Exception - */ - public void testSchemaUpgradeDryRun() throws Exception { - schemaTool.doInit("0.7.0"); - - schemaTool.setDryRun(true); - schemaTool.doUpgrade("0.7.0"); - 
schemaTool.setDryRun(false); - try { - schemaTool.verifySchemaVersion(); - } catch (HiveMetaException e) { - // The connection should fail since it the dry run - return; - } - fail("Dry run shouldn't upgrade metastore schema"); - } - - /** - * Test schema initialization - * @throws Exception - */ - public void testSchemaInit() throws Exception { - IMetaStoreSchemaInfo metastoreSchemaInfo = MetaStoreSchemaInfoFactory.get(hiveConf, - System.getProperty("test.tmp.dir", "target/tmp"), "derby"); - schemaTool.doInit(metastoreSchemaInfo.getHiveSchemaVersion()); - schemaTool.verifySchemaVersion(); - } - - /** - * Test validation for schema versions - * @throws Exception - */ - public void testValidateSchemaVersions() throws Exception { - schemaTool.doInit(); - boolean isValid = schemaTool.validateSchemaVersions(); - // Test an invalid case with multiple versions - String[] scripts = new String[] { - "insert into VERSION values(100, '2.2.0', 'Hive release version 2.2.0')" - }; - File scriptFile = generateTestScript(scripts); - schemaTool.runBeeLine(scriptFile.getPath()); - isValid = schemaTool.validateSchemaVersions(); - assertFalse(isValid); - - scripts = new String[] { - "delete from VERSION where VER_ID = 100" - }; - scriptFile = generateTestScript(scripts); - schemaTool.runBeeLine(scriptFile.getPath()); - isValid = schemaTool.validateSchemaVersions(); - assertTrue(isValid); - - // Test an invalid case without version - scripts = new String[] { - "delete from VERSION" - }; - scriptFile = generateTestScript(scripts); - schemaTool.runBeeLine(scriptFile.getPath()); - isValid = schemaTool.validateSchemaVersions(); - assertFalse(isValid); - } - - /** - * Test schema upgrade - * @throws Exception - */ - public void testSchemaUpgrade() throws Exception { - boolean foundException = false; - // Initialize 0.7.0 schema - schemaTool.doInit("0.7.0"); - // verify that driver fails due to older version schema - try { - schemaTool.verifySchemaVersion(); - } catch (HiveMetaException e) { - // Expected to fail due to old schema - foundException = true; - } - if (!foundException) { - throw new Exception( - "Hive operations shouldn't pass with older version schema"); - } - - // Generate dummy pre-upgrade script with errors - String invalidPreUpgradeScript = writeDummyPreUpgradeScript( - 0, "upgrade-0.11.0-to-0.12.0.derby.sql", "foo bar;"); - // Generate dummy pre-upgrade scripts with valid SQL - String validPreUpgradeScript0 = writeDummyPreUpgradeScript( - 0, "upgrade-0.12.0-to-0.13.0.derby.sql", - "CREATE TABLE schema_test0 (id integer);"); - String validPreUpgradeScript1 = writeDummyPreUpgradeScript( - 1, "upgrade-0.12.0-to-0.13.0.derby.sql", - "CREATE TABLE schema_test1 (id integer);"); - - // Capture system out and err - schemaTool.setVerbose(true); - OutputStream stderr = new ByteArrayOutputStream(); - PrintStream errPrintStream = new PrintStream(stderr); - System.setErr(errPrintStream); - OutputStream stdout = new ByteArrayOutputStream(); - PrintStream outPrintStream = new PrintStream(stdout); - System.setOut(outPrintStream); - - // Upgrade schema from 0.7.0 to latest - schemaTool.doUpgrade("0.7.0"); - - // Verify that the schemaTool ran pre-upgrade scripts and ignored errors - assertTrue(stderr.toString().contains(invalidPreUpgradeScript)); - assertTrue(stderr.toString().contains("foo")); - assertFalse(stderr.toString().contains(validPreUpgradeScript0)); - assertFalse(stderr.toString().contains(validPreUpgradeScript1)); - assertTrue(stdout.toString().contains(validPreUpgradeScript0)); - 
assertTrue(stdout.toString().contains(validPreUpgradeScript1)); - - // Verify that driver works fine with latest schema - schemaTool.verifySchemaVersion(); - } - - /** - * Test script formatting - * @throws Exception - */ - public void testScripts() throws Exception { - String testScript[] = { - "-- this is a comment", - "DROP TABLE IF EXISTS fooTab;", - "/*!1234 this is comment code like mysql */;", - "CREATE TABLE fooTab(id INTEGER);", - "DROP TABLE footab;", - "-- ending comment" - }; - String resultScript[] = { - "DROP TABLE IF EXISTS fooTab", - "/*!1234 this is comment code like mysql */", - "CREATE TABLE fooTab(id INTEGER)", - "DROP TABLE footab", - }; - String expectedSQL = StringUtils.join(resultScript, System.getProperty("line.separator")) + - System.getProperty("line.separator"); - File testScriptFile = generateTestScript(testScript); - String flattenedSql = HiveSchemaHelper.getDbCommandParser("derby", false) - .buildCommand(testScriptFile.getParentFile().getPath(), - testScriptFile.getName()); - - assertEquals(expectedSQL, flattenedSql); - } - - /** - * Test nested script formatting - * @throws Exception - */ - public void testNestedScriptsForDerby() throws Exception { - String childTab1 = "childTab1"; - String childTab2 = "childTab2"; - String parentTab = "fooTab"; - - String childTestScript1[] = { - "-- this is a comment ", - "DROP TABLE IF EXISTS " + childTab1 + ";", - "CREATE TABLE " + childTab1 + "(id INTEGER);", - "DROP TABLE " + childTab1 + ";" - }; - String childTestScript2[] = { - "-- this is a comment", - "DROP TABLE IF EXISTS " + childTab2 + ";", - "CREATE TABLE " + childTab2 + "(id INTEGER);", - "-- this is also a comment", - "DROP TABLE " + childTab2 + ";" - }; - - String parentTestScript[] = { - " -- this is a comment", - "DROP TABLE IF EXISTS " + parentTab + ";", - " -- this is another comment ", - "CREATE TABLE " + parentTab + "(id INTEGER);", - "RUN '" + generateTestScript(childTestScript1).getName() + "';", - "DROP TABLE " + parentTab + ";", - "RUN '" + generateTestScript(childTestScript2).getName() + "';", - "--ending comment ", - }; - - File testScriptFile = generateTestScript(parentTestScript); - String flattenedSql = HiveSchemaHelper.getDbCommandParser("derby", false) - .buildCommand(testScriptFile.getParentFile().getPath(), - testScriptFile.getName()); - assertFalse(flattenedSql.contains("RUN")); - assertFalse(flattenedSql.contains("comment")); - assertTrue(flattenedSql.contains(childTab1)); - assertTrue(flattenedSql.contains(childTab2)); - assertTrue(flattenedSql.contains(parentTab)); - } - - /** - * Test nested script formatting - * @throws Exception - */ - public void testNestedScriptsForMySQL() throws Exception { - String childTab1 = "childTab1"; - String childTab2 = "childTab2"; - String parentTab = "fooTab"; - - String childTestScript1[] = { - "/* this is a comment code */", - "DROP TABLE IF EXISTS " + childTab1 + ";", - "CREATE TABLE " + childTab1 + "(id INTEGER);", - "DROP TABLE " + childTab1 + ";" - }; - String childTestScript2[] = { - "/* this is a special exec code */;", - "DROP TABLE IF EXISTS " + childTab2 + ";", - "CREATE TABLE " + childTab2 + "(id INTEGER);", - "-- this is a comment", - "DROP TABLE " + childTab2 + ";" - }; - - String parentTestScript[] = { - " -- this is a comment", - "DROP TABLE IF EXISTS " + parentTab + ";", - " /* this is special exec code */;", - "CREATE TABLE " + parentTab + "(id INTEGER);", - "SOURCE " + generateTestScript(childTestScript1).getName() + ";", - "DROP TABLE " + parentTab + ";", - "SOURCE " + 
generateTestScript(childTestScript2).getName() + ";", - "--ending comment ", - }; - - File testScriptFile = generateTestScript(parentTestScript); - String flattenedSql = HiveSchemaHelper.getDbCommandParser("mysql", false) - .buildCommand(testScriptFile.getParentFile().getPath(), - testScriptFile.getName()); - assertFalse(flattenedSql.contains("RUN")); - assertFalse(flattenedSql.contains("comment")); - assertTrue(flattenedSql.contains(childTab1)); - assertTrue(flattenedSql.contains(childTab2)); - assertTrue(flattenedSql.contains(parentTab)); - } - - /** - * Test script formatting - * @throws Exception - */ - public void testScriptWithDelimiter() throws Exception { - String testScript[] = { - "-- this is a comment", - "DROP TABLE IF EXISTS fooTab;", - "DELIMITER $$", - "/*!1234 this is comment code like mysql */$$", - "CREATE TABLE fooTab(id INTEGER)$$", - "CREATE PROCEDURE fooProc()", - "SELECT * FROM fooTab;", - "CALL barProc();", - "END PROCEDURE$$", - "DELIMITER ;", - "DROP TABLE footab;", - "-- ending comment" - }; - String resultScript[] = { - "DROP TABLE IF EXISTS fooTab", - "/*!1234 this is comment code like mysql */", - "CREATE TABLE fooTab(id INTEGER)", - "CREATE PROCEDURE fooProc()" + " " + - "SELECT * FROM fooTab;" + " " + - "CALL barProc();" + " " + - "END PROCEDURE", - "DROP TABLE footab", - }; - String expectedSQL = StringUtils.join(resultScript, System.getProperty("line.separator")) + - System.getProperty("line.separator"); - File testScriptFile = generateTestScript(testScript); - NestedScriptParser testDbParser = HiveSchemaHelper.getDbCommandParser("mysql", false); - String flattenedSql = testDbParser.buildCommand(testScriptFile.getParentFile().getPath(), - testScriptFile.getName()); - - assertEquals(expectedSQL, flattenedSql); - } - - /** - * Test script formatting - * @throws Exception - */ - public void testScriptMultiRowComment() throws Exception { - String testScript[] = { - "-- this is a comment", - "DROP TABLE IF EXISTS fooTab;", - "DELIMITER $$", - "/*!1234 this is comment code like mysql */$$", - "CREATE TABLE fooTab(id INTEGER)$$", - "DELIMITER ;", - "/* multiline comment started ", - " * multiline comment continue", - " * multiline comment ended */", - "DROP TABLE footab;", - "-- ending comment" - }; - String parsedScript[] = { - "DROP TABLE IF EXISTS fooTab", - "/*!1234 this is comment code like mysql */", - "CREATE TABLE fooTab(id INTEGER)", - "DROP TABLE footab", - }; - - String expectedSQL = StringUtils.join(parsedScript, System.getProperty("line.separator")) + - System.getProperty("line.separator"); - File testScriptFile = generateTestScript(testScript); - NestedScriptParser testDbParser = HiveSchemaHelper.getDbCommandParser("mysql", false); - String flattenedSql = testDbParser.buildCommand(testScriptFile.getParentFile().getPath(), - testScriptFile.getName()); - - assertEquals(expectedSQL, flattenedSql); - } - - /** - * Test nested script formatting - * @throws Exception - */ - public void testNestedScriptsForOracle() throws Exception { - String childTab1 = "childTab1"; - String childTab2 = "childTab2"; - String parentTab = "fooTab"; - - String childTestScript1[] = { - "-- this is a comment ", - "DROP TABLE IF EXISTS " + childTab1 + ";", - "CREATE TABLE " + childTab1 + "(id INTEGER);", - "DROP TABLE " + childTab1 + ";" - }; - String childTestScript2[] = { - "-- this is a comment", - "DROP TABLE IF EXISTS " + childTab2 + ";", - "CREATE TABLE " + childTab2 + "(id INTEGER);", - "-- this is also a comment", - "DROP TABLE " + childTab2 + ";" - }; - - String 
parentTestScript[] = { - " -- this is a comment", - "DROP TABLE IF EXISTS " + parentTab + ";", - " -- this is another comment ", - "CREATE TABLE " + parentTab + "(id INTEGER);", - "@" + generateTestScript(childTestScript1).getName() + ";", - "DROP TABLE " + parentTab + ";", - "@" + generateTestScript(childTestScript2).getName() + ";", - "--ending comment ", - }; - - File testScriptFile = generateTestScript(parentTestScript); - String flattenedSql = HiveSchemaHelper.getDbCommandParser("oracle", false) - .buildCommand(testScriptFile.getParentFile().getPath(), - testScriptFile.getName()); - assertFalse(flattenedSql.contains("@")); - assertFalse(flattenedSql.contains("comment")); - assertTrue(flattenedSql.contains(childTab1)); - assertTrue(flattenedSql.contains(childTab2)); - assertTrue(flattenedSql.contains(parentTab)); - } - - /** - * Test script formatting - * @throws Exception - */ - public void testPostgresFilter() throws Exception { - String testScript[] = { - "-- this is a comment", - "DROP TABLE IF EXISTS fooTab;", - HiveSchemaHelper.PostgresCommandParser.POSTGRES_STANDARD_STRINGS_OPT + ";", - "CREATE TABLE fooTab(id INTEGER);", - "DROP TABLE footab;", - "-- ending comment" - }; - - String expectedScriptWithOptionPresent[] = { - "DROP TABLE IF EXISTS fooTab", - HiveSchemaHelper.PostgresCommandParser.POSTGRES_STANDARD_STRINGS_OPT, - "CREATE TABLE fooTab(id INTEGER)", - "DROP TABLE footab", - }; - - NestedScriptParser noDbOptParser = HiveSchemaHelper - .getDbCommandParser("postgres", false); - String expectedSQL = StringUtils.join( - expectedScriptWithOptionPresent, System.getProperty("line.separator")) + - System.getProperty("line.separator"); - File testScriptFile = generateTestScript(testScript); - String flattenedSql = noDbOptParser.buildCommand( - testScriptFile.getParentFile().getPath(), testScriptFile.getName()); - assertEquals(expectedSQL, flattenedSql); - - String expectedScriptWithOptionAbsent[] = { - "DROP TABLE IF EXISTS fooTab", - "CREATE TABLE fooTab(id INTEGER)", - "DROP TABLE footab", - }; - - NestedScriptParser dbOptParser = HiveSchemaHelper.getDbCommandParser( - "postgres", - PostgresCommandParser.POSTGRES_SKIP_STANDARD_STRINGS_DBOPT, - null, null, null, null, false); - expectedSQL = StringUtils.join( - expectedScriptWithOptionAbsent, System.getProperty("line.separator")) + - System.getProperty("line.separator"); - testScriptFile = generateTestScript(testScript); - flattenedSql = dbOptParser.buildCommand( - testScriptFile.getParentFile().getPath(), testScriptFile.getName()); - assertEquals(expectedSQL, flattenedSql); - } - - /** - * Test validate uri of locations - * @throws Exception - */ - public void testValidateLocations() throws Exception { - schemaTool.doInit(); - URI defaultRoot = new URI("hdfs://myhost.com:8020"); - URI defaultRoot2 = new URI("s3://myhost2.com:8888"); - //check empty DB - boolean isValid = schemaTool.validateLocations(conn, null); - assertTrue(isValid); - isValid = schemaTool.validateLocations(conn, new URI[] {defaultRoot,defaultRoot2}); - assertTrue(isValid); - - // Test valid case - String[] scripts = new String[] { - "insert into CTLGS values(3, 'test_cat_2', 'description', 'hdfs://myhost.com:8020/user/hive/warehouse/mydb')", - "insert into DBS values(2, 'my db', 'hdfs://myhost.com:8020/user/hive/warehouse/mydb', 'mydb', 'public', 'role', 'test_cat_2')", - "insert into DBS values(7, 'db with bad port', 'hdfs://myhost.com:8020/', 'haDB', 'public', 'role', 'test_cat_2')", - "insert into 
SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (3,null,'org.apache.hadoop.mapred.TextInputFormat','N','N',null,-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (4000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL,'n')", - "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (3 ,1435255431,2,0 ,'hive',0,3,'myView','VIRTUAL_VIEW','select a.col1,a.col2 from foo','select * from foo','n')", - "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (4012 ,1435255431,7,0 ,'hive',0,4000,'mytal4012','MANAGED_TABLE',NULL,NULL,'n')", - "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)", - "insert into SKEWED_STRING_LIST values(1)", - "insert into SKEWED_STRING_LIST values(2)", - "insert into SKEWED_COL_VALUE_LOC_MAP values(1,1,'hdfs://myhost.com:8020/user/hive/warehouse/mytal/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/')", - "insert into SKEWED_COL_VALUE_LOC_MAP values(2,2,'s3://myhost.com:8020/user/hive/warehouse/mytal/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/')" - }; - File scriptFile = generateTestScript(scripts); - schemaTool.runBeeLine(scriptFile.getPath()); - isValid = schemaTool.validateLocations(conn, null); - assertTrue(isValid); - isValid = schemaTool.validateLocations(conn, new URI[] {defaultRoot, defaultRoot2}); - assertTrue(isValid); - scripts = new String[] { - "delete from SKEWED_COL_VALUE_LOC_MAP", - "delete from SKEWED_STRING_LIST", - "delete from PARTITIONS", - "delete from TBLS", - "delete from SDS", - "delete from DBS", - "insert into DBS values(2, 'my db', '/user/hive/warehouse/mydb', 'mydb', 'public', 'role', 'test_cat_2')", - "insert into DBS values(4, 'my db2', 'hdfs://myhost.com:8020', '', 'public', 'role', 'test_cat_2')", - "insert into DBS values(6, 'db with bad port', 'hdfs://myhost.com:8020:', 'zDB', 'public', 'role', 'test_cat_2')", - "insert into DBS values(7, 'db with bad port', 'hdfs://mynameservice.com/', 'haDB', 'public', 'role', 'test_cat_2')", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values 
(1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://yourhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','file:///user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL,'n')", - "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (3000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','yourhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (4000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (4001,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (4003,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (4004,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (4002,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (5000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','file:///user/admin/2016_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (3000 ,1435255431,2,0 ,'hive',0,3000,'mytal3000','MANAGED_TABLE',NULL,NULL,'n')", - "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (4011 ,1435255431,4,0 ,'hive',0,4001,'mytal4011','MANAGED_TABLE',NULL,NULL,'n')", - "insert into 
TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (4012 ,1435255431,4,0 ,'hive',0,4002,'','MANAGED_TABLE',NULL,NULL,'n')", - "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (4013 ,1435255431,4,0 ,'hive',0,4003,'mytal4013','MANAGED_TABLE',NULL,NULL,'n')", - "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (4014 ,1435255431,2,0 ,'hive',0,4003,'','MANAGED_TABLE',NULL,NULL,'n')", - "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(4001, 1441402388,0, 'd1=1/d2=4001',4001,4011)", - "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(4002, 1441402388,0, 'd1=1/d2=4002',4002,4012)", - "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(4003, 1441402388,0, 'd1=1/d2=4003',4003,4013)", - "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(4004, 1441402388,0, 'd1=1/d2=4004',4004,4014)", - "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(5000, 1441402388,0, 'd1=1/d2=5000',5000,2)", - "insert into SKEWED_STRING_LIST values(1)", - "insert into SKEWED_STRING_LIST values(2)", - "insert into SKEWED_COL_VALUE_LOC_MAP values(1,1,'hdfs://yourhost.com:8020/user/hive/warehouse/mytal/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/')", - "insert into SKEWED_COL_VALUE_LOC_MAP values(2,2,'file:///user/admin/warehouse/mytal/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/')" - }; - scriptFile = generateTestScript(scripts); - schemaTool.runBeeLine(scriptFile.getPath()); - isValid = schemaTool.validateLocations(conn, null); - assertFalse(isValid); - isValid = schemaTool.validateLocations(conn, new URI[] {defaultRoot, defaultRoot2}); - assertFalse(isValid); - } - - public void testHiveMetastoreDbPropertiesTable() throws HiveMetaException, IOException { - schemaTool.doInit("3.0.0"); - validateMetastoreDbPropertiesTable(); - } - - public void testMetastoreDbPropertiesAfterUpgrade() throws HiveMetaException, IOException { - schemaTool.doInit("2.0.0"); - schemaTool.doUpgrade(); - validateMetastoreDbPropertiesTable(); - } - - private File generateTestScript(String [] stmts) throws IOException { - File testScriptFile = File.createTempFile("schematest", ".sql"); - testScriptFile.deleteOnExit(); - FileWriter fstream = new FileWriter(testScriptFile.getPath()); - BufferedWriter out = new BufferedWriter(fstream); - for (String line: stmts) { - out.write(line); - out.newLine(); - } - out.close(); - return testScriptFile; - } - - private void validateMetastoreDbPropertiesTable() throws HiveMetaException, IOException { - boolean isValid = (boolean) schemaTool.validateSchemaTables(conn); - assertTrue(isValid); - // adding same property key twice should throw unique key constraint violation exception - String[] scripts = new String[] { - "insert into METASTORE_DB_PROPERTIES values ('guid', 'test-uuid-1', 'dummy uuid 1')", - "insert into METASTORE_DB_PROPERTIES values ('guid', 'test-uuid-2', 'dummy uuid 2')", }; - File scriptFile = generateTestScript(scripts); - Exception ex = null; - try { - schemaTool.runBeeLine(scriptFile.getPath()); - } catch (Exception iox) { - ex = iox; - } - assertTrue(ex != null && ex instanceof 
IOException); - } - /** - * Write out a dummy pre-upgrade script with given SQL statement. - */ - private String writeDummyPreUpgradeScript(int index, String upgradeScriptName, - String sql) throws Exception { - String preUpgradeScript = "pre-" + index + "-" + upgradeScriptName; - String dummyPreScriptPath = System.getProperty("test.tmp.dir", "target/tmp") + - File.separatorChar + "scripts" + File.separatorChar + "metastore" + - File.separatorChar + "upgrade" + File.separatorChar + "derby" + - File.separatorChar + preUpgradeScript; - FileWriter fstream = new FileWriter(dummyPreScriptPath); - BufferedWriter out = new BufferedWriter(fstream); - out.write(sql + System.getProperty("line.separator") + ";"); - out.close(); - return preUpgradeScript; - } - - /** - * Insert the records in DB to simulate a hive table - * @throws IOException - */ - private void createTestHiveTableSchemas() throws IOException { - String[] scripts = new String[] { - "insert into CTLGS values(2, 'my_catalog', 'description', 'hdfs://myhost.com:8020/user/hive/warehouse/mydb')", - "insert into DBS values(2, 'my db', 'hdfs://myhost.com:8021/user/hive/warehouse/mydb', 'mydb', 'public', 'role', 'my_catalog')", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", - "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL,'n')", - "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (3 ,1435255431,2,0 ,'hive',0,2,'aTable','MANAGED_TABLE',NULL,NULL,'n')", - "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)" - }; - File scriptFile = generateTestScript(scripts); - schemaTool.runBeeLine(scriptFile.getPath()); - } - - /** - * A mock Connection class that throws an exception out of getMetaData(). - */ - class BadMetaDataConnection extends DelegatingConnection { - static final String FAILURE_TEXT = "fault injected"; - - BadMetaDataConnection(Connection connection) { - super(connection); - } - - @Override - public DatabaseMetaData getMetaData() throws SQLException { - throw new SQLException(FAILURE_TEXT); - } - } -} diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaToolCatalogOps.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaToolCatalogOps.java deleted file mode 100644 index f5bc570..0000000 --- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaToolCatalogOps.java +++ /dev/null @@ -1,417 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0 - *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hive.beeline; - -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaException; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.Catalog; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.Function; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.client.builder.CatalogBuilder; -import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder; -import org.apache.hadoop.hive.metastore.client.builder.FunctionBuilder; -import org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder; -import org.apache.hadoop.hive.metastore.client.builder.TableBuilder; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf; -import org.apache.thrift.TException; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; -import java.util.Collections; -import java.util.HashSet; -import java.util.Set; - -import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME; -import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_NAME; - -public class TestSchemaToolCatalogOps { - private static final Logger LOG = LoggerFactory.getLogger(TestSchemaToolCatalogOps.class); - private static HiveSchemaTool schemaTool; - private static HiveConf conf; - private IMetaStoreClient client; - private static String testMetastoreDB; - - @BeforeClass - public static void initDb() throws HiveMetaException, IOException { - conf = new HiveConf(); - MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.AUTO_CREATE_ALL, false); - MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.HMS_HANDLER_ATTEMPTS, 1); - MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.THRIFT_CONNECTION_RETRIES, 1); - testMetastoreDB = System.getProperty("java.io.tmpdir") + - File.separator + "testschematoolcatopsdb"; - MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CONNECT_URL_KEY, - "jdbc:derby:" + testMetastoreDB + ";create=true"); - schemaTool = new HiveSchemaTool( - System.getProperty("test.tmp.dir", "target/tmp"), conf, "derby", null); - schemaTool.setUserName(MetastoreConf.getVar(conf, MetastoreConf.ConfVars.CONNECTION_USER_NAME)); - schemaTool.setPassWord(MetastoreConf.getPassword(conf, MetastoreConf.ConfVars.PWD)); - schemaTool.doInit(); // Pre-install the database so all the tables are there. 
- - } - - @AfterClass - public static void removeDb() throws Exception { - File metaStoreDir = new File(testMetastoreDB); - if (metaStoreDir.exists()) { - FileUtils.forceDeleteOnExit(metaStoreDir); - } - } - - @Before - public void createClient() throws MetaException { - client = new HiveMetaStoreClient(conf); - } - - @Test - public void createCatalog() throws HiveMetaException, TException { - String catName = "my_test_catalog"; - String location = "file:///tmp/my_test_catalog"; - String description = "very descriptive"; - schemaTool.createCatalog(catName, location, description, false); - - Catalog cat = client.getCatalog(catName); - Assert.assertEquals(location, cat.getLocationUri()); - Assert.assertEquals(description, cat.getDescription()); - } - - @Test(expected = HiveMetaException.class) - public void createExistingCatalog() throws HiveMetaException { - schemaTool.createCatalog("hive", "somewhere", "", false); - } - - @Test - public void createExistingCatalogWithIfNotExists() throws HiveMetaException { - String catName = "my_existing_test_catalog"; - String location = "file:///tmp/my_test_catalog"; - String description = "very descriptive"; - schemaTool.createCatalog(catName, location, description, false); - - schemaTool.createCatalog(catName, location, description, true); - } - - @Test - public void alterCatalog() throws HiveMetaException, TException { - String catName = "an_alterable_catalog"; - String location = "file:///tmp/an_alterable_catalog"; - String description = "description"; - schemaTool.createCatalog(catName, location, description, false); - - location = "file:///tmp/somewhere_else"; - schemaTool.alterCatalog(catName, location, null); - Catalog cat = client.getCatalog(catName); - Assert.assertEquals(location, cat.getLocationUri()); - Assert.assertEquals(description, cat.getDescription()); - - description = "a better description"; - schemaTool.alterCatalog(catName, null, description); - cat = client.getCatalog(catName); - Assert.assertEquals(location, cat.getLocationUri()); - Assert.assertEquals(description, cat.getDescription()); - - location = "file:///tmp/a_third_location"; - description = "best description yet"; - schemaTool.alterCatalog(catName, location, description); - cat = client.getCatalog(catName); - Assert.assertEquals(location, cat.getLocationUri()); - Assert.assertEquals(description, cat.getDescription()); - } - - @Test(expected = HiveMetaException.class) - public void alterBogusCatalog() throws HiveMetaException { - schemaTool.alterCatalog("nosuch", "file:///tmp/somewhere", "whatever"); - } - - @Test(expected = HiveMetaException.class) - public void alterCatalogNoChange() throws HiveMetaException { - String catName = "alter_cat_no_change"; - String location = "file:///tmp/alter_cat_no_change"; - String description = "description"; - schemaTool.createCatalog(catName, location, description, false); - - schemaTool.alterCatalog(catName, null, null); - } - - @Test - public void moveDatabase() throws HiveMetaException, TException { - String toCatName = "moveDbCat"; - String dbName = "moveDbDb"; - String tableName = "moveDbTable"; - String funcName = "movedbfunc"; - String partVal = "moveDbKey"; - - new CatalogBuilder() - .setName(toCatName) - .setLocation("file:///tmp") - .create(client); - - Database db = new DatabaseBuilder() - .setCatalogName(DEFAULT_CATALOG_NAME) - .setName(dbName) - .create(client, conf); - - new FunctionBuilder() - .inDb(db) - .setName(funcName) - .setClass("org.apache.hive.myudf") - .create(client, conf); - - Table table = new 
TableBuilder() - .inDb(db) - .setTableName(tableName) - .addCol("a", "int") - .addPartCol("p", "string") - .create(client, conf); - - new PartitionBuilder() - .inTable(table) - .addValue(partVal) - .addToTable(client, conf); - - schemaTool.moveDatabase(DEFAULT_CATALOG_NAME, toCatName, dbName); - - Database fetchedDb = client.getDatabase(toCatName, dbName); - Assert.assertNotNull(fetchedDb); - Assert.assertEquals(toCatName.toLowerCase(), fetchedDb.getCatalogName()); - - Function fetchedFunction = client.getFunction(toCatName, dbName, funcName); - Assert.assertNotNull(fetchedFunction); - Assert.assertEquals(toCatName.toLowerCase(), fetchedFunction.getCatName()); - Assert.assertEquals(dbName.toLowerCase(), fetchedFunction.getDbName()); - - Table fetchedTable = client.getTable(toCatName, dbName, tableName); - Assert.assertNotNull(fetchedTable); - Assert.assertEquals(toCatName.toLowerCase(), fetchedTable.getCatName()); - Assert.assertEquals(dbName.toLowerCase(), fetchedTable.getDbName()); - - Partition fetchedPart = - client.getPartition(toCatName, dbName, tableName, Collections.singletonList(partVal)); - Assert.assertNotNull(fetchedPart); - Assert.assertEquals(toCatName.toLowerCase(), fetchedPart.getCatName()); - Assert.assertEquals(dbName.toLowerCase(), fetchedPart.getDbName()); - Assert.assertEquals(tableName.toLowerCase(), fetchedPart.getTableName()); - } - - @Test - public void moveDatabaseWithExistingDbOfSameNameAlreadyInTargetCatalog() - throws TException, HiveMetaException { - String catName = "clobberCatalog"; - new CatalogBuilder() - .setName(catName) - .setLocation("file:///tmp") - .create(client); - try { - schemaTool.moveDatabase(catName, DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME); - Assert.fail("Attempt to move default database should have failed."); - } catch (HiveMetaException e) { - // good - } - - // Make sure nothing really moved - Set dbNames = new HashSet<>(client.getAllDatabases(DEFAULT_CATALOG_NAME)); - Assert.assertTrue(dbNames.contains(DEFAULT_DATABASE_NAME)); - } - - @Test(expected = HiveMetaException.class) - public void moveNonExistentDatabase() throws TException, HiveMetaException { - String catName = "moveNonExistentDb"; - new CatalogBuilder() - .setName(catName) - .setLocation("file:///tmp") - .create(client); - schemaTool.moveDatabase(catName, DEFAULT_CATALOG_NAME, "nosuch"); - } - - @Test - public void moveDbToNonExistentCatalog() throws TException, HiveMetaException { - String dbName = "doomedToHomelessness"; - new DatabaseBuilder() - .setName(dbName) - .create(client, conf); - try { - schemaTool.moveDatabase(DEFAULT_CATALOG_NAME, "nosuch", dbName); - Assert.fail("Attempt to move database to non-existent catalog should have failed."); - } catch (HiveMetaException e) { - // good - } - - // Make sure nothing really moved - Set dbNames = new HashSet<>(client.getAllDatabases(DEFAULT_CATALOG_NAME)); - Assert.assertTrue(dbNames.contains(dbName.toLowerCase())); - } - - @Test - public void moveTable() throws TException, HiveMetaException { - String toCatName = "moveTableCat"; - String toDbName = "moveTableDb"; - String tableName = "moveTableTable"; - String partVal = "moveTableKey"; - - new CatalogBuilder() - .setName(toCatName) - .setLocation("file:///tmp") - .create(client); - - new DatabaseBuilder() - .setCatalogName(toCatName) - .setName(toDbName) - .create(client, conf); - - Table table = new TableBuilder() - .setTableName(tableName) - .addCol("a", "int") - .addPartCol("p", "string") - .create(client, conf); - - new PartitionBuilder() - .inTable(table) - 
.addValue(partVal) - .addToTable(client, conf); - - schemaTool.moveTable(DEFAULT_CATALOG_NAME, toCatName, DEFAULT_DATABASE_NAME, toDbName, tableName); - - Table fetchedTable = client.getTable(toCatName, toDbName, tableName); - Assert.assertNotNull(fetchedTable); - Assert.assertEquals(toCatName.toLowerCase(), fetchedTable.getCatName()); - Assert.assertEquals(toDbName.toLowerCase(), fetchedTable.getDbName()); - - Partition fetchedPart = - client.getPartition(toCatName, toDbName, tableName, Collections.singletonList(partVal)); - Assert.assertNotNull(fetchedPart); - Assert.assertEquals(toCatName.toLowerCase(), fetchedPart.getCatName()); - Assert.assertEquals(toDbName.toLowerCase(), fetchedPart.getDbName()); - Assert.assertEquals(tableName.toLowerCase(), fetchedPart.getTableName()); - } - - @Test - public void moveTableWithinCatalog() throws TException, HiveMetaException { - String toDbName = "moveTableWithinCatalogDb"; - String tableName = "moveTableWithinCatalogTable"; - String partVal = "moveTableWithinCatalogKey"; - - new DatabaseBuilder() - .setName(toDbName) - .create(client, conf); - - Table table = new TableBuilder() - .setTableName(tableName) - .addCol("a", "int") - .addPartCol("p", "string") - .create(client, conf); - - new PartitionBuilder() - .inTable(table) - .addValue(partVal) - .addToTable(client, conf); - - schemaTool.moveTable(DEFAULT_CATALOG_NAME, DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME, toDbName, tableName); - - Table fetchedTable = client.getTable(DEFAULT_CATALOG_NAME, toDbName, tableName); - Assert.assertNotNull(fetchedTable); - Assert.assertEquals(DEFAULT_CATALOG_NAME, fetchedTable.getCatName()); - Assert.assertEquals(toDbName.toLowerCase(), fetchedTable.getDbName()); - - Partition fetchedPart = - client.getPartition(DEFAULT_CATALOG_NAME, toDbName, tableName, Collections.singletonList(partVal)); - Assert.assertNotNull(fetchedPart); - Assert.assertEquals(DEFAULT_CATALOG_NAME, fetchedPart.getCatName()); - Assert.assertEquals(toDbName.toLowerCase(), fetchedPart.getDbName()); - Assert.assertEquals(tableName.toLowerCase(), fetchedPart.getTableName()); - } - - @Test - public void moveTableWithExistingTableOfSameNameAlreadyInTargetDatabase() - throws TException, HiveMetaException { - String toDbName = "clobberTableDb"; - String tableName = "clobberTableTable"; - - Database toDb = new DatabaseBuilder() - .setName(toDbName) - .create(client, conf); - - new TableBuilder() - .setTableName(tableName) - .addCol("a", "int") - .create(client, conf); - - new TableBuilder() - .inDb(toDb) - .setTableName(tableName) - .addCol("b", "varchar(32)") - .create(client, conf); - - try { - schemaTool.moveTable(DEFAULT_CATALOG_NAME, DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME, - toDbName, tableName); - Assert.fail("Attempt to move table should have failed."); - } catch (HiveMetaException e) { - // good - } - - // Make sure nothing really moved - Set tableNames = new HashSet<>(client.getAllTables(DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME)); - Assert.assertTrue(tableNames.contains(tableName.toLowerCase())); - - // Make sure the table in the target database didn't get clobbered - Table fetchedTable = client.getTable(DEFAULT_CATALOG_NAME, toDbName, tableName); - Assert.assertEquals("b", fetchedTable.getSd().getCols().get(0).getName()); - } - - @Test(expected = HiveMetaException.class) - public void moveNonExistentTable() throws TException, HiveMetaException { - String toDbName = "moveNonExistentTable"; - new DatabaseBuilder() - .setName(toDbName) - .create(client, conf); - 
schemaTool.moveTable(DEFAULT_CATALOG_NAME, DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME, toDbName, - "nosuch"); - } - - @Test - public void moveTableToNonExistentDb() throws TException, HiveMetaException { - String tableName = "doomedToWander"; - new TableBuilder() - .setTableName(tableName) - .addCol("a", "int") - .create(client, conf); - - try { - schemaTool.moveTable(DEFAULT_CATALOG_NAME, DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME, - "nosuch", tableName); - Assert.fail("Attempt to move table to non-existent table should have failed."); - } catch (HiveMetaException e) { - // good - } - - // Make sure nothing really moved - Set tableNames = new HashSet<>(client.getAllTables(DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME)); - Assert.assertTrue(tableNames.contains(tableName.toLowerCase())); - } -} diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/schematool/TestSchemaTool.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/schematool/TestSchemaTool.java new file mode 100644 index 0000000..6986689 --- /dev/null +++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/schematool/TestSchemaTool.java @@ -0,0 +1,826 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.beeline.schematool; + +import java.io.BufferedWriter; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; +import java.net.URI; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.SQLException; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.commons.dbcp.DelegatingConnection; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.text.StrTokenizer; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.IMetaStoreSchemaInfo; +import org.apache.hadoop.hive.metastore.MetaStoreSchemaInfoFactory; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.NestedScriptParser; +import org.apache.hadoop.hive.metastore.tools.HiveSchemaHelper.PostgresCommandParser; +import org.apache.hadoop.hive.shims.ShimLoader; + +public class TestSchemaTool extends TestCase { + private static HiveSchemaTool schemaTool; + private Connection conn; + private HiveConf hiveConf; + private String testMetastoreDB; + private PrintStream errStream; + private PrintStream outStream; + private String argsBase; + private HiveSchemaToolTaskValidate validator; + + @Override + protected void setUp() throws Exception { + super.setUp(); + testMetastoreDB = System.getProperty("java.io.tmpdir") + + File.separator + "test_metastore-" + new Random().nextInt(); + System.setProperty(MetastoreConf.ConfVars.CONNECT_URL_KEY.getVarname(), + "jdbc:derby:" + testMetastoreDB + ";create=true"); + hiveConf = new HiveConf(this.getClass()); + schemaTool = new HiveSchemaTool( + System.getProperty("test.tmp.dir", "target/tmp"), hiveConf, "derby", null); + + String userName = hiveConf.get(MetastoreConf.ConfVars.CONNECTION_USER_NAME.getVarname()); + String passWord = ShimLoader.getHadoopShims().getPassword(schemaTool.getHiveConf(), + MetastoreConf.ConfVars.PWD.getVarname()); + schemaTool.setUserName(userName); + schemaTool.setPassWord(passWord); + + argsBase = "-dbType derby -userName " + userName + " -passWord " + passWord + " "; + + System.setProperty("beeLine.system.exit", "true"); + errStream = System.err; + outStream = System.out; + conn = schemaTool.getConnectionToMetastore(false); + + validator = new HiveSchemaToolTaskValidate(); + validator.setHiveSchemaTool(schemaTool); + } + + @Override + protected void tearDown() throws Exception { + File metaStoreDir = new File(testMetastoreDB); + if (metaStoreDir.exists()) { + FileUtils.forceDeleteOnExit(metaStoreDir); + } + System.setOut(outStream); + System.setErr(errStream); + if (conn != null) { + conn.close(); + } + } + + /** + * Test the sequence validation functionality + * @throws Exception + */ + public void testValidateSequences() throws Exception { + execute(new HiveSchemaToolTaskInit(), "-initSchema"); + + // Test empty database + boolean isValid = validator.validateSequences(conn); + assertTrue(isValid); + + // Test valid case + String[] scripts = new String[] { + "insert into CTLGS values(99, 'test_cat_1', 'description', 'hdfs://myhost.com:8020/user/hive/warehouse/mydb')", + "insert into SEQUENCE_TABLE values('org.apache.hadoop.hive.metastore.model.MDatabase', 100)", + "insert into DBS 
values(99, 'test db1', 'hdfs:///tmp', 'db1', 'test', 'test', 'test_cat_1')" + }; + File scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = validator.validateSequences(conn); + assertTrue(isValid); + + // Test invalid case + scripts = new String[] { + "delete from SEQUENCE_TABLE", + "delete from DBS", + "insert into SEQUENCE_TABLE values('org.apache.hadoop.hive.metastore.model.MDatabase', 100)", + "insert into DBS values(102, 'test db1', 'hdfs:///tmp', 'db1', 'test', 'test', 'test_cat_1')" + }; + scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = validator.validateSequences(conn); + assertFalse(isValid); + } + + /** + * Test to validate that all tables exist in the HMS metastore. + * @throws Exception + */ + public void testValidateSchemaTables() throws Exception { + execute(new HiveSchemaToolTaskInit(), "-initSchemaTo 2.0.0"); + + boolean isValid = (boolean)validator.validateSchemaTables(conn); + assertTrue(isValid); + + // upgrade from 2.0.0 schema and re-validate + execute(new HiveSchemaToolTaskUpgrade(), "-upgradeSchemaFrom 2.0.0"); + isValid = (boolean)validator.validateSchemaTables(conn); + assertTrue(isValid); + + // Simulate a missing table scenario by renaming a couple of tables + String[] scripts = new String[] { + "RENAME TABLE SEQUENCE_TABLE to SEQUENCE_TABLE_RENAMED", + "RENAME TABLE NUCLEUS_TABLES to NUCLEUS_TABLES_RENAMED" + }; + + File scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = validator.validateSchemaTables(conn); + assertFalse(isValid); + + // Restored the renamed tables + scripts = new String[] { + "RENAME TABLE SEQUENCE_TABLE_RENAMED to SEQUENCE_TABLE", + "RENAME TABLE NUCLEUS_TABLES_RENAMED to NUCLEUS_TABLES" + }; + + scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = validator.validateSchemaTables(conn); + assertTrue(isValid); + + // Check that an exception from getMetaData() is reported correctly + try { + // Make a Connection object that will throw an exception + BadMetaDataConnection bad = new BadMetaDataConnection(conn); + validator.validateSchemaTables(bad); + fail("did not get expected exception"); + } catch (HiveMetaException hme) { + String message = hme.getMessage(); + assertTrue("Bad HiveMetaException message :" + message, + message.contains("Failed to retrieve schema tables from Hive Metastore DB")); + Throwable cause = hme.getCause(); + assertNotNull("HiveMetaException did not contain a cause", cause); + String causeMessage = cause.getMessage(); + assertTrue("Bad SQLException message: " + causeMessage, causeMessage.contains( + BadMetaDataConnection.FAILURE_TEXT)); + } + } + + /* + * Test the validation of incorrect NULL values in the tables + * @throws Exception + */ + public void testValidateNullValues() throws Exception { + execute(new HiveSchemaToolTaskInit(), "-initSchema"); + + // Test empty database + boolean isValid = validator.validateColumnNullValues(conn); + assertTrue(isValid); + + // Test valid case + createTestHiveTableSchemas(); + isValid = validator.validateColumnNullValues(conn); + + // Test invalid case + String[] scripts = new String[] { + "update TBLS set SD_ID=null" + }; + File scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = validator.validateColumnNullValues(conn); + assertFalse(isValid); + } + + /** + * Test dryrun of schema initialization + * @throws Exception + */ + public void 
testSchemaInitDryRun() throws Exception { + schemaTool.setDryRun(true); + execute(new HiveSchemaToolTaskInit(), "-initSchemaTo 0.7.0"); + schemaTool.setDryRun(false); + try { + schemaTool.verifySchemaVersion(); + } catch (HiveMetaException e) { + // The connection should fail since it is a dry run + return; + } + fail("Dry run shouldn't create actual metastore"); + } + + /** + * Test dryrun of schema upgrade + * @throws Exception + */ + public void testSchemaUpgradeDryRun() throws Exception { + execute(new HiveSchemaToolTaskInit(), "-initSchemaTo 0.7.0"); + + schemaTool.setDryRun(true); + execute(new HiveSchemaToolTaskUpgrade(), "-upgradeSchemaFrom 0.7.0"); + schemaTool.setDryRun(false); + try { + schemaTool.verifySchemaVersion(); + } catch (HiveMetaException e) { + // The connection should fail since it is a dry run + return; + } + fail("Dry run shouldn't upgrade metastore schema"); + } + + /** + * Test schema initialization + * @throws Exception + */ + public void testSchemaInit() throws Exception { + IMetaStoreSchemaInfo metastoreSchemaInfo = MetaStoreSchemaInfoFactory.get(hiveConf, + System.getProperty("test.tmp.dir", "target/tmp"), "derby"); + execute(new HiveSchemaToolTaskInit(), "-initSchemaTo " + metastoreSchemaInfo.getHiveSchemaVersion()); + schemaTool.verifySchemaVersion(); + } + + /** + * Test validation for schema versions + * @throws Exception + */ + public void testValidateSchemaVersions() throws Exception { + execute(new HiveSchemaToolTaskInit(), "-initSchema"); + boolean isValid = validator.validateSchemaVersions(); + // Test an invalid case with multiple versions + String[] scripts = new String[] { + "insert into VERSION values(100, '2.2.0', 'Hive release version 2.2.0')" + }; + File scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = validator.validateSchemaVersions(); + assertFalse(isValid); + + scripts = new String[] { + "delete from VERSION where VER_ID = 100" + }; + scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = validator.validateSchemaVersions(); + assertTrue(isValid); + + // Test an invalid case without version + scripts = new String[] { + "delete from VERSION" + }; + scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = validator.validateSchemaVersions(); + assertFalse(isValid); + } + + /** + * Test schema upgrade + * @throws Exception + */ + public void testSchemaUpgrade() throws Exception { + boolean foundException = false; + // Initialize 0.7.0 schema + execute(new HiveSchemaToolTaskInit(), "-initSchemaTo 0.7.0"); + // verify that driver fails due to older version schema + try { + schemaTool.verifySchemaVersion(); + } catch (HiveMetaException e) { + // Expected to fail due to old schema + foundException = true; + } + if (!foundException) { + throw new Exception( + "Hive operations shouldn't pass with older version schema"); + } + + // Generate dummy pre-upgrade script with errors + String invalidPreUpgradeScript = writeDummyPreUpgradeScript( + 0, "upgrade-0.11.0-to-0.12.0.derby.sql", "foo bar;"); + // Generate dummy pre-upgrade scripts with valid SQL + String validPreUpgradeScript0 = writeDummyPreUpgradeScript( + 0, "upgrade-0.12.0-to-0.13.0.derby.sql", + "CREATE TABLE schema_test0 (id integer);"); + String validPreUpgradeScript1 = writeDummyPreUpgradeScript( + 1, "upgrade-0.12.0-to-0.13.0.derby.sql", + "CREATE TABLE schema_test1 (id integer);"); + + // Capture system out and err + schemaTool.setVerbose(true); + 
OutputStream stderr = new ByteArrayOutputStream(); + PrintStream errPrintStream = new PrintStream(stderr); + System.setErr(errPrintStream); + OutputStream stdout = new ByteArrayOutputStream(); + PrintStream outPrintStream = new PrintStream(stdout); + System.setOut(outPrintStream); + + // Upgrade schema from 0.7.0 to latest + execute(new HiveSchemaToolTaskUpgrade(), "-upgradeSchemaFrom 0.7.0"); + + // Verify that the schemaTool ran pre-upgrade scripts and ignored errors + assertTrue(stderr.toString().contains(invalidPreUpgradeScript)); + assertTrue(stderr.toString().contains("foo")); + assertFalse(stderr.toString().contains(validPreUpgradeScript0)); + assertFalse(stderr.toString().contains(validPreUpgradeScript1)); + assertTrue(stdout.toString().contains(validPreUpgradeScript0)); + assertTrue(stdout.toString().contains(validPreUpgradeScript1)); + + // Verify that driver works fine with latest schema + schemaTool.verifySchemaVersion(); + } + + /** + * Test script formatting + * @throws Exception + */ + public void testScripts() throws Exception { + String testScript[] = { + "-- this is a comment", + "DROP TABLE IF EXISTS fooTab;", + "/*!1234 this is comment code like mysql */;", + "CREATE TABLE fooTab(id INTEGER);", + "DROP TABLE footab;", + "-- ending comment" + }; + String resultScript[] = { + "DROP TABLE IF EXISTS fooTab", + "/*!1234 this is comment code like mysql */", + "CREATE TABLE fooTab(id INTEGER)", + "DROP TABLE footab", + }; + String expectedSQL = StringUtils.join(resultScript, System.getProperty("line.separator")) + + System.getProperty("line.separator"); + File testScriptFile = generateTestScript(testScript); + String flattenedSql = HiveSchemaHelper.getDbCommandParser("derby", false) + .buildCommand(testScriptFile.getParentFile().getPath(), + testScriptFile.getName()); + + assertEquals(expectedSQL, flattenedSql); + } + + /** + * Test nested script formatting + * @throws Exception + */ + public void testNestedScriptsForDerby() throws Exception { + String childTab1 = "childTab1"; + String childTab2 = "childTab2"; + String parentTab = "fooTab"; + + String childTestScript1[] = { + "-- this is a comment ", + "DROP TABLE IF EXISTS " + childTab1 + ";", + "CREATE TABLE " + childTab1 + "(id INTEGER);", + "DROP TABLE " + childTab1 + ";" + }; + String childTestScript2[] = { + "-- this is a comment", + "DROP TABLE IF EXISTS " + childTab2 + ";", + "CREATE TABLE " + childTab2 + "(id INTEGER);", + "-- this is also a comment", + "DROP TABLE " + childTab2 + ";" + }; + + String parentTestScript[] = { + " -- this is a comment", + "DROP TABLE IF EXISTS " + parentTab + ";", + " -- this is another comment ", + "CREATE TABLE " + parentTab + "(id INTEGER);", + "RUN '" + generateTestScript(childTestScript1).getName() + "';", + "DROP TABLE " + parentTab + ";", + "RUN '" + generateTestScript(childTestScript2).getName() + "';", + "--ending comment ", + }; + + File testScriptFile = generateTestScript(parentTestScript); + String flattenedSql = HiveSchemaHelper.getDbCommandParser("derby", false) + .buildCommand(testScriptFile.getParentFile().getPath(), + testScriptFile.getName()); + assertFalse(flattenedSql.contains("RUN")); + assertFalse(flattenedSql.contains("comment")); + assertTrue(flattenedSql.contains(childTab1)); + assertTrue(flattenedSql.contains(childTab2)); + assertTrue(flattenedSql.contains(parentTab)); + } + + /** + * Test nested script formatting + * @throws Exception + */ + public void testNestedScriptsForMySQL() throws Exception { + String childTab1 = "childTab1"; + String childTab2 = 
"childTab2"; + String parentTab = "fooTab"; + + String childTestScript1[] = { + "/* this is a comment code */", + "DROP TABLE IF EXISTS " + childTab1 + ";", + "CREATE TABLE " + childTab1 + "(id INTEGER);", + "DROP TABLE " + childTab1 + ";" + }; + String childTestScript2[] = { + "/* this is a special exec code */;", + "DROP TABLE IF EXISTS " + childTab2 + ";", + "CREATE TABLE " + childTab2 + "(id INTEGER);", + "-- this is a comment", + "DROP TABLE " + childTab2 + ";" + }; + + String parentTestScript[] = { + " -- this is a comment", + "DROP TABLE IF EXISTS " + parentTab + ";", + " /* this is special exec code */;", + "CREATE TABLE " + parentTab + "(id INTEGER);", + "SOURCE " + generateTestScript(childTestScript1).getName() + ";", + "DROP TABLE " + parentTab + ";", + "SOURCE " + generateTestScript(childTestScript2).getName() + ";", + "--ending comment ", + }; + + File testScriptFile = generateTestScript(parentTestScript); + String flattenedSql = HiveSchemaHelper.getDbCommandParser("mysql", false) + .buildCommand(testScriptFile.getParentFile().getPath(), + testScriptFile.getName()); + assertFalse(flattenedSql.contains("RUN")); + assertFalse(flattenedSql.contains("comment")); + assertTrue(flattenedSql.contains(childTab1)); + assertTrue(flattenedSql.contains(childTab2)); + assertTrue(flattenedSql.contains(parentTab)); + } + + /** + * Test script formatting + * @throws Exception + */ + public void testScriptWithDelimiter() throws Exception { + String testScript[] = { + "-- this is a comment", + "DROP TABLE IF EXISTS fooTab;", + "DELIMITER $$", + "/*!1234 this is comment code like mysql */$$", + "CREATE TABLE fooTab(id INTEGER)$$", + "CREATE PROCEDURE fooProc()", + "SELECT * FROM fooTab;", + "CALL barProc();", + "END PROCEDURE$$", + "DELIMITER ;", + "DROP TABLE footab;", + "-- ending comment" + }; + String resultScript[] = { + "DROP TABLE IF EXISTS fooTab", + "/*!1234 this is comment code like mysql */", + "CREATE TABLE fooTab(id INTEGER)", + "CREATE PROCEDURE fooProc()" + " " + + "SELECT * FROM fooTab;" + " " + + "CALL barProc();" + " " + + "END PROCEDURE", + "DROP TABLE footab", + }; + String expectedSQL = StringUtils.join(resultScript, System.getProperty("line.separator")) + + System.getProperty("line.separator"); + File testScriptFile = generateTestScript(testScript); + NestedScriptParser testDbParser = HiveSchemaHelper.getDbCommandParser("mysql", false); + String flattenedSql = testDbParser.buildCommand(testScriptFile.getParentFile().getPath(), + testScriptFile.getName()); + + assertEquals(expectedSQL, flattenedSql); + } + + /** + * Test script formatting + * @throws Exception + */ + public void testScriptMultiRowComment() throws Exception { + String testScript[] = { + "-- this is a comment", + "DROP TABLE IF EXISTS fooTab;", + "DELIMITER $$", + "/*!1234 this is comment code like mysql */$$", + "CREATE TABLE fooTab(id INTEGER)$$", + "DELIMITER ;", + "/* multiline comment started ", + " * multiline comment continue", + " * multiline comment ended */", + "DROP TABLE footab;", + "-- ending comment" + }; + String parsedScript[] = { + "DROP TABLE IF EXISTS fooTab", + "/*!1234 this is comment code like mysql */", + "CREATE TABLE fooTab(id INTEGER)", + "DROP TABLE footab", + }; + + String expectedSQL = StringUtils.join(parsedScript, System.getProperty("line.separator")) + + System.getProperty("line.separator"); + File testScriptFile = generateTestScript(testScript); + NestedScriptParser testDbParser = HiveSchemaHelper.getDbCommandParser("mysql", false); + String flattenedSql = 
testDbParser.buildCommand(testScriptFile.getParentFile().getPath(), + testScriptFile.getName()); + + assertEquals(expectedSQL, flattenedSql); + } + + /** + * Test nested script formatting + * @throws Exception + */ + public void testNestedScriptsForOracle() throws Exception { + String childTab1 = "childTab1"; + String childTab2 = "childTab2"; + String parentTab = "fooTab"; + + String childTestScript1[] = { + "-- this is a comment ", + "DROP TABLE IF EXISTS " + childTab1 + ";", + "CREATE TABLE " + childTab1 + "(id INTEGER);", + "DROP TABLE " + childTab1 + ";" + }; + String childTestScript2[] = { + "-- this is a comment", + "DROP TABLE IF EXISTS " + childTab2 + ";", + "CREATE TABLE " + childTab2 + "(id INTEGER);", + "-- this is also a comment", + "DROP TABLE " + childTab2 + ";" + }; + + String parentTestScript[] = { + " -- this is a comment", + "DROP TABLE IF EXISTS " + parentTab + ";", + " -- this is another comment ", + "CREATE TABLE " + parentTab + "(id INTEGER);", + "@" + generateTestScript(childTestScript1).getName() + ";", + "DROP TABLE " + parentTab + ";", + "@" + generateTestScript(childTestScript2).getName() + ";", + "--ending comment ", + }; + + File testScriptFile = generateTestScript(parentTestScript); + String flattenedSql = HiveSchemaHelper.getDbCommandParser("oracle", false) + .buildCommand(testScriptFile.getParentFile().getPath(), + testScriptFile.getName()); + assertFalse(flattenedSql.contains("@")); + assertFalse(flattenedSql.contains("comment")); + assertTrue(flattenedSql.contains(childTab1)); + assertTrue(flattenedSql.contains(childTab2)); + assertTrue(flattenedSql.contains(parentTab)); + } + + /** + * Test script formatting + * @throws Exception + */ + public void testPostgresFilter() throws Exception { + String testScript[] = { + "-- this is a comment", + "DROP TABLE IF EXISTS fooTab;", + HiveSchemaHelper.PostgresCommandParser.POSTGRES_STANDARD_STRINGS_OPT + ";", + "CREATE TABLE fooTab(id INTEGER);", + "DROP TABLE footab;", + "-- ending comment" + }; + + String expectedScriptWithOptionPresent[] = { + "DROP TABLE IF EXISTS fooTab", + HiveSchemaHelper.PostgresCommandParser.POSTGRES_STANDARD_STRINGS_OPT, + "CREATE TABLE fooTab(id INTEGER)", + "DROP TABLE footab", + }; + + NestedScriptParser noDbOptParser = HiveSchemaHelper + .getDbCommandParser("postgres", false); + String expectedSQL = StringUtils.join( + expectedScriptWithOptionPresent, System.getProperty("line.separator")) + + System.getProperty("line.separator"); + File testScriptFile = generateTestScript(testScript); + String flattenedSql = noDbOptParser.buildCommand( + testScriptFile.getParentFile().getPath(), testScriptFile.getName()); + assertEquals(expectedSQL, flattenedSql); + + String expectedScriptWithOptionAbsent[] = { + "DROP TABLE IF EXISTS fooTab", + "CREATE TABLE fooTab(id INTEGER)", + "DROP TABLE footab", + }; + + NestedScriptParser dbOptParser = HiveSchemaHelper.getDbCommandParser( + "postgres", + PostgresCommandParser.POSTGRES_SKIP_STANDARD_STRINGS_DBOPT, + null, null, null, null, false); + expectedSQL = StringUtils.join( + expectedScriptWithOptionAbsent, System.getProperty("line.separator")) + + System.getProperty("line.separator"); + testScriptFile = generateTestScript(testScript); + flattenedSql = dbOptParser.buildCommand( + testScriptFile.getParentFile().getPath(), testScriptFile.getName()); + assertEquals(expectedSQL, flattenedSql); + } + + /** + * Test validate uri of locations + * @throws Exception + */ + public void testValidateLocations() throws Exception { + execute(new 
HiveSchemaToolTaskInit(), "-initSchema"); + URI defaultRoot = new URI("hdfs://myhost.com:8020"); + URI defaultRoot2 = new URI("s3://myhost2.com:8888"); + //check empty DB + boolean isValid = validator.validateLocations(conn, null); + assertTrue(isValid); + isValid = validator.validateLocations(conn, new URI[] {defaultRoot,defaultRoot2}); + assertTrue(isValid); + + // Test valid case + String[] scripts = new String[] { + "insert into CTLGS values(3, 'test_cat_2', 'description', 'hdfs://myhost.com:8020/user/hive/warehouse/mydb')", + "insert into DBS values(2, 'my db', 'hdfs://myhost.com:8020/user/hive/warehouse/mydb', 'mydb', 'public', 'role', 'test_cat_2')", + "insert into DBS values(7, 'db with bad port', 'hdfs://myhost.com:8020/', 'haDB', 'public', 'role', 'test_cat_2')", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (3,null,'org.apache.hadoop.mapred.TextInputFormat','N','N',null,-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (4000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL,'n')", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (3 ,1435255431,2,0 ,'hive',0,3,'myView','VIRTUAL_VIEW','select a.col1,a.col2 from foo','select * from foo','n')", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (4012 ,1435255431,7,0 ,'hive',0,4000,'mytal4012','MANAGED_TABLE',NULL,NULL,'n')", + "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)", + "insert into SKEWED_STRING_LIST values(1)", + "insert into SKEWED_STRING_LIST values(2)", + "insert into SKEWED_COL_VALUE_LOC_MAP values(1,1,'hdfs://myhost.com:8020/user/hive/warehouse/mytal/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/')", + "insert into SKEWED_COL_VALUE_LOC_MAP values(2,2,'s3://myhost.com:8020/user/hive/warehouse/mytal/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/')" + }; + File scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = validator.validateLocations(conn, null); + assertTrue(isValid); + isValid = validator.validateLocations(conn, new URI[] {defaultRoot, defaultRoot2}); + assertTrue(isValid); + scripts 
= new String[] { + "delete from SKEWED_COL_VALUE_LOC_MAP", + "delete from SKEWED_STRING_LIST", + "delete from PARTITIONS", + "delete from TBLS", + "delete from SDS", + "delete from DBS", + "insert into DBS values(2, 'my db', '/user/hive/warehouse/mydb', 'mydb', 'public', 'role', 'test_cat_2')", + "insert into DBS values(4, 'my db2', 'hdfs://myhost.com:8020', '', 'public', 'role', 'test_cat_2')", + "insert into DBS values(6, 'db with bad port', 'hdfs://myhost.com:8020:', 'zDB', 'public', 'role', 'test_cat_2')", + "insert into DBS values(7, 'db with bad port', 'hdfs://mynameservice.com/', 'haDB', 'public', 'role', 'test_cat_2')", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://yourhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','file:///user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL,'n')", + "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (3000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','yourhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (4000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (4001,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (4003,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (4004,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (4002,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values 
(5000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','file:///user/admin/2016_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (3000 ,1435255431,2,0 ,'hive',0,3000,'mytal3000','MANAGED_TABLE',NULL,NULL,'n')", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (4011 ,1435255431,4,0 ,'hive',0,4001,'mytal4011','MANAGED_TABLE',NULL,NULL,'n')", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (4012 ,1435255431,4,0 ,'hive',0,4002,'','MANAGED_TABLE',NULL,NULL,'n')", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (4013 ,1435255431,4,0 ,'hive',0,4003,'mytal4013','MANAGED_TABLE',NULL,NULL,'n')", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (4014 ,1435255431,2,0 ,'hive',0,4003,'','MANAGED_TABLE',NULL,NULL,'n')", + "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(4001, 1441402388,0, 'd1=1/d2=4001',4001,4011)", + "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(4002, 1441402388,0, 'd1=1/d2=4002',4002,4012)", + "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(4003, 1441402388,0, 'd1=1/d2=4003',4003,4013)", + "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(4004, 1441402388,0, 'd1=1/d2=4004',4004,4014)", + "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(5000, 1441402388,0, 'd1=1/d2=5000',5000,2)", + "insert into SKEWED_STRING_LIST values(1)", + "insert into SKEWED_STRING_LIST values(2)", + "insert into SKEWED_COL_VALUE_LOC_MAP values(1,1,'hdfs://yourhost.com:8020/user/hive/warehouse/mytal/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/')", + "insert into SKEWED_COL_VALUE_LOC_MAP values(2,2,'file:///user/admin/warehouse/mytal/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/')" + }; + scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = validator.validateLocations(conn, null); + assertFalse(isValid); + isValid = validator.validateLocations(conn, new URI[] {defaultRoot, defaultRoot2}); + assertFalse(isValid); + } + + public void testHiveMetastoreDbPropertiesTable() throws HiveMetaException, IOException { + execute(new HiveSchemaToolTaskInit(), "-initSchemaTo 3.0.0"); + validateMetastoreDbPropertiesTable(); + } + + public void testMetastoreDbPropertiesAfterUpgrade() throws HiveMetaException, IOException { + execute(new HiveSchemaToolTaskInit(), "-initSchemaTo 2.0.0"); + execute(new HiveSchemaToolTaskUpgrade(), "-upgradeSchema"); + validateMetastoreDbPropertiesTable(); + } + + private File generateTestScript(String [] stmts) throws IOException { + File testScriptFile = File.createTempFile("schematest", ".sql"); + testScriptFile.deleteOnExit(); + FileWriter fstream = new FileWriter(testScriptFile.getPath()); + BufferedWriter out = new BufferedWriter(fstream); + for (String line: stmts) { + 
out.write(line); + out.newLine(); + } + out.close(); + return testScriptFile; + } + + private void validateMetastoreDbPropertiesTable() throws HiveMetaException, IOException { + boolean isValid = (boolean) validator.validateSchemaTables(conn); + assertTrue(isValid); + // adding same property key twice should throw unique key constraint violation exception + String[] scripts = new String[] { + "insert into METASTORE_DB_PROPERTIES values ('guid', 'test-uuid-1', 'dummy uuid 1')", + "insert into METASTORE_DB_PROPERTIES values ('guid', 'test-uuid-2', 'dummy uuid 2')", }; + File scriptFile = generateTestScript(scripts); + Exception ex = null; + try { + schemaTool.runBeeLine(scriptFile.getPath()); + } catch (Exception iox) { + ex = iox; + } + assertTrue(ex != null && ex instanceof IOException); + } + /** + * Write out a dummy pre-upgrade script with given SQL statement. + */ + private String writeDummyPreUpgradeScript(int index, String upgradeScriptName, + String sql) throws Exception { + String preUpgradeScript = "pre-" + index + "-" + upgradeScriptName; + String dummyPreScriptPath = System.getProperty("test.tmp.dir", "target/tmp") + + File.separatorChar + "scripts" + File.separatorChar + "metastore" + + File.separatorChar + "upgrade" + File.separatorChar + "derby" + + File.separatorChar + preUpgradeScript; + FileWriter fstream = new FileWriter(dummyPreScriptPath); + BufferedWriter out = new BufferedWriter(fstream); + out.write(sql + System.getProperty("line.separator") + ";"); + out.close(); + return preUpgradeScript; + } + + /** + * Insert the records in DB to simulate a hive table + * @throws IOException + */ + private void createTestHiveTableSchemas() throws IOException { + String[] scripts = new String[] { + "insert into CTLGS values(2, 'my_catalog', 'description', 'hdfs://myhost.com:8020/user/hive/warehouse/mydb')", + "insert into DBS values(2, 'my db', 'hdfs://myhost.com:8021/user/hive/warehouse/mydb', 'mydb', 'public', 'role', 'my_catalog')", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL,'n')", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT,IS_REWRITE_ENABLED) values (3 ,1435255431,2,0 ,'hive',0,2,'aTable','MANAGED_TABLE',NULL,NULL,'n')", + "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)" + }; + File scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + } + + /** + * A mock Connection class that throws an exception out of getMetaData(). 
+ */ + class BadMetaDataConnection extends DelegatingConnection { + static final String FAILURE_TEXT = "fault injected"; + + BadMetaDataConnection(Connection connection) { + super(connection); + } + + @Override + public DatabaseMetaData getMetaData() throws SQLException { + throw new SQLException(FAILURE_TEXT); + } + } + + private void execute(HiveSchemaToolTask task, String taskArgs) throws HiveMetaException { + try { + StrTokenizer tokenizer = new StrTokenizer(argsBase + taskArgs, ' ', '\"'); + HiveSchemaToolCommandLine cl = new HiveSchemaToolCommandLine(tokenizer.getTokenArray()); + task.setCommandLineArguments(cl); + } catch (Exception e) { + throw new IllegalStateException("Could not parse command line \n" + argsBase + taskArgs, e); + } + + task.setHiveSchemaTool(schemaTool); + task.execute(); + } +} diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/schematool/TestSchemaToolCatalogOps.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/schematool/TestSchemaToolCatalogOps.java new file mode 100644 index 0000000..2337977 --- /dev/null +++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/schematool/TestSchemaToolCatalogOps.java @@ -0,0 +1,474 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.beeline.schematool; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.text.StrTokenizer; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.Catalog; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Function; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.client.builder.CatalogBuilder; +import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder; +import org.apache.hadoop.hive.metastore.client.builder.FunctionBuilder; +import org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder; +import org.apache.hadoop.hive.metastore.client.builder.TableBuilder; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.thrift.TException; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME; +import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_NAME; + +public class TestSchemaToolCatalogOps { + private static HiveSchemaTool schemaTool; + private static HiveConf conf; + private IMetaStoreClient client; + private static String testMetastoreDB; + private static String argsBase; + + @BeforeClass + public static void initDb() throws HiveMetaException, IOException { + conf = new HiveConf(); + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.AUTO_CREATE_ALL, false); + MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.HMS_HANDLER_ATTEMPTS, 1); + MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.THRIFT_CONNECTION_RETRIES, 1); + testMetastoreDB = System.getProperty("java.io.tmpdir") + + File.separator + "testschematoolcatopsdb"; + MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CONNECT_URL_KEY, + "jdbc:derby:" + testMetastoreDB + ";create=true"); + schemaTool = new HiveSchemaTool( + System.getProperty("test.tmp.dir", "target/tmp"), conf, "derby", null); + + String userName = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.CONNECTION_USER_NAME); + String passWord = MetastoreConf.getPassword(conf, MetastoreConf.ConfVars.PWD); + schemaTool.setUserName(userName); + schemaTool.setPassWord(passWord); + + argsBase = "-dbType derby -userName " + userName + " -passWord " + passWord + " "; + execute(new HiveSchemaToolTaskInit(), "-initSchema"); // Pre-install the database so all the tables are there. 
+ } + + @AfterClass + public static void removeDb() throws Exception { + File metaStoreDir = new File(testMetastoreDB); + if (metaStoreDir.exists()) { + FileUtils.forceDeleteOnExit(metaStoreDir); + } + } + + @Before + public void createClient() throws MetaException { + client = new HiveMetaStoreClient(conf); + } + + @Test + public void createCatalog() throws HiveMetaException, TException { + String catName = "my_test_catalog"; + String location = "file:///tmp/my_test_catalog"; + String description = "very descriptive"; + String argsCreate = String.format("-createCatalog %s -catalogLocation \"%s\" -catalogDescription \"%s\"", + catName, location, description); + execute(new HiveSchemaToolTaskCreateCatalog(), argsCreate); + + Catalog cat = client.getCatalog(catName); + Assert.assertEquals(location, cat.getLocationUri()); + Assert.assertEquals(description, cat.getDescription()); + } + + @Test(expected = HiveMetaException.class) + public void createExistingCatalog() throws HiveMetaException { + String catName = "hive"; + String location = "somewhere"; + String argsCreate = String.format("-createCatalog %s -catalogLocation \"%s\"", + catName, location); + execute(new HiveSchemaToolTaskCreateCatalog(), argsCreate); + } + + @Test + public void createExistingCatalogWithIfNotExists() throws HiveMetaException { + String catName = "my_existing_test_catalog"; + String location = "file:///tmp/my_test_catalog"; + String description = "very descriptive"; + String argsCreate1 = String.format("-createCatalog %s -catalogLocation \"%s\" -catalogDescription \"%s\"", + catName, location, description); + execute(new HiveSchemaToolTaskCreateCatalog(), argsCreate1); + + String argsCreate2 = String.format(" -createCatalog %s -catalogLocation \"%s\" -catalogDescription \"%s\" -ifNotExists", + catName, location, description); + execute(new HiveSchemaToolTaskCreateCatalog(), argsCreate2); + } + + @Test + public void alterCatalog() throws HiveMetaException, TException { + String catName = "an_alterable_catalog"; + String location = "file:///tmp/an_alterable_catalog"; + String description = "description"; + String argsCreate = String.format("-createCatalog %s -catalogLocation \"%s\" -catalogDescription \"%s\"", + catName, location, description); + execute(new HiveSchemaToolTaskCreateCatalog(), argsCreate); + + location = "file:///tmp/somewhere_else"; + String argsAlter1 = String.format("-alterCatalog %s -catalogLocation \"%s\"", + catName, location); + execute(new HiveSchemaToolTaskAlterCatalog(), argsAlter1); + Catalog cat = client.getCatalog(catName); + Assert.assertEquals(location, cat.getLocationUri()); + Assert.assertEquals(description, cat.getDescription()); + + description = "a better description"; + String argsAlter2 = String.format("-alterCatalog %s -catalogDescription \"%s\"", + catName, description); + execute(new HiveSchemaToolTaskAlterCatalog(), argsAlter2); + cat = client.getCatalog(catName); + Assert.assertEquals(location, cat.getLocationUri()); + Assert.assertEquals(description, cat.getDescription()); + + location = "file:///tmp/a_third_location"; + description = "best description yet"; + String argsAlter3 = String.format("-alterCatalog %s -catalogLocation \"%s\" -catalogDescription \"%s\"", + catName, location, description); + execute(new HiveSchemaToolTaskAlterCatalog(), argsAlter3); + cat = client.getCatalog(catName); + Assert.assertEquals(location, cat.getLocationUri()); + Assert.assertEquals(description, cat.getDescription()); + } + + @Test(expected = HiveMetaException.class) + public void 
alterBogusCatalog() throws HiveMetaException { + String catName = "nosuch"; + String location = "file:///tmp/somewhere"; + String description = "whatever"; + String argsAlter = String.format("-alterCatalog %s -catalogLocation \"%s\" -catalogDescription \"%s\"", + catName, location, description); + execute(new HiveSchemaToolTaskAlterCatalog(), argsAlter); + } + + @Test(expected = HiveMetaException.class) + public void alterCatalogNoChange() throws HiveMetaException { + String catName = "alter_cat_no_change"; + String location = "file:///tmp/alter_cat_no_change"; + String description = "description"; + String argsCreate = String.format("-createCatalog %s -catalogLocation \"%s\" -catalogDescription \"%s\"", + catName, location, description); + execute(new HiveSchemaToolTaskCreateCatalog(), argsCreate); + + String argsAlter = String.format("-alterCatalog %s", catName); + execute(new HiveSchemaToolTaskAlterCatalog(), argsAlter); + } + + @Test + public void moveDatabase() throws HiveMetaException, TException { + String toCatName = "moveDbCat"; + String dbName = "moveDbDb"; + String tableName = "moveDbTable"; + String funcName = "movedbfunc"; + String partVal = "moveDbKey"; + + new CatalogBuilder() + .setName(toCatName) + .setLocation("file:///tmp") + .create(client); + + Database db = new DatabaseBuilder() + .setCatalogName(DEFAULT_CATALOG_NAME) + .setName(dbName) + .create(client, conf); + + new FunctionBuilder() + .inDb(db) + .setName(funcName) + .setClass("org.apache.hive.myudf") + .create(client, conf); + + Table table = new TableBuilder() + .inDb(db) + .setTableName(tableName) + .addCol("a", "int") + .addPartCol("p", "string") + .create(client, conf); + + new PartitionBuilder() + .inTable(table) + .addValue(partVal) + .addToTable(client, conf); + + String argsMoveDB = String.format("-moveDatabase %s -fromCatalog %s -toCatalog %s", dbName, + DEFAULT_CATALOG_NAME, toCatName); + execute(new HiveSchemaToolTaskMoveDatabase(), argsMoveDB); + + Database fetchedDb = client.getDatabase(toCatName, dbName); + Assert.assertNotNull(fetchedDb); + Assert.assertEquals(toCatName.toLowerCase(), fetchedDb.getCatalogName()); + + Function fetchedFunction = client.getFunction(toCatName, dbName, funcName); + Assert.assertNotNull(fetchedFunction); + Assert.assertEquals(toCatName.toLowerCase(), fetchedFunction.getCatName()); + Assert.assertEquals(dbName.toLowerCase(), fetchedFunction.getDbName()); + + Table fetchedTable = client.getTable(toCatName, dbName, tableName); + Assert.assertNotNull(fetchedTable); + Assert.assertEquals(toCatName.toLowerCase(), fetchedTable.getCatName()); + Assert.assertEquals(dbName.toLowerCase(), fetchedTable.getDbName()); + + Partition fetchedPart = + client.getPartition(toCatName, dbName, tableName, Collections.singletonList(partVal)); + Assert.assertNotNull(fetchedPart); + Assert.assertEquals(toCatName.toLowerCase(), fetchedPart.getCatName()); + Assert.assertEquals(dbName.toLowerCase(), fetchedPart.getDbName()); + Assert.assertEquals(tableName.toLowerCase(), fetchedPart.getTableName()); + } + + @Test + public void moveDatabaseWithExistingDbOfSameNameAlreadyInTargetCatalog() + throws TException, HiveMetaException { + String catName = "clobberCatalog"; + new CatalogBuilder() + .setName(catName) + .setLocation("file:///tmp") + .create(client); + try { + String argsMoveDB = String.format("-moveDatabase %s -fromCatalog %s -toCatalog %s", + DEFAULT_DATABASE_NAME, catName, DEFAULT_CATALOG_NAME); + execute(new HiveSchemaToolTaskMoveDatabase(), argsMoveDB); + Assert.fail("Attempt to move default 
database should have failed."); + } catch (HiveMetaException e) { + // good + } + + // Make sure nothing really moved + Set<String> dbNames = new HashSet<>(client.getAllDatabases(DEFAULT_CATALOG_NAME)); + Assert.assertTrue(dbNames.contains(DEFAULT_DATABASE_NAME)); + } + + @Test(expected = HiveMetaException.class) + public void moveNonExistentDatabase() throws TException, HiveMetaException { + String catName = "moveNonExistentDb"; + new CatalogBuilder() + .setName(catName) + .setLocation("file:///tmp") + .create(client); + String argsMoveDB = String.format("-moveDatabase nosuch -fromCatalog %s -toCatalog %s", + catName, DEFAULT_CATALOG_NAME); + execute(new HiveSchemaToolTaskMoveDatabase(), argsMoveDB); + } + + @Test + public void moveDbToNonExistentCatalog() throws TException, HiveMetaException { + String dbName = "doomedToHomelessness"; + new DatabaseBuilder() + .setName(dbName) + .create(client, conf); + try { + String argsMoveDB = String.format("-moveDatabase %s -fromCatalog %s -toCatalog nosuch", + dbName, DEFAULT_CATALOG_NAME); + execute(new HiveSchemaToolTaskMoveDatabase(), argsMoveDB); + Assert.fail("Attempt to move database to non-existent catalog should have failed."); + } catch (HiveMetaException e) { + // good + } + + // Make sure nothing really moved + Set<String> dbNames = new HashSet<>(client.getAllDatabases(DEFAULT_CATALOG_NAME)); + Assert.assertTrue(dbNames.contains(dbName.toLowerCase())); + } + + @Test + public void moveTable() throws TException, HiveMetaException { + String toCatName = "moveTableCat"; + String toDbName = "moveTableDb"; + String tableName = "moveTableTable"; + String partVal = "moveTableKey"; + + new CatalogBuilder() + .setName(toCatName) + .setLocation("file:///tmp") + .create(client); + + new DatabaseBuilder() + .setCatalogName(toCatName) + .setName(toDbName) + .create(client, conf); + + Table table = new TableBuilder() + .setTableName(tableName) + .addCol("a", "int") + .addPartCol("p", "string") + .create(client, conf); + + new PartitionBuilder() + .inTable(table) + .addValue(partVal) + .addToTable(client, conf); + + String argsMoveTable = String.format("-moveTable %s -fromCatalog %s -toCatalog %s -fromDatabase %s -toDatabase %s", + tableName, DEFAULT_CATALOG_NAME, toCatName, DEFAULT_DATABASE_NAME, toDbName); + execute(new HiveSchemaToolTaskMoveTable(), argsMoveTable); + + Table fetchedTable = client.getTable(toCatName, toDbName, tableName); + Assert.assertNotNull(fetchedTable); + Assert.assertEquals(toCatName.toLowerCase(), fetchedTable.getCatName()); + Assert.assertEquals(toDbName.toLowerCase(), fetchedTable.getDbName()); + + Partition fetchedPart = + client.getPartition(toCatName, toDbName, tableName, Collections.singletonList(partVal)); + Assert.assertNotNull(fetchedPart); + Assert.assertEquals(toCatName.toLowerCase(), fetchedPart.getCatName()); + Assert.assertEquals(toDbName.toLowerCase(), fetchedPart.getDbName()); + Assert.assertEquals(tableName.toLowerCase(), fetchedPart.getTableName()); + } + + @Test + public void moveTableWithinCatalog() throws TException, HiveMetaException { + String toDbName = "moveTableWithinCatalogDb"; + String tableName = "moveTableWithinCatalogTable"; + String partVal = "moveTableWithinCatalogKey"; + + new DatabaseBuilder() + .setName(toDbName) + .create(client, conf); + + Table table = new TableBuilder() + .setTableName(tableName) + .addCol("a", "int") + .addPartCol("p", "string") + .create(client, conf); + + new PartitionBuilder() + .inTable(table) + .addValue(partVal) + .addToTable(client, conf); + + String argsMoveTable = 
String.format("-moveTable %s -fromCatalog %s -toCatalog %s -fromDatabase %s -toDatabase %s", + tableName, DEFAULT_CATALOG_NAME, DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME, toDbName); + execute(new HiveSchemaToolTaskMoveTable(), argsMoveTable); + + Table fetchedTable = client.getTable(DEFAULT_CATALOG_NAME, toDbName, tableName); + Assert.assertNotNull(fetchedTable); + Assert.assertEquals(DEFAULT_CATALOG_NAME, fetchedTable.getCatName()); + Assert.assertEquals(toDbName.toLowerCase(), fetchedTable.getDbName()); + + Partition fetchedPart = + client.getPartition(DEFAULT_CATALOG_NAME, toDbName, tableName, Collections.singletonList(partVal)); + Assert.assertNotNull(fetchedPart); + Assert.assertEquals(DEFAULT_CATALOG_NAME, fetchedPart.getCatName()); + Assert.assertEquals(toDbName.toLowerCase(), fetchedPart.getDbName()); + Assert.assertEquals(tableName.toLowerCase(), fetchedPart.getTableName()); + } + + @Test + public void moveTableWithExistingTableOfSameNameAlreadyInTargetDatabase() + throws TException, HiveMetaException { + String toDbName = "clobberTableDb"; + String tableName = "clobberTableTable"; + + Database toDb = new DatabaseBuilder() + .setName(toDbName) + .create(client, conf); + + new TableBuilder() + .setTableName(tableName) + .addCol("a", "int") + .create(client, conf); + + new TableBuilder() + .inDb(toDb) + .setTableName(tableName) + .addCol("b", "varchar(32)") + .create(client, conf); + + try { + String argsMoveTable = String.format("-moveTable %s -fromCatalog %s -toCatalog %s -fromDatabase %s -toDatabase %s", + tableName, DEFAULT_CATALOG_NAME, DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME, toDbName); + execute(new HiveSchemaToolTaskMoveTable(), argsMoveTable); + Assert.fail("Attempt to move table should have failed."); + } catch (HiveMetaException e) { + // good + } + + // Make sure nothing really moved + Set<String> tableNames = new HashSet<>(client.getAllTables(DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME)); + Assert.assertTrue(tableNames.contains(tableName.toLowerCase())); + + // Make sure the table in the target database didn't get clobbered + Table fetchedTable = client.getTable(DEFAULT_CATALOG_NAME, toDbName, tableName); + Assert.assertEquals("b", fetchedTable.getSd().getCols().get(0).getName()); + } + + @Test(expected = HiveMetaException.class) + public void moveNonExistentTable() throws TException, HiveMetaException { + String toDbName = "moveNonExistentTable"; + new DatabaseBuilder() + .setName(toDbName) + .create(client, conf); + String argsMoveTable = String.format("-moveTable nosuch -fromCatalog %s -toCatalog %s -fromDatabase %s -toDatabase %s", + DEFAULT_CATALOG_NAME, DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME, toDbName); + execute(new HiveSchemaToolTaskMoveTable(), argsMoveTable); + } + + @Test + public void moveTableToNonExistentDb() throws TException, HiveMetaException { + String tableName = "doomedToWander"; + new TableBuilder() + .setTableName(tableName) + .addCol("a", "int") + .create(client, conf); + + try { + String argsMoveTable = String.format("-moveTable %s -fromCatalog %s -toCatalog %s -fromDatabase %s -toDatabase nosuch", + tableName, DEFAULT_CATALOG_NAME, DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME); + execute(new HiveSchemaToolTaskMoveTable(), argsMoveTable); + Assert.fail("Attempt to move table to non-existent database should have failed."); + } catch (HiveMetaException e) { + // good + } + + // Make sure nothing really moved + Set<String> tableNames = new HashSet<>(client.getAllTables(DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME)); + 
Assert.assertTrue(tableNames.contains(tableName.toLowerCase())); + } + + private static void execute(HiveSchemaToolTask task, String taskArgs) throws HiveMetaException { + try { + StrTokenizer tokenizer = new StrTokenizer(argsBase + taskArgs, ' ', '\"'); + HiveSchemaToolCommandLine cl = new HiveSchemaToolCommandLine(tokenizer.getTokenArray()); + task.setCommandLineArguments(cl); + } catch (Exception e) { + throw new IllegalStateException("Could not parse command line \n" + argsBase + taskArgs, e); + } + + task.setHiveSchemaTool(schemaTool); + task.execute(); + } +}