diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java index 96b2978..01c360b 100644 --- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java +++ b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java @@ -181,7 +181,7 @@ private String getMetaStoreSchemaVersion(Connection metastoreConn, try(Statement stmt = metastoreConn.createStatement(); ResultSet res = stmt.executeQuery(versionQuery)) { if (!res.next()) { - throw new HiveMetaException("Didn't find version data in metastore"); + throw new HiveMetaException("Could not find version info in metastore VERSION table"); } String currentSchemaVersion = res.getString(1); if (checkDuplicatedVersion && res.next()) { @@ -189,7 +189,7 @@ private String getMetaStoreSchemaVersion(Connection metastoreConn, } return currentSchemaVersion; } catch (SQLException e) { - throw new HiveMetaException("Failed to get schema version.", e); + throw new HiveMetaException("Failed to get schema version, Cause:" + e.getMessage(), e); } } @@ -199,6 +199,7 @@ boolean validateLocations(Connection conn, URI[] defaultServers) throws HiveMeta rtn = checkMetaStoreDBLocation(conn, defaultServers); rtn = checkMetaStoreTableLocation(conn, defaultServers) && rtn; rtn = checkMetaStorePartitionLocation(conn, defaultServers) && rtn; + rtn = checkMetaStoreSkewedColumnsLocation(conn, defaultServers) && rtn; System.out.println((rtn ?
"Succeeded" : "Failed") + " in database/table/partition location validation"); return rtn; } @@ -361,6 +362,64 @@ private boolean checkMetaStorePartitionLocation(Connection conn, URI[] defaultSe return isValid; } + private boolean checkMetaStoreSkewedColumnsLocation(Connection conn, URI[] defaultServers) + throws HiveMetaException { + String skewedColLoc, skewedColIDRange; + boolean isValid = true; + int numOfInvalid = 0; + if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + skewedColIDRange = "select max(\"STRING_LIST_ID_KID\"), min(\"STRING_LIST_ID_KID\") from \"SKEWED_COL_VALUE_LOC_MAP\" "; + } else { + skewedColIDRange = "select max(STRING_LIST_ID_KID), min(STRING_LIST_ID_KID) from SKEWED_COL_VALUE_LOC_MAP"; + } + + if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + skewedColLoc = "select t.\"TBL_NAME\", t.\"TBL_ID\", sk.\"STRING_LIST_ID_KID\", sk.\"LOCATION\" from \"TBLS\" t, \"SDS\" s, \"SKEWED_COL_VALUE_LOC_MAP\" sk " + + "where sk.\"SD_ID\" = s.\"SD_ID\" and s.\"SD_ID\" = t.\"SD_ID\" and sk.\"STRING_LIST_ID_KID\" >= ? and sk.\"STRING_LIST_ID_KID\" <= ? "; + } else { + skewedColLoc = "select t.TBL_NAME, t.TBL_ID, sk.STRING_LIST_ID_KID, sk.LOCATION from TBLS t, SDS s, SKEWED_COL_VALUE_LOC_MAP sk " + + "where sk.SD_ID = s.SD_ID and s.SD_ID = t.SD_ID and sk.STRING_LIST_ID_KID >= ? and sk.STRING_LIST_ID_KID <= ? 
"; + } + + long maxID = 0, minID = 0; + long rtnSize = 2000; + + try { + Statement stmt = conn.createStatement(); + ResultSet res = stmt.executeQuery(skewedColIDRange); + if (res.next()) { + maxID = res.getLong(1); + minID = res.getLong(2); + } + res.close(); + stmt.close(); + PreparedStatement pStmt = conn.prepareStatement(skewedColLoc); + while (minID <= maxID) { + pStmt.setLong(1, minID); + pStmt.setLong(2, minID + rtnSize); + res = pStmt.executeQuery(); + while (res.next()) { + String locValue = res.getString(4); + String entity = "Table " + getNameOrID(res,1,2) + + ", String list " + res.getString(3); + if (!checkLocation(entity, locValue, defaultServers)) { + numOfInvalid++; + } + } + res.close(); + minID += rtnSize + 1; + } + pStmt.close(); + } catch (SQLException e) { + throw new HiveMetaException("Failed to get skewed columns location info.", e); + } + if (numOfInvalid > 0) { + isValid = false; + System.err.println("Total number of invalid SKEWED_COL_VALUE_LOC_MAP locations is: "+ numOfInvalid); + } + return isValid; + } + /** * Check if the location is valid for the given entity * @param entity the entity to represent a database, partition or table @@ -537,11 +596,26 @@ public void doValidate() throws HiveMetaException { System.out.println("Starting metastore validation"); Connection conn = getConnectionToMetastore(false); try { - validateSchemaVersions(conn); - validateSequences(conn); - validateSchemaTables(conn); - validateLocations(conn, this.validationServers); - validateColumnNullValues(conn); + if (validateSchemaVersions(conn)) + System.out.println("[SUCCESS]\n"); + else + System.out.println("[FAIL]\n"); + if (validateSequences(conn)) + System.out.println("[SUCCESS]\n"); + else + System.out.println("[FAIL]\n"); + if (validateSchemaTables(conn)) + System.out.println("[SUCCESS]\n"); + else + System.out.println("[FAIL]\n"); + if (validateLocations(conn, this.validationServers)) + System.out.println("[SUCCESS]\n"); + else + 
System.out.println("[FAIL]\n"); + if (validateColumnNullValues(conn)) + System.out.println("[SUCCESS]\n"); + else + System.out.println("[FAIL]\n"); } finally { if (conn != null) { try { @@ -620,7 +694,7 @@ boolean validateSchemaVersions(Connection conn) throws HiveMetaException { } catch (HiveMetaException hme) { if (hme.getMessage().contains("Metastore schema version is not compatible") || hme.getMessage().contains("Multiple versions were found in metastore") - || hme.getMessage().contains("Didn't find version data in metastore")) { + || hme.getMessage().contains("Could not find version info in metastore VERSION table")) { System.out.println("Failed in schema version validation: " + hme.getMessage()); return false; } else { @@ -632,14 +706,27 @@ boolean validateSchemaVersions(Connection conn) throws HiveMetaException { } boolean validateSchemaTables(Connection conn) throws HiveMetaException { + String version = null; ResultSet rs = null; DatabaseMetaData metadata = null; List dbTables = new ArrayList(); List schemaTables = new ArrayList(); List subScripts = new ArrayList(); - String version = getMetaStoreSchemaVersion(conn); + Connection hmsConn = getConnectionToMetastore(false); + + System.out.println("Validating metastore schema tables"); + try { + version = getMetaStoreSchemaVersion(hmsConn); + } catch (HiveMetaException he) { + System.err.println("Failed to determine schema version from Hive Metastore DB," + he.getMessage()); + LOG.error("Failed to determine schema version from Hive Metastore DB," + he.getMessage()); + return false; + } + + // re-open the hms connection + hmsConn = getConnectionToMetastore(false); - System.out.println("Validating tables in the schema for version " + version); + LOG.info("Validating tables in the schema for version " + version); try { metadata = conn.getMetaData(); String[] types = {"TABLE"}; @@ -652,7 +739,7 @@ boolean validateSchemaTables(Connection conn) throws HiveMetaException { LOG.debug("Found table " + table + " in HMS 
dbstore"); } } catch (SQLException e) { - throw new HiveMetaException(e); + throw new HiveMetaException("Failed to retrieve schema tables from Hive Metastore DB," + e.getMessage(), e); } finally { if (rs != null) { try { @@ -666,20 +753,24 @@ boolean validateSchemaTables(Connection conn) throws HiveMetaException { // parse the schema file to determine the tables that are expected to exist // we are using oracle schema because it is simpler to parse, no quotes or backticks etc String baseDir = new File(metaStoreSchemaInfo.getMetaStoreScriptDir()).getParent(); - String schemaFile = baseDir + "/oracle/hive-schema-" + version + ".oracle.sql"; + String schemaFile = baseDir + "/" + dbType + "/hive-schema-" + version + "." + dbType + ".sql"; try { LOG.debug("Parsing schema script " + schemaFile); subScripts.addAll(findCreateTable(schemaFile, schemaTables)); while (subScripts.size() > 0) { - schemaFile = baseDir + "/oracle/" + subScripts.remove(0); - LOG.debug("Parsing subscript " + schemaFile); + schemaFile = baseDir + "/" + dbType + "/" + subScripts.remove(0); + LOG.info("Parsing subscript " + schemaFile); subScripts.addAll(findCreateTable(schemaFile, schemaTables)); } } catch (Exception e) { + System.err.println("Exception in parsing schema file, cause " + e.getMessage()); + System.out.println("Schema table validation failed!!!"); return false; } + LOG.debug("Schema tables:[ " + Arrays.toString(schemaTables.toArray()) + " ]"); + LOG.debug("DB tables:[ " + Arrays.toString(dbTables.toArray()) + " ]"); // now diff the lists int schemaSize = schemaTables.size(); schemaTables.removeAll(dbTables); @@ -690,37 +781,91 @@ boolean validateSchemaTables(Connection conn) throws HiveMetaException { System.out.println("Schema table validation failed!!!"); return false; } else { - System.out.println("Succeeded in schema table validation"); + System.out.println("Succeeded in schema table validation," + schemaSize + " tables matched"); return true; } } - private List findCreateTable(String
path, List tableList) { - Matcher matcher = null; - String line = null; - List subs = new ArrayList(); - final String NESTED_SCRIPT_IDENTIFIER = "@"; - Pattern regexp = Pattern.compile("(CREATE TABLE(IF NOT EXISTS)*) (\\S+).*"); + private List findCreateTable(String path, List tableList) + throws Exception { + Matcher matcher = null; + Pattern regexp = null; + List subs = new ArrayList(); + String NESTED_SCRIPT_IDENTIFIER = null; + int groupNo = 0; + + switch (dbType) { + case "oracle": + NESTED_SCRIPT_IDENTIFIER = "@"; + regexp = Pattern.compile("(CREATE TABLE(IF NOT EXISTS)*) (\\S+).*"); + groupNo = 3; + break; + + case "mysql": + NESTED_SCRIPT_IDENTIFIER = "SOURCE "; + regexp = Pattern.compile("(CREATE TABLE) (\\S+).*"); + groupNo = 2; + break; + + case "mssql": + NESTED_SCRIPT_IDENTIFIER = ":r "; + regexp = Pattern.compile("(CREATE TABLE) (\\S+).*"); + groupNo = 2; + break; + + case "derby": + NESTED_SCRIPT_IDENTIFIER = "RUN "; + regexp = Pattern.compile("(CREATE TABLE(IF NOT EXISTS)*) (\\S+).*"); + groupNo = 3; + break; + + case "postgres": + NESTED_SCRIPT_IDENTIFIER = "\\i "; + regexp = Pattern.compile("(CREATE TABLE(IF NOT EXISTS)*) (\\S+).*"); + groupNo = 3; + break; + + default: + NESTED_SCRIPT_IDENTIFIER = "@"; + regexp = Pattern.compile("(CREATE TABLE(IF NOT EXISTS)*) (\\S+).*"); + groupNo = 3; + break; + } + + LOG.debug("Script identifier is:" + NESTED_SCRIPT_IDENTIFIER + ",path=" + path); + if (!(new File(path)).exists()) { + throw new Exception(path + " does not exist. Potentially incorrect version in the metastore VERSION table"); + } try ( BufferedReader reader = new BufferedReader(new FileReader(path)); ){ + String line = null; while ((line = reader.readLine()) != null) { if (line.startsWith(NESTED_SCRIPT_IDENTIFIER)) { - int endIndex = (line.indexOf(";") > -1 ) ? 
line.indexOf(";") : line.length(); - // remove the trailing SEMI-COLON if any - subs.add(line.substring(NESTED_SCRIPT_IDENTIFIER.length(), endIndex)); + String subScript = null; + int endIndex = (line.indexOf(";") > -1) ? line.indexOf(";") : line.length(); + subScript = line.substring(NESTED_SCRIPT_IDENTIFIER.length(), endIndex); + subs.add(subScript.replaceAll("'","")); continue; } + line = line.replaceAll("\\(", " "); + line = line.replaceAll("IF NOT EXISTS ", ""); + line = line.replaceAll("`",""); + line = line.replaceAll("'",""); + line = line.replaceAll("\"",""); matcher = regexp.matcher(line); + if (matcher.find()) { - String table = matcher.group(3); + String table = matcher.group(groupNo); + if (dbType.equals("derby")) + table = table.replaceAll("APP\\.",""); tableList.add(table.toLowerCase()); LOG.debug("Found table " + table + " in the schema"); } } } catch (IOException ex){ - ex.printStackTrace(); + throw new Exception(ex.getMessage(), ex); } return subs;