diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java index 3402470d749aa8e612ef1913aff6e0ccd3dfc5b1..dd27329a64453a56d2a363d7c080fb2f8145f202 100644 --- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java +++ b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java @@ -29,6 +29,7 @@ import org.apache.commons.io.output.NullOutputStream; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.HiveMetaException; @@ -48,8 +49,10 @@ import java.io.FileWriter; import java.io.IOException; import java.io.PrintStream; +import java.net.URI; import java.sql.Connection; import java.sql.DatabaseMetaData; +import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; @@ -177,6 +180,274 @@ private String getMetaStoreSchemaVersion(Connection metastoreConn) } } + public boolean validateLocations(String defaultLocPrefix) throws HiveMetaException { + boolean rtn; + rtn = checkMetaStoreDBLocation(defaultLocPrefix); + rtn = checkMetaStoreTableLocation(defaultLocPrefix) && rtn; + rtn = checkMetaStorePartitionLocation(defaultLocPrefix) && rtn; + return rtn; + } + + private String getNameOrID(ResultSet res, int nameInx, int idInx) throws SQLException { + String itemName = res.getString(nameInx); + return (itemName == null || itemName.isEmpty()) ? "ID: " + res.getString(idInx) : "Name: " + itemName; + } + + // read schema version from metastore + private boolean checkMetaStoreDBLocation(String locHeader) + throws HiveMetaException { + String defaultPrefix = locHeader; + String dbLoc; + boolean isValid = true; + int numOfInvalid = 0; + Connection metastoreConn = getConnectionToMetastore(true); + if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + dbLoc = "select dbt.\"DB_ID\", dbt.\"NAME\", dbt.\"DB_LOCATION_URI\" from \"DBS\" dbt"; + } else { + dbLoc = "select dbt.DB_ID, dbt.NAME, dbt.DB_LOCATION_URI from DBS dbt"; + } + String locValue; + String dbName; + try(Statement stmt = metastoreConn.createStatement(); + ResultSet res = stmt.executeQuery(dbLoc)) { + while (res.next()) { + locValue = res.getString(3); + if (locValue == null) { + System.err.println("NULL Location for DB with " + getNameOrID(res,2,1)); + numOfInvalid++; + } else { + URI currentUri = null; + try { + currentUri = new Path(locValue).toUri(); + } catch (Exception pe) { + System.err.println("Invalid Location for DB with " + getNameOrID(res,2,1)); + System.err.println(pe.getMessage()); + numOfInvalid++; + continue; + } + + if (currentUri.getScheme() == null || currentUri.getScheme().isEmpty()) { + System.err.println("Missing Location scheme for DB with " + getNameOrID(res,2,1)); + System.err.println("The Location is: " + locValue); + numOfInvalid++; + } else if (defaultPrefix != null && !defaultPrefix.isEmpty() && locValue.substring(0,defaultPrefix.length()) + .compareToIgnoreCase(defaultPrefix) != 0) { + System.err.println("Mismatch root Location for DB with " + getNameOrID(res,2,1)); + System.err.println("The Location is: " + locValue); + numOfInvalid++; + } + } + } + + } catch (SQLException e) { + throw new HiveMetaException("Failed to get DB Location Info.", e); + } + finally { + try { + metastoreConn.close(); + } catch (SQLException e) { + System.err.println("Failed to close the metastore connection"); + e.printStackTrace(System.err); + } + } + if (numOfInvalid > 0) { + isValid = false; + System.err.println("Total number of invalid DB locations is: "+ numOfInvalid); + } + return isValid; + } + + private boolean checkMetaStoreTableLocation(String locHeader) + throws HiveMetaException { + String defaultPrefix = locHeader; + String tabLoc, tabIDRange; + boolean isValid = true; + Connection metastoreConn = getConnectionToMetastore(true); + int numOfInvalid = 0; + if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + tabIDRange = "select max(\"TBL_ID\"), min(\"TBL_ID\") from \"TBLS\" "; + } else { + tabIDRange = "select max(TBL_ID), min(TBL_ID) from TBLS"; + } + + if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + tabLoc = "select tbl.\"TBL_ID\", tbl.\"TBL_NAME\", sd.\"LOCATION\", dbt.\"DB_ID\", dbt.\"NAME\" from \"TBLS\" tbl inner join " + + "\"SDS\" sd on tbl.\"SD_ID\" = sd.\"SD_ID\" and tbl.\"TBL_ID\" >= ? and tbl.\"TBL_ID\"<= ? " + + "inner join \"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" "; + } else { + tabLoc = "select tbl.TBL_ID, tbl.TBL_NAME, sd.LOCATION, dbt.DB_ID, dbt.NAME from TBLS tbl join SDS sd on tbl.SD_ID = sd.SD_ID and tbl.TBL_ID >= ? and tbl.TBL_ID <= ? inner join DBS dbt on tbl.DB_ID = dbt.DB_ID"; + } + String locValue; + String tabName; + long maxID = 0, minID = 0, curID; + long rtnSize = 2000; + + try { + Statement stmt = metastoreConn.createStatement(); + ResultSet res = stmt.executeQuery(tabIDRange); + if (res.next()) { + maxID = res.getLong(1); + minID = res.getLong(2); + } + res.close(); + stmt.close(); + curID = minID; + PreparedStatement pStmt = metastoreConn.prepareStatement(tabLoc); + while (minID <= maxID) { + pStmt.setLong(1, minID); + pStmt.setLong(2, minID + rtnSize); + res = pStmt.executeQuery(); + while (res.next()) { + locValue = res.getString(3); + if (locValue == null) { + System.err.println("In DB with " + getNameOrID(res,5,4)); + System.err.println("NULL Location for TABLE with " + getNameOrID(res,2,1)); + numOfInvalid++; + } else { + URI currentUri = null; + try { + currentUri = new Path(locValue).toUri(); + } catch (Exception pe) { + System.err.println("In DB with " + getNameOrID(res,5,4)); + System.err.println("Invalid location for Table with " + getNameOrID(res,2,1)); + System.err.println(pe.getMessage()); + numOfInvalid++; + continue; + } + if (currentUri.getScheme() == null || currentUri.getScheme().isEmpty()) { + System.err.println("In DB with " + getNameOrID(res,5,4)); + System.err.println("Missing Location scheme for Table with " + getNameOrID(res,2,1)); + System.err.println("The Location is: " + locValue); + numOfInvalid++; + } else if(defaultPrefix != null && !defaultPrefix.isEmpty() && locValue.substring(0,defaultPrefix.length()) + .compareToIgnoreCase(defaultPrefix) != 0) { + System.err.println("In DB with " + getNameOrID(res,5,4)); + System.err.println("Mismatch root Location for Table with " + getNameOrID(res,2,1)); + System.err.println("The Location is: " + locValue); + numOfInvalid++; + } + } + } + res.close(); + minID += rtnSize + 1; + + } + pStmt.close(); + + } catch (SQLException e) { + throw new HiveMetaException("Failed to get Table Location Info.", e); + } + finally { + try { + metastoreConn.close(); + } catch (SQLException e) { + System.err.println("Failed to close the metastore connection"); + e.printStackTrace(System.err); + } + } + if (numOfInvalid > 0) { + isValid = false; + System.err.println("Total number of invalid TABLE locations is: "+ numOfInvalid); + } + return isValid; + } + + private boolean checkMetaStorePartitionLocation(String locHeader) + throws HiveMetaException { + String defaultPrefix = locHeader; + String partLoc, partIDRange; + boolean isValid = true; + int numOfInvalid = 0; + Connection metastoreConn = getConnectionToMetastore(true); + if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + partIDRange = "select max(\"PART_ID\"), min(\"PART_ID\") from \"PARTITIONS\" "; + } else { + partIDRange = "select max(PART_ID), min(PART_ID) from PARTITIONS"; + } + + if (getDbCommandParser(dbType).needsQuotedIdentifier()) { + partLoc = "select pt.\"PART_ID\", pt.\"PART_NAME\", sd.\"LOCATION\", tbl.\"TBL_ID\", tbl.\"TBL_NAME\",dbt.\"DB_ID\", dbt.\"NAME\" from \"PARTITIONS\" pt " + + "inner join \"SDS\" sd on pt.\"SD_ID\" = sd.\"SD_ID\" and pt.\"PART_ID\" >= ? and pt.\"PART_ID\"<= ? " + + " inner join \"TBLS\" tbl on pt.\"TBL_ID\" = tbl.\"TBL_ID\" inner join " + + "\"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" "; + } else { + partLoc = "select pt.PART_ID, pt.PART_NAME, sd.LOCATION, tbl.TBL_ID, tbl.TBL_NAME, dbt.DB_ID, dbt.NAME from PARTITIONS pt " + + "inner join SDS sd on pt.SD_ID = sd.SD_ID and pt.PART_ID >= ? and pt.PART_ID <= ? " + + "inner join TBLS tbl on tbl.TBL_ID = pt.TBL_ID inner join DBS dbt on tbl.DB_ID = dbt.DB_ID "; + } + String locValue; + String tabName; + long maxID = 0, minID = 0, curID; + long rtnSize = 2000; + + try { + Statement stmt = metastoreConn.createStatement(); + ResultSet res = stmt.executeQuery(partIDRange); + if (res.next()) { + maxID = res.getLong(1); + minID = res.getLong(2); + } + res.close(); + stmt.close(); + curID = minID; + PreparedStatement pStmt = metastoreConn.prepareStatement(partLoc); + while (minID <= maxID) { + pStmt.setLong(1, minID); + pStmt.setLong(2, minID + rtnSize); + res = pStmt.executeQuery(); + while (res.next()) { + locValue = res.getString(3); + if (locValue == null) { + System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4)); + System.err.println("NULL Location for PARTITION with " + getNameOrID(res,2,1)); + numOfInvalid++; + } else { + URI currentUri = null; + try { + currentUri = new Path(locValue).toUri(); + } catch (Exception pe) { + System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4)); + System.err.println("Invalid location for PARTITON with " + getNameOrID(res,2,1)); + System.err.println(pe.getMessage()); + numOfInvalid++; + continue; + } + if (currentUri.getScheme() == null || currentUri.getScheme().isEmpty()) { + System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4)); + System.err.println("Missing Location scheme for PARTITON with " + getNameOrID(res,2,1)); + System.err.println("The Location is: " + locValue); + numOfInvalid++; + } else if (defaultPrefix != null && !defaultPrefix.isEmpty() && locValue.substring(0,defaultPrefix.length()) + .compareToIgnoreCase(defaultPrefix) != 0) { + System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4)); + System.err.println("Mismatch root Location for PARTITON with " + getNameOrID(res,2,1)); + System.err.println("The Location is: " + locValue); + numOfInvalid++; + } + } + } + res.close(); + minID += rtnSize + 1; + } + pStmt.close(); + } catch (SQLException e) { + throw new HiveMetaException("Failed to get Partiton Location Info.", e); + } + finally { + try { + metastoreConn.close(); + } catch (SQLException e) { + System.err.println("Failed to close the metastore connection"); + e.printStackTrace(System.err); + } + } + if (numOfInvalid > 0) { + isValid = false; + System.err.println("Total number of invalid PARTITION locations is: "+ numOfInvalid); + } + return isValid; + } + // test the connection metastore using the config property private void testConnectionToMetastore() throws HiveMetaException { Connection conn = getConnectionToMetastore(true); diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java index 2209c838f6d33e9c71c3160d9e6ef0e2aa95452d..5dc17b9c72398a641d3ea382c15a0ed2e833fa7d 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java @@ -529,6 +529,58 @@ public void testPostgresFilter() throws Exception { assertEquals(expectedSQL, flattenedSql); } + /** + * Test validate uri of locations + * @throws Exception + */ + public void testValidateLocations() throws Exception { + schemaTool.doInit(); + String defaultRoot = "hdfs://myhost.com:8020"; + //check empty DB + boolean isValid = schemaTool.validateLocations(null); + assertTrue(isValid); + isValid = schemaTool.validateLocations(defaultRoot); + assertTrue(isValid); + + String dbmydbLocation = defaultRoot + "/user/hive/warehouse/mydb"; + // Test valid case + String[] scripts = new String[] { + "insert into DBS values(2, 'my db', 'hdfs://myhost.com:8020/user/hive/warehouse/mydb', 'mydb', 'public', 'role')", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL)", + "insert into PARTITiONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)" + + }; + File scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = schemaTool.validateLocations(null); + assertTrue(isValid); + isValid = schemaTool.validateLocations(defaultRoot); + assertTrue(isValid); + scripts = new String[] { + "delete from PARTITIONS", + "delete from TBLS", + "delete from SDS", + "delete from DBS", + "insert into DBS values(2, 'my db', '/user/hive/warehouse/mydb', 'mydb', 'public', 'role')", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://yourhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','file:///user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL)", + "insert into PARTITiONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (3000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','yourhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (5000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','file:///user/admin/2016_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT) values (3000 ,1435255431,2,0 ,'hive',0,3000,'mytal3000','MANAGED_TABLE',NULL,NULL)", + "insert into PARTITiONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(5000, 1441402388,0, 'd1=1/d2=5000',5000,2)" + }; + scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = schemaTool.validateLocations(null); + assertFalse(isValid); + isValid = schemaTool.validateLocations(defaultRoot); + assertFalse(isValid); + } + private File generateTestScript(String [] stmts) throws IOException { File testScriptFile = File.createTempFile("schematest", ".sql"); testScriptFile.deleteOnExit();