diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java index a2ab3e0..23f3cf2 100644 --- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java +++ b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.HiveMetaException; import org.apache.hadoop.hive.metastore.MetaStoreSchemaInfo; +import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hive.beeline.HiveSchemaHelper.NestedScriptParser; @@ -577,6 +578,7 @@ public void doValidate() throws HiveMetaException { validateSequences(); validateSchemaTables(); validateLocations(null); + validateColumnNullValues(); System.out.print("Done with metastore validation"); } @@ -677,6 +679,7 @@ boolean validateSchemaTables() throws HiveMetaException { if (hmsConn != null) { try { hmsConn.close(); + } catch (SQLException e) { throw new HiveMetaException("Failed to close metastore connection", e); } @@ -746,6 +749,37 @@ boolean validateSchemaTables() throws HiveMetaException { return subs; } + boolean validateColumnNullValues() throws HiveMetaException { + System.out.println("Validating columns for incorrect NULL values"); + Connection conn = getConnectionToMetastore(true); + boolean isValid = true; + try { + Statement stmt = conn.createStatement(); + String tblQuery = getDbCommandParser(dbType).needsQuotedIdentifier() ? 
+ ("select t.* from \"TBLS\" t WHERE t.\"SD_ID\" IS NULL and (t.\"TBL_TYPE\"='" + TableType.EXTERNAL_TABLE + "' or t.\"TBL_TYPE\"='" + TableType.MANAGED_TABLE + "')") + : ("select t.* from TBLS t WHERE t.SD_ID IS NULL and (t.TBL_TYPE='" + TableType.EXTERNAL_TABLE + "' or t.TBL_TYPE='" + TableType.MANAGED_TABLE + "')"); + + ResultSet res = stmt.executeQuery(tblQuery); + while (res.next()) { + long tableId = res.getLong("TBL_ID"); + String tableName = res.getString("TBL_NAME"); + String tableType = res.getString("TBL_TYPE"); + isValid = false; + System.err.println("Value of SD_ID in TBLS should not be NULL: hive table - " + tableName + " tableId - " + tableId + " tableType - " + tableType); + } + + return isValid; + } catch(SQLException e) { + throw new HiveMetaException("Failed to validate columns for incorrect NULL values", e); + } finally { + try { + conn.close(); + } catch (SQLException e) { + throw new HiveMetaException("Failed to close metastore connection", e); + } + } + } + /** * Run pre-upgrade scripts corresponding to a given upgrade script, * if any exist. The errors from pre-upgrade are ignored. 
@@ -818,29 +852,38 @@ public void runBeeLine(String sqlScriptFile) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("Going to invoke file that contains:"); - FileReader fr = new FileReader(sqlScriptFile); - BufferedReader reader = new BufferedReader(fr); - String line; - while ((line = reader.readLine()) != null) { - LOG.debug("script: " + line); + BufferedReader reader = new BufferedReader(new FileReader(sqlScriptFile)); + try { + String line; + while ((line = reader.readLine()) != null) { + LOG.debug("script: " + line); + } + } finally { + if (reader != null) { + reader.close(); + } } } // run the script using Beeline BeeLine beeLine = new BeeLine(); - if (!verbose) { - beeLine.setOutputStream(new PrintStream(new NullOutputStream())); - beeLine.getOpts().setSilent(true); - } - beeLine.getOpts().setAllowMultiLineCommand(false); - beeLine.getOpts().setIsolation("TRANSACTION_READ_COMMITTED"); - // We can be pretty sure that an entire line can be processed as a single command since - // we always add a line separator at the end while calling dbCommandParser.buildCommand. - beeLine.getOpts().setEntireLineAsCommand(true); - LOG.debug("Going to run command <" + StringUtils.join(argList, " ") + ">"); - int status = beeLine.begin(argList.toArray(new String[0]), null); - if (status != 0) { - throw new IOException("Schema script failed, errorcode " + status); + try { + if (!verbose) { + beeLine.setOutputStream(new PrintStream(new NullOutputStream())); + beeLine.getOpts().setSilent(true); + } + beeLine.getOpts().setAllowMultiLineCommand(false); + beeLine.getOpts().setIsolation("TRANSACTION_READ_COMMITTED"); + // We can be pretty sure that an entire line can be processed as a single command since + // we always add a line separator at the end while calling dbCommandParser.buildCommand. 
+ beeLine.getOpts().setEntireLineAsCommand(true); + LOG.debug("Going to run command <" + StringUtils.join(argList, " ") + ">"); + int status = beeLine.begin(argList.toArray(new String[0]), null); + if (status != 0) { + throw new IOException("Schema script failed, errorcode " + status); + } + } finally { + beeLine.close(); } } diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java index 5dc17b9..3b5c6c0 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java @@ -77,7 +77,7 @@ public void testValidateSequences() throws Exception { schemaTool.doInit(); // Test empty database - boolean isValid = (boolean)schemaTool.validateSequences(); + boolean isValid = schemaTool.validateSequences(); assertTrue(isValid); // Test valid case @@ -141,6 +141,32 @@ public void testValidateSchemaTables() throws Exception { assertTrue(isValid); } + /** + * Test the validation of incorrect NULL values in the tables + * @throws Exception + */ + public void testValidateNullValues() throws Exception { + schemaTool.doInit(); + + // Test empty database + boolean isValid = schemaTool.validateColumnNullValues(); + assertTrue(isValid); + + // Test valid case + createTestHiveTableSchemas(); + isValid = schemaTool.validateColumnNullValues(); + assertTrue(isValid); + + // Test invalid case + String[] scripts = new String[] { + "update TBLS set SD_ID=null" + }; + File scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + isValid = schemaTool.validateColumnNullValues(); + assertFalse(isValid); + } + /** * Test dryrun of schema initialization * @throws Exception @@ -610,4 +636,20 @@ private String writeDummyPreUpgradeScript(int index, String upgradeScriptName, out.close(); return preUpgradeScript; } + + /** + * Insert the records in DB to simulate a hive table + * @throws 
IOException + */ + private void createTestHiveTableSchemas() throws IOException { + String[] scripts = new String[] { + "insert into DBS values(2, 'my db', 'hdfs://myhost.com:8020/user/hive/warehouse/mydb', 'mydb', 'public', 'role')", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)", + "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL)", + "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)" + }; + File scriptFile = generateTestScript(scripts); + schemaTool.runBeeLine(scriptFile.getPath()); + } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 90ea641..d4024d2 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -3615,7 +3615,7 @@ private void addForeignKeys( } MColumnDescriptor parentCD = retrieveCD ? nParentTable.mcd : parentTable.getSd().getCD(); - List parentCols = parentCD.getCols(); + List parentCols = parentCD == null ? 
null : parentCD.getCols(); int parentIntegerIndex = getColumnIndexFromTableColumns(parentCols, fks.get(i).getPkcolumn_name()); if (parentIntegerIndex == -1) { @@ -3690,7 +3690,7 @@ private void addPrimaryKeys(List pks, boolean retrieveCD) throws MColumnDescriptor parentCD = retrieveCD ? nParentTable.mcd : parentTable.getSd().getCD(); int parentIntegerIndex = - getColumnIndexFromTableColumns(parentCD.getCols(), pks.get(i).getColumn_name()); + getColumnIndexFromTableColumns(parentCD == null ? null : parentCD.getCols(), pks.get(i).getColumn_name()); if (parentIntegerIndex == -1) { throw new InvalidObjectException("Parent column not found: " + pks.get(i).getColumn_name());