diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestReplicationScenarios.java
index ec238d2..ac38340 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestReplicationScenarios.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestReplicationScenarios.java
@@ -281,6 +281,58 @@ public void testBasicWithCM() throws Exception {
   }
 
   @Test
+  public void testBootstrapLoadOnExistingDb() throws IOException {
+    String testName = "bootstrapLoadOnExistingDb";
+    LOG.info("Testing "+testName);
+    String dbName = testName + "_" + tid;
+
+    run("CREATE DATABASE " + dbName);
+    run("CREATE TABLE " + dbName + ".unptned(a string) STORED AS TEXTFILE");
+
+    String[] unptn_data = new String[]{ "eleven" , "twelve" };
+    String unptn_locn = new Path(TEST_PATH , testName + "_unptn").toUri().getPath();
+    createTestDataFile(unptn_locn, unptn_data);
+
+    run("LOAD DATA LOCAL INPATH '" + unptn_locn + "' OVERWRITE INTO TABLE " + dbName + ".unptned");
+    verifySetup("SELECT * from " + dbName + ".unptned ORDER BY a", unptn_data);
+
+    // Create an empty database to load
+    run("CREATE DATABASE " + dbName + "_empty");
+
+    advanceDumpDir();
+    run("REPL DUMP " + dbName);
+    String replDumpLocn = getResult(0,0);
+    String replDumpId = getResult(0,1,true);
+    // Load to an empty database
+    run("REPL LOAD " + dbName + "_empty FROM '" + replDumpLocn + "'");
+
+    // REPL STATUS should return same repl ID as dump
+    verifyRun("REPL STATUS " + dbName + "_empty", replDumpId);
+    verifyRun("SELECT * from " + dbName + "_empty.unptned", unptn_data);
+
+    String[] nullReplId = new String[]{ "NULL" };
+
+    // Create a database with a table
+    run("CREATE DATABASE " + dbName + "_withtable");
+    run("CREATE TABLE " + dbName + "_withtable.unptned(a string) STORED AS TEXTFILE");
+    // Load using same dump to a DB with table
+    run("REPL LOAD " + dbName + "_withtable FROM '" + replDumpLocn + "'");
+
+    // REPL STATUS should return NULL
+    verifyRun("REPL STATUS " + dbName + "_withtable", nullReplId);
+
+    // Create a database with a view
+    run("CREATE DATABASE " + dbName + "_withview");
+    run("CREATE TABLE " + dbName + "_withview.unptned(a string) STORED AS TEXTFILE");
+    run("CREATE VIEW " + dbName + "_withview.view AS SELECT * FROM " + dbName + "_withview.unptned");
+    // Load using same dump to a DB with view
+    run("REPL LOAD " + dbName + "_withview FROM '" + replDumpLocn + "'");
+
+    // REPL STATUS should return NULL
+    verifyRun("REPL STATUS " + dbName + "_withview", nullReplId);
+  }
+
+  @Test
   public void testIncrementalAdds() throws IOException {
     String testName = "incrementalAdds";
     LOG.info("Testing "+testName);
@@ -1209,7 +1261,7 @@ public void testExchangePartition() throws IOException {
     LOG.info("Incremental-Dump: Dumped to {} with id {} from {}", incrementalDumpLocn, incrementalDumpId, replDumpId);
     replDumpId = incrementalDumpId;
     run("REPL LOAD " + dbName + "_dupe FROM '" + incrementalDumpLocn + "'");
-    verifyRun("SELECT a from " + dbName + "_dupe.ptned_src where (b=1and c=1)", empty);
+    verifyRun("SELECT a from " + dbName + "_dupe.ptned_src where (b=1 and c=1)", empty);
     verifyRun("SELECT a from " + dbName + "_dupe.ptned_src where (b=2 and c=2) ORDER BY a", ptn_data_2);
     verifyRun("SELECT a from " + dbName + "_dupe.ptned_src where (b=2 and c=3) ORDER BY a", ptn_data_2);
     verifyRun("SELECT a from " + dbName + "_dupe.ptned_dest where (b=1 and c=1) ORDER BY a", ptn_data_1);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java
index a85ba42..a7b7fe8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java
@@ -31,6 +31,7 @@
 import org.apache.hadoop.hive.metastore.ReplChangeManager;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.NotificationEvent;
 import org.apache.hadoop.hive.metastore.messaging.event.filters.AndFilter;
 import org.apache.hadoop.hive.metastore.messaging.event.filters.DatabaseAndTableFilter;
@@ -53,6 +54,7 @@
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
@@ -1032,6 +1034,27 @@ private void analyzeReplLoad(ASTNode ast) throws SemanticException {
     return partSpecs;
   }
 
+  private boolean existEmptyDb(String dbName) throws InvalidOperationException, HiveException {
+    Hive hiveDb = Hive.get();
+    Database db = hiveDb.getDatabase(dbName);
+    if (null != db) {
+      List<String> allTables = hiveDb.getAllTables(dbName);
+      List<String> allFunctions = hiveDb.getFunctions(dbName, "*");
+      if (!allTables.isEmpty()) {
+        throw new InvalidOperationException(
+            "Database " + db.getName() + " is not empty. One or more tables exist.");
+      }
+      if (!allFunctions.isEmpty()) {
+        throw new InvalidOperationException(
+            "Database " + db.getName() + " is not empty. One or more functions exist.");
+      }
+
+      return true;
+    }
+
+    return false;
+  }
+
   private void analyzeDatabaseLoad(String dbName, FileSystem fs, FileStatus dir)
       throws SemanticException {
     try {
@@ -1064,24 +1087,30 @@ private void analyzeDatabaseLoad(String dbName, FileSystem fs, FileStatus dir)
         dbName = dbObj.getName();
       }
 
-      CreateDatabaseDesc createDbDesc = new CreateDatabaseDesc();
-      createDbDesc.setName(dbName);
-      createDbDesc.setComment(dbObj.getDescription());
-      createDbDesc.setDatabaseProperties(dbObj.getParameters());
-      // note that we do not set location - for repl load, we want that auto-created.
-
-      createDbDesc.setIfNotExists(false);
-      // If it exists, we want this to be an error condition. Repl Load is not intended to replace a
-      // db.
-      // TODO: we might revisit this in create-drop-recreate cases, needs some thinking on.
-      Task<? extends Serializable> createDbTask = TaskFactory.get(new DDLWork(inputs, outputs, createDbDesc), conf);
-      rootTasks.add(createDbTask);
+      Task<? extends Serializable> dbRootTask = null;
+      if (existEmptyDb(dbName)) {
+        AlterDatabaseDesc alterDbDesc = new AlterDatabaseDesc(dbName, dbObj.getParameters());
+        dbRootTask = TaskFactory.get(new DDLWork(inputs, outputs, alterDbDesc), conf);
+      } else {
+        CreateDatabaseDesc createDbDesc = new CreateDatabaseDesc();
+        createDbDesc.setName(dbName);
+        createDbDesc.setComment(dbObj.getDescription());
+        createDbDesc.setDatabaseProperties(dbObj.getParameters());
+        // note that we do not set location - for repl load, we want that auto-created.
+
+        createDbDesc.setIfNotExists(false);
+        // If it exists, we want this to be an error condition. Repl Load is not intended to replace a
+        // db.
+        // TODO: we might revisit this in create-drop-recreate cases, needs some thinking on.
+        dbRootTask = TaskFactory.get(new DDLWork(inputs, outputs, createDbDesc), conf);
+      }
+      rootTasks.add(dbRootTask);
 
       FileStatus[] dirsInDbPath = fs.listStatus(dir.getPath(), EximUtil.getDirectoryFilter(fs));
 
       for (FileStatus tableDir : dirsInDbPath) {
         analyzeTableLoad(
-            dbName, null, tableDir.getPath().toUri().toString(), createDbTask, null, null);
+            dbName, null, tableDir.getPath().toUri().toString(), dbRootTask, null, null);
       }
     } catch (Exception e) {
       throw new SemanticException(e);