diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationAcrossInstances.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationAcrossInstances.java index b805b1915c..d321ccaec2 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationAcrossInstances.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationAcrossInstances.java @@ -19,7 +19,6 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.shims.Utils; import org.junit.After; import org.junit.AfterClass; @@ -58,7 +57,6 @@ static void internalBeforeClassSetup(Map overrides, Class clazz) }}; localOverrides.putAll(overrides); primary = new WarehouseInstance(LOG, miniDFSCluster, localOverrides); - localOverrides.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); replica = new WarehouseInstance(LOG, miniDFSCluster, localOverrides); } diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java index 38580c1c67..cb6bd2d70a 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java @@ -23,7 +23,6 @@ import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsRequest; import org.apache.hadoop.hive.metastore.api.OpenTxnRequest; import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.ql.DriverFactory; @@ -88,7 +87,6 @@ static void internalBeforeClassSetup(Map overrides, Class clazz) acidEnableConf.putAll(overrides); primary = new WarehouseInstance(LOG, miniDFSCluster, acidEnableConf); - acidEnableConf.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); replica = new WarehouseInstance(LOG, miniDFSCluster, acidEnableConf); HashMap overridesForHiveConf1 = new HashMap() {{ put("fs.defaultFS", miniDFSCluster.getFileSystem().getUri().toString()); @@ -96,7 +94,6 @@ static void internalBeforeClassSetup(Map overrides, Class clazz) put("hive.txn.manager", "org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager"); put("hive.metastore.client.capability.check", "false"); }}; - overridesForHiveConf1.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); replicaNonAcid = new WarehouseInstance(LOG, miniDFSCluster, overridesForHiveConf1); } diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/ReplicationTestUtils.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/ReplicationTestUtils.java index a82bbad929..936acc45fe 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/ReplicationTestUtils.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/ReplicationTestUtils.java @@ -265,11 +265,11 @@ public static void verifyResultsInReplica(WarehouseInstance replica ,String repl List selectStmtList, List expectedValues, String lastReplId) throws Throwable { WarehouseInstance.Tuple incrementalDump = primary.dump(primaryDbName); - 
replica.loadWithoutExplain(replicatedDbName, primaryDbName) + replica.loadWithoutExplain(replicatedDbName, incrementalDump.dumpLocation) .run("REPL STATUS " + replicatedDbName).verifyResult(incrementalDump.lastReplicationId); verifyResultsInReplica(replica, replicatedDbName, selectStmtList, expectedValues); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + replica.loadWithoutExplain(replicatedDbName, incrementalDump.dumpLocation) .run("REPL STATUS " + replicatedDbName).verifyResult(incrementalDump.lastReplicationId); verifyResultsInReplica(replica, replicatedDbName, selectStmtList, expectedValues); return incrementalDump; diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestCopyUtils.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestCopyUtils.java index 9648c72dda..77a0d33b52 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestCopyUtils.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestCopyUtils.java @@ -20,7 +20,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.shims.Utils; @@ -96,7 +95,6 @@ public static void classLevelSetup() throws Exception { put(ConfVars.HIVE_DISTCP_DOAS_USER.varname, currentUser); }}; primary = new WarehouseInstanceWithMR(LOG, miniDFSCluster, overridesForHiveConf); - overridesForHiveConf.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); replica = new WarehouseInstanceWithMR(LOG, miniDFSCluster, overridesForHiveConf); } @@ -123,7 +121,7 @@ public void setup() throws Throwable { */ @Test public void testPrivilegedDistCpWithSameUserAsCurrentDoesNotTryToImpersonate() throws Throwable { - primary + WarehouseInstance.Tuple tuple = primary .run("use " + primaryDbName) .run("create table t1 (id int)") .run("insert into t1 values (1),(2),(3)") @@ -134,7 +132,7 @@ public void testPrivilegedDistCpWithSameUserAsCurrentDoesNotTryToImpersonate() t We have to do a comparision on the data of table t1 in replicated database because even though the file copy will fail due to impersonation failure the driver will return a success code 0. 
May be something to look at later */ - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .run("select * from " + replicatedDbName + ".t1") .verifyResults(Arrays.asList("1", "2", "3", "12", "11", "13")); } diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestMetaStoreEventListenerInRepl.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestMetaStoreEventListenerInRepl.java index 703d16fcc5..7b0f634a0b 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestMetaStoreEventListenerInRepl.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestMetaStoreEventListenerInRepl.java @@ -84,7 +84,6 @@ public static void internalBeforeClassSetup() throws Exception { }}; primary = new WarehouseInstance(LOG, miniDFSCluster, conf); - conf.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); replica = new WarehouseInstance(LOG, miniDFSCluster, conf); } @@ -173,26 +172,26 @@ public void tearDown() throws Throwable { @Test public void testReplEvents() throws Throwable { Map> eventsMap = prepareBootstrapData(primaryDbName); - primary.run("use " + primaryDbName) + WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName) .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, bootstrapDump.dumpLocation); ReplMetaStoreEventListenerTestImpl.checkEventSanity(eventsMap, replicatedDbName); ReplMetaStoreEventListenerTestImpl.clearSanityData(); eventsMap = prepareIncData(primaryDbName); LOG.info(testName.getMethodName() + ": first incremental dump and load."); - primary.run("use " + primaryDbName) + WarehouseInstance.Tuple incDump = primary.run("use " + primaryDbName) .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, incDump.dumpLocation); ReplMetaStoreEventListenerTestImpl.checkEventSanity(eventsMap, replicatedDbName); ReplMetaStoreEventListenerTestImpl.clearSanityData(); // Second incremental, after bootstrap eventsMap = prepareInc2Data(primaryDbName); LOG.info(testName.getMethodName() + ": second incremental dump and load."); - primary.run("use " + primaryDbName) + WarehouseInstance.Tuple inc2Dump = primary.run("use " + primaryDbName) .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, inc2Dump.dumpLocation); ReplMetaStoreEventListenerTestImpl.checkEventSanity(eventsMap, replicatedDbName); ReplMetaStoreEventListenerTestImpl.clearSanityData(); } diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOfHiveStreaming.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOfHiveStreaming.java index 62457b36b6..5c8f902b26 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOfHiveStreaming.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOfHiveStreaming.java @@ -91,7 +91,6 @@ static void internalBeforeClassSetup(Map overrides, acidEnableConf.putAll(overrides); primary = new WarehouseInstance(LOG, miniDFSCluster, acidEnableConf); - acidEnableConf.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); replica = new WarehouseInstance(LOG, miniDFSCluster, acidEnableConf); } @@ -117,8 +116,8 @@ public void tearDown() throws Throwable { @Test public void testHiveStreamingUnpartitionedWithTxnBatchSizeAsOne() throws Throwable { - 
primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + WarehouseInstance.Tuple bootstrapDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, bootstrapDump.dumpLocation); // Create an ACID table. String tblName = "alerts"; @@ -149,8 +148,8 @@ public void testHiveStreamingUnpartitionedWithTxnBatchSizeAsOne() throws Throwab connection.commitTransaction(); // Replicate the committed data which should be visible. - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + WarehouseInstance.Tuple incrDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, incrDump.dumpLocation) .run("use " + replicatedDbName) .run("select msg from " + tblName + " order by msg") .verifyResults((new String[] {"val1", "val2"})); @@ -161,8 +160,8 @@ public void testHiveStreamingUnpartitionedWithTxnBatchSizeAsOne() throws Throwab connection.write("4,val4".getBytes()); // Replicate events before committing txn. The uncommitted data shouldn't be seen. - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + incrDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, incrDump.dumpLocation) .run("use " + replicatedDbName) .run("select msg from " + tblName + " order by msg") .verifyResults((new String[] {"val1", "val2"})); @@ -170,8 +169,8 @@ public void testHiveStreamingUnpartitionedWithTxnBatchSizeAsOne() throws Throwab connection.commitTransaction(); // After commit, the data should be replicated and visible. - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + incrDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, incrDump.dumpLocation) .run("use " + replicatedDbName) .run("select msg from " + tblName + " order by msg") .verifyResults((new String[] {"val1", "val2", "val3", "val4"})); @@ -183,8 +182,8 @@ public void testHiveStreamingUnpartitionedWithTxnBatchSizeAsOne() throws Throwab connection.abortTransaction(); // Aborted data shouldn't be visible. - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + incrDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, incrDump.dumpLocation) .run("use " + replicatedDbName) .run("select msg from " + tblName + " order by msg") .verifyResults((new String[] {"val1", "val2", "val3", "val4"})); @@ -195,8 +194,8 @@ public void testHiveStreamingUnpartitionedWithTxnBatchSizeAsOne() throws Throwab @Test public void testHiveStreamingStaticPartitionWithTxnBatchSizeAsOne() throws Throwable { - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + WarehouseInstance.Tuple bootstrapDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, bootstrapDump.dumpLocation); // Create an ACID table. String tblName = "alerts"; @@ -234,8 +233,8 @@ public void testHiveStreamingStaticPartitionWithTxnBatchSizeAsOne() throws Throw connection.commitTransaction(); // Replicate the committed data which should be visible. 
- primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + WarehouseInstance.Tuple incrDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, incrDump.dumpLocation) .run("use " + replicatedDbName) .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg") .verifyResults((new String[] {"val1", "val2"})); @@ -246,8 +245,8 @@ public void testHiveStreamingStaticPartitionWithTxnBatchSizeAsOne() throws Throw connection.write("4,val4".getBytes()); // Replicate events before committing txn. The uncommitted data shouldn't be seen. - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + incrDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, incrDump.dumpLocation) .run("use " + replicatedDbName) .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg") .verifyResults((new String[] {"val1", "val2"})); @@ -255,8 +254,8 @@ public void testHiveStreamingStaticPartitionWithTxnBatchSizeAsOne() throws Throw connection.commitTransaction(); // After commit, the data should be replicated and visible. - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + incrDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, incrDump.dumpLocation) .run("use " + replicatedDbName) .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg") .verifyResults((new String[] {"val1", "val2", "val3", "val4"})); @@ -268,8 +267,8 @@ public void testHiveStreamingStaticPartitionWithTxnBatchSizeAsOne() throws Throw connection.abortTransaction(); // Aborted data shouldn't be visible. - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + incrDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, incrDump.dumpLocation) .run("use " + replicatedDbName) .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg") .verifyResults((new String[] {"val1", "val2", "val3", "val4"})); @@ -280,8 +279,8 @@ public void testHiveStreamingStaticPartitionWithTxnBatchSizeAsOne() throws Throw @Test public void testHiveStreamingDynamicPartitionWithTxnBatchSizeAsOne() throws Throwable { - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + WarehouseInstance.Tuple bootstrapDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, bootstrapDump.dumpLocation); // Create an ACID table. String tblName = "alerts"; @@ -316,8 +315,8 @@ public void testHiveStreamingDynamicPartitionWithTxnBatchSizeAsOne() throws Thro connection.commitTransaction(); // Replicate the committed data which should be visible. - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + WarehouseInstance.Tuple incrDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, incrDump.dumpLocation) .run("use " + replicatedDbName) .run("select msg from " + tblName + " where continent='Asia' and country='China' order by msg") .verifyResults((new String[] {"val11"})) @@ -330,8 +329,8 @@ public void testHiveStreamingDynamicPartitionWithTxnBatchSizeAsOne() throws Thro connection.write("14,val14,Asia,India".getBytes()); // Replicate events before committing txn. The uncommitted data shouldn't be seen. 
- primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + incrDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, incrDump.dumpLocation) .run("use " + replicatedDbName) .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg") .verifyResults((new String[] {"val12"})); @@ -339,8 +338,8 @@ public void testHiveStreamingDynamicPartitionWithTxnBatchSizeAsOne() throws Thro connection.commitTransaction(); // After committing the txn, the data should be visible. - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + incrDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, incrDump.dumpLocation) .run("use " + replicatedDbName) .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg") .verifyResults((new String[] {"val12", "val14"})) @@ -354,8 +353,8 @@ public void testHiveStreamingDynamicPartitionWithTxnBatchSizeAsOne() throws Thro connection.abortTransaction(); // Aborted data should not be visible. - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + incrDump = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, incrDump.dumpLocation) .run("use " + replicatedDbName) .run("select msg from " + tblName + " where continent='Asia' and country='India' order by msg") .verifyResults((new String[] {"val12", "val14"})) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java index f6a33bc26a..0a69d63563 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java @@ -99,7 +99,6 @@ public void targetAndSourceHaveDifferentEncryptionZoneKeys() throws Throwable { put(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS.varname, "false"); put(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER.varname, UserGroupInformation.getCurrentUser().getUserName()); - put(HiveConf.ConfVars.REPLDIR.varname, primary.repldDir); }}, "test_key123"); WarehouseInstance.Tuple tuple = @@ -110,8 +109,8 @@ public void targetAndSourceHaveDifferentEncryptionZoneKeys() throws Throwable { .dump(primaryDbName); replica - .run("repl load " + primaryDbName + " into " + replicatedDbName - + " with('hive.repl.add.raw.reserved.namespace'='true', " + .run("repl load " + replicatedDbName + " from '" + tuple.dumpLocation + + "' with('hive.repl.add.raw.reserved.namespace'='true', " + "'hive.repl.replica.external.table.base.dir'='" + replica.externalTableWarehouseRoot + "', " + "'distcp.options.pugpbx'='', 'distcp.options.skipcrccheck'='')") .run("use " + replicatedDbName) @@ -141,8 +140,8 @@ public void targetAndSourceHaveSameEncryptionZoneKeys() throws Throwable { .dump(primaryDbName); replica - .run("repl load " + primaryDbName + " into " + replicatedDbName - + " with('hive.repl.add.raw.reserved.namespace'='true')") + .run("repl load " + replicatedDbName + " from '" + tuple.dumpLocation + + "' with('hive.repl.add.raw.reserved.namespace'='true')") .run("use " + replicatedDbName) .run("repl status " + replicatedDbName) .verifyResult(tuple.lastReplicationId) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java index b709ce7281..003533a3e3 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java @@ -253,7 +253,7 @@ private Tuple bootstrapLoadAndVerify(String dbName, String replDbName) throws IO private Tuple incrementalLoadAndVerify(String dbName, String replDbName) throws IOException { Tuple dump = replDumpDb(dbName); - loadAndVerify(replDbName, dbName, dump.lastReplId); + loadAndVerify(replDbName, dump.dumpLocation, dump.lastReplId); return dump; } @@ -267,8 +267,8 @@ private Tuple replDumpDb(String dbName) throws IOException { return new Tuple(dumpLocation, lastReplId); } - private void loadAndVerify(String replDbName, String sourceDbNameOrPattern, String lastReplId) throws IOException { - run("REPL LOAD " + sourceDbNameOrPattern + " INTO " + replDbName, driverMirror); + private void loadAndVerify(String replDbName, String dumpLocation, String lastReplId) throws IOException { + run("REPL LOAD " + replDbName + " FROM '" + dumpLocation + "'", driverMirror); verifyRun("REPL STATUS " + replDbName, lastReplId, driverMirror); return; } @@ -393,7 +393,7 @@ public void testTaskCreationOptimization() throws Throwable { assertEquals(false, hasMoveTask(task)); assertEquals(true, hasPartitionTask(task)); - loadAndVerify(dbNameReplica, dbName, dump.lastReplId); + loadAndVerify(dbNameReplica, dump.dumpLocation, dump.lastReplId); run("insert into table " + dbName + ".t2 partition(country='india') values ('delhi')", driver); dump = replDumpDb(dbName); @@ -404,7 +404,7 @@ public void testTaskCreationOptimization() throws Throwable { assertEquals(true, hasMoveTask(task)); assertEquals(true, hasPartitionTask(task)); - loadAndVerify(dbNameReplica, dbName, dump.lastReplId); + loadAndVerify(dbNameReplica, dump.dumpLocation, dump.lastReplId); run("insert into table " + dbName + ".t2 partition(country='us') values ('sf')", driver); dump = replDumpDb(dbName); @@ -458,7 +458,7 @@ public void testBasicWithCM() throws Exception { // Partition droppped after "repl dump" run("ALTER TABLE " + dbName + ".ptned " + "DROP PARTITION(b=1)", driver); - run("REPL LOAD " + dbName + " INTO " + replDbName, driverMirror); + run("REPL LOAD " + replDbName + " FROM '" + replDumpLocn + "'", driverMirror); verifyRun("REPL STATUS " + replDbName, new String[] {replDumpId}, driverMirror); verifyRun("SELECT * from " + replDbName + ".unptned", unptn_data, driverMirror); @@ -495,7 +495,7 @@ public void testBootstrapLoadOnExistingDb() throws IOException { createDB(dbName + "_withtable", driverMirror); run("CREATE TABLE " + dbName + "_withtable.unptned(a string) STORED AS TEXTFILE", driverMirror); // Load using same dump to a DB with table. It should fail as DB is not empty. - verifyFail("REPL LOAD " + dbName + " INTO " + dbName + "_withtable ", driverMirror); + verifyFail("REPL LOAD " + dbName + "_withtable FROM '" + replDumpLocn + "'", driverMirror); // REPL STATUS should return NULL verifyRun("REPL STATUS " + dbName + "_withtable", nullReplId, driverMirror); @@ -505,7 +505,7 @@ public void testBootstrapLoadOnExistingDb() throws IOException { run("CREATE TABLE " + dbName + "_withview.unptned(a string) STORED AS TEXTFILE", driverMirror); run("CREATE VIEW " + dbName + "_withview.view AS SELECT * FROM " + dbName + "_withview.unptned", driverMirror); // Load using same dump to a DB with view. 
It should fail as DB is not empty. - verifyFail("REPL LOAD " + dbName + " INTO " + dbName + "_withview", driverMirror); + verifyFail("REPL LOAD " + dbName + "_withview FROM '" + replDumpLocn + "'", driverMirror); // REPL STATUS should return NULL verifyRun("REPL STATUS " + dbName + "_withview", nullReplId, driverMirror); @@ -566,7 +566,7 @@ public Table apply(@Nullable Table table) { String replDumpLocn = getResult(0, 0, driver); String replDumpId = getResult(0, 1, true, driver); LOG.info("Bootstrap-Dump: Dumped to {} with id {}", replDumpLocn, replDumpId); - run("REPL LOAD " + dbName + " INTO " + replDbName, driverMirror); + run("REPL LOAD " + replDbName + " FROM '" + replDumpLocn + "'", driverMirror); // The ptned table should miss in target as the table was marked virtually as dropped verifyRun("SELECT * from " + replDbName + ".unptned", unptn_data, driverMirror); @@ -581,7 +581,7 @@ public Table apply(@Nullable Table table) { String postDropReplDumpLocn = getResult(0,0, driver); String postDropReplDumpId = getResult(0,1,true,driver); LOG.info("Dumped to {} with id {}->{}", postDropReplDumpLocn, replDumpId, postDropReplDumpId); - assert(run("REPL LOAD " + dbName + " INTO " + replDbName, true, driverMirror)); + assert(run("REPL LOAD " + replDbName + " FROM '" + postDropReplDumpLocn + "'", true, driverMirror)); verifyRun("SELECT * from " + replDbName + ".unptned", unptn_data, driverMirror); verifyIfTableNotExist(replDbName, "ptned", metaStoreClientMirror); @@ -633,7 +633,7 @@ public void testBootstrapWithConcurrentDropPartition() throws IOException { String replDumpLocn = getResult(0, 0, driver); String replDumpId = getResult(0, 1, true, driver); LOG.info("Bootstrap-Dump: Dumped to {} with id {}", replDumpLocn, replDumpId); - run("REPL LOAD " + dbName + " INTO " + replDbName, driverMirror); + run("REPL LOAD " + replDbName + " FROM '" + replDumpLocn + "'", driverMirror); // All partitions should miss in target as it was marked virtually as dropped verifyRun("SELECT a from " + replDbName + ".ptned WHERE b=1", empty, driverMirror); @@ -650,7 +650,7 @@ public void testBootstrapWithConcurrentDropPartition() throws IOException { String postDropReplDumpLocn = getResult(0,0,driver); String postDropReplDumpId = getResult(0,1,true,driver); LOG.info("Dumped to {} with id {}->{}", postDropReplDumpLocn, replDumpId, postDropReplDumpId); - assert(run("REPL LOAD " + dbName + " INTO " + replDbName, true, driverMirror)); + assert(run("REPL LOAD " + replDbName + " FROM '" + postDropReplDumpLocn + "'", true, driverMirror)); verifyIfPartitionNotExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("1")), metaStoreClientMirror); verifyIfPartitionNotExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("2")), metaStoreClientMirror); @@ -872,6 +872,7 @@ public void testIncrementalLoadWithVariableLengthEventId() throws IOException, T Tuple incrementalDump = replDumpDb(dbName); String incrementalDumpLocn = incrementalDump.dumpLocation; + replDumpId = incrementalDump.lastReplId; // Rename the event directories such a way that the length varies. // We will encounter create_table, truncate followed by insert. @@ -905,7 +906,7 @@ public void testIncrementalLoadWithVariableLengthEventId() throws IOException, T } // Load from modified dump event directories. 
- run("REPL LOAD " + dbName + " INTO " + replDbName, driverMirror); + run("REPL LOAD " + replDbName + " FROM '" + incrementalDumpLocn + "'", driverMirror); verifyRun("SELECT a from " + replDbName + ".unptned ORDER BY a", unptn_data, driverMirror); } @@ -1109,7 +1110,7 @@ public void testDropsWithCM() throws IOException { // Drop partition after dump run("ALTER TABLE " + dbName + ".ptned_copy DROP PARTITION(b='1')", driver); - run("REPL LOAD " + dbName + " INTO " + replDbName, driverMirror); + run("REPL LOAD " + replDbName + " FROM '" + postDropReplDumpLocn + "'", driverMirror); Exception e = null; try { @@ -1709,7 +1710,7 @@ public void testIncrementalInsertDropUnpartitionedTable() throws IOException { verifyFail("SELECT * FROM " + dbName + ".unptned_tmp", driver); // Dump all the events except DROP - loadAndVerify(replDbName, dbName, incrementalDump.lastReplId); + loadAndVerify(replDbName, incrementalDump.dumpLocation, incrementalDump.lastReplId); // Need to find the tables and data as drop is not part of this dump verifyRun("SELECT a from " + replDbName + ".unptned ORDER BY a", unptn_data, driverMirror); @@ -1759,7 +1760,7 @@ public void testIncrementalInsertDropPartitionedTable() throws IOException { verifyFail("SELECT * FROM " + dbName + ".ptned", driver); // Replicate all the events except DROP - loadAndVerify(replDbName, dbName, incrementalDump.lastReplId); + loadAndVerify(replDbName, incrementalDump.dumpLocation, incrementalDump.lastReplId); // Need to find the tables and data as drop is not part of this dump verifyRun("SELECT a from " + replDbName + ".ptned where (b=1) ORDER BY a", ptn_data_1, driverMirror); @@ -1792,7 +1793,7 @@ public void testInsertOverwriteOnUnpartitionedTableWithCM() throws IOException { run("INSERT OVERWRITE TABLE " + dbName + ".unptned values('" + data_after_ovwrite[0] + "')", driver); // Replicate only one INSERT INTO operation on the table. - loadAndVerify(replDbName, dbName, incrementalDump.lastReplId); + loadAndVerify(replDbName, incrementalDump.dumpLocation, incrementalDump.lastReplId); // After Load from this dump, all target tables/partitions will have initial set of data but source will have latest data. verifyRun("SELECT a from " + replDbName + ".unptned ORDER BY a", unptn_data, driverMirror); @@ -1828,7 +1829,7 @@ public void testInsertOverwriteOnPartitionedTableWithCM() throws IOException { verifySetup("SELECT a from " + dbName + ".ptned where (b=2)", data_after_ovwrite, driver); // Replicate only 2 INSERT INTO operations. - loadAndVerify(replDbName, dbName, incrementalDump.lastReplId); + loadAndVerify(replDbName, incrementalDump.dumpLocation, incrementalDump.lastReplId); incrementalDump = replDumpDb(dbName); // After Load from this dump, all target tables/partitions will have initial set of data. @@ -1836,7 +1837,7 @@ public void testInsertOverwriteOnPartitionedTableWithCM() throws IOException { verifyRun("SELECT a from " + replDbName + ".ptned where (b=2) ORDER BY a", ptn_data_2, driverMirror); // Replicate the remaining INSERT OVERWRITE operation on the table. - loadAndVerify(replDbName, dbName, incrementalDump.lastReplId); + loadAndVerify(replDbName, incrementalDump.dumpLocation, incrementalDump.lastReplId); // After load, shall see the overwritten data. 
verifyRun("SELECT a from " + replDbName + ".ptned where (b=1) ORDER BY a", ptn_data_1, driverMirror); @@ -1931,7 +1932,7 @@ public void testRenameTableWithCM() throws IOException { run("ALTER TABLE " + dbName + ".unptned RENAME TO " + dbName + ".unptned_renamed", driver); run("ALTER TABLE " + dbName + ".ptned RENAME TO " + dbName + ".ptned_renamed", driver); - loadAndVerify(replDbName, dbName, incrementalDump.lastReplId); + loadAndVerify(replDbName, incrementalDump.dumpLocation, incrementalDump.lastReplId); verifyRun("SELECT a from " + replDbName + ".unptned ORDER BY a", unptn_data, driverMirror); verifyRun("SELECT a from " + replDbName + ".ptned where (b=1) ORDER BY a", ptn_data_1, driverMirror); @@ -1970,7 +1971,7 @@ public void testRenamePartitionWithCM() throws IOException { run("ALTER TABLE " + dbName + ".ptned PARTITION (b=2) RENAME TO PARTITION (b=10)", driver); - loadAndVerify(replDbName, dbName, incrementalDump.lastReplId); + loadAndVerify(replDbName, incrementalDump.dumpLocation, incrementalDump.lastReplId); verifyRun("SELECT a from " + replDbName + ".ptned where (b=1) ORDER BY a", ptn_data_1, driverMirror); verifyRun("SELECT a from " + replDbName + ".ptned where (b=2) ORDER BY a", ptn_data_2, driverMirror); verifyRun("SELECT a from " + replDbName + ".ptned where (b=10) ORDER BY a", empty, driverMirror); @@ -2338,20 +2339,20 @@ public void testTruncateWithCM() throws IOException { run("INSERT INTO TABLE " + dbName + ".unptned values('" + unptn_data_load1[0] + "')", driver); verifyRun("SELECT a from " + dbName + ".unptned ORDER BY a", unptn_data_load1, driver); - run("REPL LOAD " + dbName + " INTO " + replDbName, driverMirror); + run("REPL LOAD " + replDbName + " FROM '" + replDumpLocn + "'", driverMirror); // Dump and load only first insert (1 record) - loadAndVerify(replDbName, dbName, firstInsert.lastReplId); + loadAndVerify(replDbName, firstInsert.dumpLocation, firstInsert.lastReplId); verifyRun("SELECT a from " + dbName + "_dupe.unptned ORDER BY a", unptn_data_load1, driverMirror); // Dump and load only second insert (2 records) - loadAndVerify(replDbName, dbName, secondInsert.lastReplId); + loadAndVerify(replDbName, secondInsert.dumpLocation, secondInsert.lastReplId); verifyRun("SELECT a from " + replDbName + ".unptned ORDER BY a", unptn_data_load2, driverMirror); // Dump and load only truncate (0 records) - loadAndVerify(replDbName, dbName, thirdTrunc.lastReplId); + loadAndVerify(replDbName, thirdTrunc.dumpLocation, thirdTrunc.lastReplId); verifyRun("SELECT a from " + replDbName + ".unptned ORDER BY a", empty, driverMirror); // Dump and load insert after truncate (1 record) @@ -2360,7 +2361,7 @@ public void testTruncateWithCM() throws IOException { } @Test - public void testIncrementalRepeatEventOnExistingObject() throws IOException, InterruptedException { + public void testIncrementalRepeatEventOnExistingObject() throws IOException { String testName = "incrementalRepeatEventOnExistingObject"; String dbName = createDB(testName, driver); run("CREATE TABLE " + dbName + ".unptned(a string) STORED AS TEXTFILE", driver); @@ -2382,46 +2383,46 @@ public void testIncrementalRepeatEventOnExistingObject() throws IOException, Int run("INSERT INTO TABLE " + dbName + ".unptned values('" + unptn_data[0] + "')", driver); Tuple replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // INSERT EVENT to partitioned table with dynamic ADD_PARTITION run("INSERT INTO TABLE " + dbName + ".ptned PARTITION(b=1) values('" + ptn_data_1[0] + "')", driver); replDump = 
replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // ADD_PARTITION EVENT to partitioned table run("ALTER TABLE " + dbName + ".ptned ADD PARTITION (b=2)", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // INSERT EVENT to partitioned table on existing partition run("INSERT INTO TABLE " + dbName + ".ptned PARTITION(b=2) values('" + ptn_data_2[0] + "')", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // TRUNCATE_PARTITION EVENT on partitioned table run("TRUNCATE TABLE " + dbName + ".ptned PARTITION (b=1)", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // TRUNCATE_TABLE EVENT on unpartitioned table run("TRUNCATE TABLE " + dbName + ".unptned", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // CREATE_TABLE EVENT with multiple partitions run("CREATE TABLE " + dbName + ".unptned_tmp AS SELECT * FROM " + dbName + ".ptned", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // ADD_CONSTRAINT EVENT run("ALTER TABLE " + dbName + ".unptned_tmp ADD CONSTRAINT uk_unptned UNIQUE(a) disable", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // Replicate all the events happened so far for (Tuple currDump : incrementalDumpList) { // Load the incremental dump and ensure it does nothing and lastReplID remains same - loadAndVerify(replDbName, dbName, currDump.lastReplId); + loadAndVerify(replDbName, currDump.dumpLocation, currDump.lastReplId); } Tuple incrDump = incrementalLoadAndVerify(dbName, replDbName); @@ -2431,15 +2432,18 @@ public void testIncrementalRepeatEventOnExistingObject() throws IOException, Int verifyRun("SELECT a from " + replDbName + ".unptned_tmp where (b=1) ORDER BY a", empty, driverMirror); verifyRun("SELECT a from " + replDbName + ".unptned_tmp where (b=2) ORDER BY a", ptn_data_2, driverMirror); - // Load the incremental dump and ensure it does nothing and lastReplID remains same - loadAndVerify(replDbName, dbName, incrDump.lastReplId); + // Load each incremental dump from the list. Each dump has only one operation.
+ for (Tuple currDump : incrementalDumpList) { + // Load the incremental dump and ensure it does nothing and lastReplID remains same + loadAndVerify(replDbName, currDump.dumpLocation, incrDump.lastReplId); - // Verify if the data are intact even after applying an applied event once again on existing objects - verifyRun("SELECT a from " + replDbName + ".unptned ORDER BY a", empty, driverMirror); - verifyRun("SELECT a from " + replDbName + ".ptned where (b=1) ORDER BY a", empty, driverMirror); - verifyRun("SELECT a from " + replDbName + ".ptned where (b=2) ORDER BY a", ptn_data_2, driverMirror); - verifyRun("SELECT a from " + replDbName + ".unptned_tmp where (b=1) ORDER BY a", empty, driverMirror); - verifyRun("SELECT a from " + replDbName + ".unptned_tmp where (b=2) ORDER BY a", ptn_data_2, driverMirror); + // Verify if the data are intact even after applying an applied event once again on existing objects + verifyRun("SELECT a from " + replDbName + ".unptned ORDER BY a", empty, driverMirror); + verifyRun("SELECT a from " + replDbName + ".ptned where (b=1) ORDER BY a", empty, driverMirror); + verifyRun("SELECT a from " + replDbName + ".ptned where (b=2) ORDER BY a", ptn_data_2, driverMirror); + verifyRun("SELECT a from " + replDbName + ".unptned_tmp where (b=1) ORDER BY a", empty, driverMirror); + verifyRun("SELECT a from " + replDbName + ".unptned_tmp where (b=2) ORDER BY a", ptn_data_2, driverMirror); + } } @Test @@ -2464,76 +2468,76 @@ public void testIncrementalRepeatEventOnMissingObject() throws Exception { run("INSERT INTO TABLE " + dbName + ".unptned values('" + unptn_data[0] + "')", driver); Tuple replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // INSERT EVENT to partitioned table with dynamic ADD_PARTITION run("INSERT INTO TABLE " + dbName + ".ptned partition(b=1) values('" + ptn_data_1[0] + "')", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // ADD_PARTITION EVENT to partitioned table run("ALTER TABLE " + dbName + ".ptned ADD PARTITION (b=2)", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // INSERT EVENT to partitioned table on existing partition run("INSERT INTO TABLE " + dbName + ".ptned partition(b=2) values('" + ptn_data_2[0] + "')", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // TRUNCATE_PARTITION EVENT on partitioned table run("TRUNCATE TABLE " + dbName + ".ptned PARTITION(b=1)", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // TRUNCATE_TABLE EVENT on unpartitioned table run("TRUNCATE TABLE " + dbName + ".unptned", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // CREATE_TABLE EVENT on partitioned table run("CREATE TABLE " + dbName + ".ptned_tmp (a string) PARTITIONED BY (b int) STORED AS TEXTFILE", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // INSERT EVENT to partitioned table with dynamic ADD_PARTITION run("INSERT INTO TABLE " + dbName + ".ptned_tmp partition(b=10) values('" + ptn_data_1[0] + "')", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // INSERT EVENT to partitioned table with dynamic ADD_PARTITION run("INSERT INTO TABLE " + dbName + ".ptned_tmp partition(b=20) values('" + ptn_data_2[0] + "')", driver); replDump = replDumpDb(dbName); 
incrementalDumpList.add(replDump); - Thread.sleep(1000); + // DROP_PARTITION EVENT to partitioned table run("ALTER TABLE " + dbName + ".ptned DROP PARTITION (b=1)", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // RENAME_PARTITION EVENT to partitioned table run("ALTER TABLE " + dbName + ".ptned PARTITION (b=2) RENAME TO PARTITION (b=20)", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // RENAME_TABLE EVENT to unpartitioned table run("ALTER TABLE " + dbName + ".unptned RENAME TO " + dbName + ".unptned_new", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // ADD_CONSTRAINT EVENT run("ALTER TABLE " + dbName + ".ptned_tmp ADD CONSTRAINT uk_unptned UNIQUE(a) disable", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // DROP_TABLE EVENT to partitioned table run("DROP TABLE " + dbName + ".ptned_tmp", driver); replDump = replDumpDb(dbName); incrementalDumpList.add(replDump); - Thread.sleep(1000); + // Load each incremental dump from the list. Each dump have only one operation. for (Tuple currDump : incrementalDumpList) { // Load the current incremental dump and ensure it does nothing and lastReplID remains same - loadAndVerify(replDbName, dbName, currDump.lastReplId); + loadAndVerify(replDbName, currDump.dumpLocation, currDump.lastReplId); } // Replicate all the events happened so far Tuple incrDump = incrementalLoadAndVerify(dbName, replDbName); @@ -2547,17 +2551,19 @@ public void testIncrementalRepeatEventOnMissingObject() throws Exception { verifyIfPartitionExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("20")), metaStoreClientMirror); // Load each incremental dump from the list. Each dump have only one operation. 
- // Load the current incremental dump and ensure it does nothing and lastReplID remains same - loadAndVerify(replDbName, dbName, incrDump.lastReplId); - - // Verify if the data are intact even after applying an applied event once again on missing objects - verifyIfTableNotExist(replDbName, "unptned", metaStoreClientMirror); - verifyIfTableNotExist(replDbName, "ptned_tmp", metaStoreClientMirror); - verifyIfTableExist(replDbName, "unptned_new", metaStoreClientMirror); - verifyIfTableExist(replDbName, "ptned", metaStoreClientMirror); - verifyIfPartitionNotExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("1")), metaStoreClientMirror); - verifyIfPartitionNotExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("2")), metaStoreClientMirror); - verifyIfPartitionExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("20")), metaStoreClientMirror); + for (Tuple currDump : incrementalDumpList) { + // Load the current incremental dump and ensure it does nothing and lastReplID remains same + loadAndVerify(replDbName, currDump.dumpLocation, incrDump.lastReplId); + + // Verify if the data are intact even after applying an applied event once again on missing objects + verifyIfTableNotExist(replDbName, "unptned", metaStoreClientMirror); + verifyIfTableNotExist(replDbName, "ptned_tmp", metaStoreClientMirror); + verifyIfTableExist(replDbName, "unptned_new", metaStoreClientMirror); + verifyIfTableExist(replDbName, "ptned", metaStoreClientMirror); + verifyIfPartitionNotExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("1")), metaStoreClientMirror); + verifyIfPartitionNotExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("2")), metaStoreClientMirror); + verifyIfPartitionExist(replDbName, "ptned", new ArrayList<>(Arrays.asList("20")), metaStoreClientMirror); + } } @Test @@ -2655,14 +2661,14 @@ public void testIncrementalLoadFailAndRetry() throws IOException { // Replicate all the events happened so far. It should fail as the data files missing in // original path and not available in CM as well. 
Tuple incrDump = replDumpDb(dbName); - verifyFail("REPL LOAD " + dbName + " INTO " + replDbName, driverMirror); + verifyFail("REPL LOAD " + replDbName + " FROM '" + incrDump.dumpLocation + "'", driverMirror); verifyRun("SELECT a from " + replDbName + ".ptned where (b=1) ORDER BY a", empty, driverMirror); verifyFail("SELECT a from " + replDbName + ".ptned_tmp where (b=1) ORDER BY a", driverMirror); // Move the files back to original data location assert(dataFs.rename(tmpLoc, ptnLoc)); - loadAndVerify(replDbName, dbName, incrDump.lastReplId); + loadAndVerify(replDbName, incrDump.dumpLocation, incrDump.lastReplId); verifyRun("SELECT a from " + replDbName + ".ptned where (b=1) ORDER BY a", ptn_data_1, driverMirror); verifyRun("SELECT a from " + replDbName + ".ptned_tmp where (b=1) ORDER BY a", ptn_data_1, driverMirror); @@ -2884,7 +2890,7 @@ public void testDeleteStagingDir() throws IOException { String replDumpLocn = replDumpDb(dbName).dumpLocation; // Reset the driver driverMirror.close(); - run("REPL LOAD " + dbName + " INTO " + replDbName, driverMirror); + run("REPL LOAD " + replDbName + " FROM '" + replDumpLocn + "'", driverMirror); // Calling close() explicitly to clean up the staging dirs driverMirror.close(); // Check result @@ -2935,7 +2941,7 @@ public void testCMConflict() throws IOException { run("TRUNCATE TABLE " + dbName + ".unptned", driver); LOG.info("Bootstrap-Dump: Dumped to {} with id {}", replDumpLocn, replDumpId); - run("REPL LOAD " + dbName + " INTO " + replDbName, driverMirror); + run("REPL LOAD " + replDbName + " FROM '" + replDumpLocn + "'", driverMirror); verifyRun("SELECT count(*) from " + replDbName + ".unptned", new String[]{"2"}, driverMirror); } @@ -3141,7 +3147,7 @@ public void testLoadCmPathMissing() throws IOException { fs.delete(path); try { - driverMirror.run("REPL LOAD " + dbName + " INTO " + dbName); + driverMirror.run("REPL LOAD " + dbName + " FROM '" + dumpLocation + "'"); assert false; } catch (CommandProcessorException e) { assertTrue(e.getResponseCode() == ErrorMsg.REPL_FILE_MISSING_FROM_SRC_AND_CM_PATH.getErrorCode()); @@ -3281,8 +3287,8 @@ public void testMoveOptimizationBootstrap() throws IOException { String replDbName = dbName + "_replica"; Tuple dump = replDumpDb(dbName); - run("REPL LOAD " + dbName + " INTO " + replDbName + - " with ('hive.repl.enable.move.optimization'='true')", driverMirror); + run("REPL LOAD " + replDbName + " FROM '" + dump.dumpLocation + + "' with ('hive.repl.enable.move.optimization'='true')", driverMirror); verifyRun("REPL STATUS " + replDbName, dump.lastReplId, driverMirror); run(" use " + replDbName, driverMirror); @@ -3302,7 +3308,8 @@ public void testMoveOptimizationIncremental() throws IOException { String dbName = createDB(testName, driver); String replDbName = dbName + "_replica"; - bootstrapLoadAndVerify(dbName, replDbName); + Tuple bootstrapDump = bootstrapLoadAndVerify(dbName, replDbName); + String replDumpId = bootstrapDump.lastReplId; String[] unptn_data = new String[] { "eleven", "twelve" }; @@ -3315,9 +3322,10 @@ public void testMoveOptimizationIncremental() throws IOException { verifySetup("SELECT * from " + dbName + ".unptned_late ORDER BY a", unptn_data, driver); Tuple incrementalDump = replDumpDb(dbName); - run("REPL LOAD " + dbName + " INTO " + replDbName + - " with ('hive.repl.enable.move.optimization'='true')", driverMirror); + run("REPL LOAD " + replDbName + " FROM '" + incrementalDump.dumpLocation + + "' with ('hive.repl.enable.move.optimization'='true')", driverMirror); verifyRun("REPL STATUS " + 
replDbName, incrementalDump.lastReplId, driverMirror); + replDumpId = incrementalDump.lastReplId; verifyRun("SELECT a from " + replDbName + ".unptned ORDER BY a", unptn_data, driverMirror); verifyRun("SELECT a from " + replDbName + ".unptned_late ORDER BY a", unptn_data, driverMirror); @@ -3332,8 +3340,8 @@ public void testMoveOptimizationIncremental() throws IOException { verifySetup("SELECT a from " + dbName + ".unptned", data_after_ovwrite, driver); incrementalDump = replDumpDb(dbName); - run("REPL LOAD " + dbName + " INTO " + replDbName + - " with ('hive.repl.enable.move.optimization'='true')", driverMirror); + run("REPL LOAD " + replDbName + " FROM '" + incrementalDump.dumpLocation + + "' with ('hive.repl.enable.move.optimization'='true')", driverMirror); verifyRun("REPL STATUS " + replDbName, incrementalDump.lastReplId, driverMirror); verifyRun("SELECT a from " + replDbName + ".unptned_late ORDER BY a", unptn_data_after_ins, driverMirror); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java index 2854045350..f3a1abb7a1 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java @@ -51,7 +51,6 @@ import java.util.Map; import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION; -import static org.junit.Assert.assertEquals; /** * TestReplicationScenariosAcidTables - test replication for ACID tables. @@ -91,7 +90,6 @@ static void internalBeforeClassSetup(Map overrides, acidEnableConf.putAll(overrides); primary = new WarehouseInstance(LOG, miniDFSCluster, acidEnableConf); - acidEnableConf.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); replica = new WarehouseInstance(LOG, miniDFSCluster, acidEnableConf); Map overridesForHiveConf1 = new HashMap() {{ put("fs.defaultFS", miniDFSCluster.getFileSystem().getUri().toString()); @@ -99,7 +97,6 @@ static void internalBeforeClassSetup(Map overrides, put("hive.txn.manager", "org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager"); put("hive.metastore.client.capability.check", "false"); }}; - overridesForHiveConf1.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); replicaNonAcid = new WarehouseInstance(LOG, miniDFSCluster, overridesForHiveConf1); } @@ -120,7 +117,7 @@ public void tearDown() throws Throwable { public void testAcidTablesBootstrap() throws Throwable { // Bootstrap WarehouseInstance.Tuple bootstrapDump = prepareDataAndDump(primaryDbName, null); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, bootstrapDump.dumpLocation); verifyLoadExecution(replicatedDbName, bootstrapDump.lastReplicationId, true); // First incremental, after bootstrap @@ -129,7 +126,7 @@ public void testAcidTablesBootstrap() throws Throwable { LOG.info(testName.getMethodName() + ": first incremental dump and load."); WarehouseInstance.Tuple incDump = primary.run("use " + primaryDbName) .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, incDump.dumpLocation); verifyIncLoad(replicatedDbName, incDump.lastReplicationId); // Second incremental, after bootstrap @@ -138,14 +135,14 @@ public void testAcidTablesBootstrap() throws Throwable { LOG.info(testName.getMethodName() + ": second incremental dump and 
load."); WarehouseInstance.Tuple inc2Dump = primary.run("use " + primaryDbName) .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, inc2Dump.dumpLocation); verifyInc2Load(replicatedDbName, inc2Dump.lastReplicationId); } @Test public void testAcidTablesMoveOptimizationBootStrap() throws Throwable { WarehouseInstance.Tuple bootstrapDump = prepareDataAndDump(primaryDbName, null); - replica.load(replicatedDbName, primaryDbName, + replica.load(replicatedDbName, bootstrapDump.dumpLocation, Collections.singletonList("'hive.repl.enable.move.optimization'='true'")); verifyLoadExecution(replicatedDbName, bootstrapDump.lastReplicationId, true); } @@ -153,10 +150,10 @@ public void testAcidTablesMoveOptimizationBootStrap() throws Throwable { @Test public void testAcidTablesMoveOptimizationIncremental() throws Throwable { WarehouseInstance.Tuple bootstrapDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, + replica.load(replicatedDbName, bootstrapDump.dumpLocation, Collections.singletonList("'hive.repl.enable.move.optimization'='true'")); WarehouseInstance.Tuple incrDump = prepareDataAndDump(primaryDbName, null); - replica.load(replicatedDbName, primaryDbName, + replica.load(replicatedDbName, incrDump.dumpLocation, Collections.singletonList("'hive.repl.enable.move.optimization'='true'")); verifyLoadExecution(replicatedDbName, incrDump.lastReplicationId, true); } @@ -199,7 +196,7 @@ public void testAcidTablesBootstrapWithOpenTxnsTimeout() throws Throwable { // Bootstrap load which should also replicate the aborted write ids on both tables. HiveConf replicaConf = replica.getConf(); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootstrapDump.dumpLocation) .run("use " + replicatedDbName) .run("show tables") .verifyResults(new String[] {"t1", "t2"}) @@ -283,7 +280,7 @@ public void run() { } // Bootstrap dump has taken snapshot before concurrent tread performed write. So, it won't see data "2". - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootstrapDump.dumpLocation) .run("use " + replicatedDbName) .run("repl status " + replicatedDbName) .verifyResult(bootstrapDump.lastReplicationId) @@ -292,7 +289,7 @@ public void run() { // Incremental should include the concurrent write of data "2" from another txn. WarehouseInstance.Tuple incrementalDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, incrementalDump.dumpLocation) .run("use " + replicatedDbName) .run("repl status " + replicatedDbName) .verifyResult(incrementalDump.lastReplicationId) @@ -357,7 +354,7 @@ public void run() { } // Bootstrap dump has taken latest list of tables and hence won't see table t1 as it is dropped. 
- replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootstrapDump.dumpLocation) .run("use " + replicatedDbName) .run("repl status " + replicatedDbName) .verifyResult(bootstrapDump.lastReplicationId) @@ -371,7 +368,7 @@ public void run() { .run("insert into t1 values(100)") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, incrementalDump.dumpLocation) .run("use " + replicatedDbName) .run("repl status " + replicatedDbName) .verifyResult(incrementalDump.lastReplicationId) @@ -383,7 +380,7 @@ public void run() { public void testOpenTxnEvent() throws Throwable { String tableName = testName.getMethodName(); WarehouseInstance.Tuple bootStrapDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootStrapDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(bootStrapDump.lastReplicationId); @@ -404,12 +401,12 @@ public void testOpenTxnEvent() throws Throwable { primary.testEventCounts(primaryDbName, lastReplId, null, null, 22); // Test load - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, incrementalDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(incrementalDump.lastReplicationId); // Test the idempotent behavior of Open and Commit Txn - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, incrementalDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(incrementalDump.lastReplicationId); } @@ -418,7 +415,7 @@ public void testOpenTxnEvent() throws Throwable { public void testAbortTxnEvent() throws Throwable { String tableNameFail = testName.getMethodName() + "Fail"; WarehouseInstance.Tuple bootStrapDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootStrapDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(bootStrapDump.lastReplicationId); @@ -432,12 +429,12 @@ public void testAbortTxnEvent() throws Throwable { WarehouseInstance.Tuple incrementalDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, incrementalDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(incrementalDump.lastReplicationId); // Test the idempotent behavior of Abort Txn - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, incrementalDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(incrementalDump.lastReplicationId); } @@ -446,7 +443,7 @@ public void testAbortTxnEvent() throws Throwable { public void testTxnEventNonAcid() throws Throwable { String tableName = testName.getMethodName(); WarehouseInstance.Tuple bootStrapDump = primary.dump(primaryDbName); - replicaNonAcid.load(replicatedDbName, primaryDbName) + replicaNonAcid.load(replicatedDbName, bootStrapDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(bootStrapDump.lastReplicationId); @@ -463,7 +460,7 @@ public void testTxnEventNonAcid() throws Throwable { WarehouseInstance.Tuple incrementalDump = primary.dump(primaryDbName); - replicaNonAcid.runFailure("REPL LOAD " + primaryDbName + " INTO " + replicatedDbName + "'") + replicaNonAcid.runFailure("REPL LOAD " + replicatedDbName + " FROM '" + incrementalDump.dumpLocation + "'") .run("REPL STATUS " + replicatedDbName) .verifyResult(bootStrapDump.lastReplicationId); } @@ -502,7 +499,7 @@ public Boolean apply(@Nullable 
CallerArguments args) { InjectableBehaviourObjectStore.setCallerVerifier(callerVerifier); List withConfigs = Arrays.asList("'hive.repl.approx.max.load.tasks'='1'"); - replica.loadFailure(replicatedDbName, primaryDbName, withConfigs); + replica.loadFailure(replicatedDbName, tuple.dumpLocation, withConfigs); callerVerifier.assertInjectionsPerformed(true, false); InjectableBehaviourObjectStore.resetCallerVerifier(); // reset the behaviour @@ -530,7 +527,7 @@ public Boolean apply(@Nullable CallerArguments args) { // Retry with same dump with which it was already loaded should resume the bootstrap load. // This time, it completes by adding just constraints for table t4. - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); callerVerifier.assertInjectionsPerformed(true, false); InjectableBehaviourObjectStore.resetCallerVerifier(); // reset the behaviour @@ -607,7 +604,9 @@ public void testMultiDBTxn() throws Throwable { String txnStrStart = "START TRANSACTION"; String txnStrCommit = "COMMIT"; + WarehouseInstance.Tuple incrementalDump; primary.run("alter database default set dbproperties ('repl.source.for' = '1, 2, 3')"); + WarehouseInstance.Tuple bootStrapDump = primary.dump("`*`"); primary.run("use " + primaryDbName) .run("create database " + dbName1 + " WITH DBPROPERTIES ( '" + SOURCE_OF_REPLICATION + "' = '1,2,3')") @@ -639,7 +638,7 @@ public void testMultiDBTxn() throws Throwable { .verifyResults(resultArray) .run(txnStrCommit); - primary.dump("`*`"); + incrementalDump = primary.dump("`*`"); // Due to the limitation that we can only have one instance of Persistence Manager Factory in a JVM // we are not able to create multiple embedded derby instances for two different MetaStore instances. @@ -647,10 +646,20 @@ public void testMultiDBTxn() throws Throwable { primary.run("drop database " + dbName1 + " cascade"); primary.run("drop database " + dbName2 + " cascade"); //End of additional steps - try { - replica.loadWithoutExplain("", "`*`"); - } catch (SemanticException e) { - assertEquals("REPL LOAD * is not supported", e.getMessage()); - } + + replica.loadWithoutExplain("", bootStrapDump.dumpLocation) + .run("REPL STATUS default") + .verifyResult(bootStrapDump.lastReplicationId); + + replica.loadWithoutExplain("", incrementalDump.dumpLocation) + .run("REPL STATUS " + dbName1) + .run("select key from " + dbName1 + "." + tableName + " order by key") + .verifyResults(resultArray) + .run("select key from " + dbName2 + "." 
+ tableName + " order by key") + .verifyResults(resultArray); + + replica.run("drop database " + primaryDbName + " cascade"); + replica.run("drop database " + dbName1 + " cascade"); + replica.run("drop database " + dbName2 + " cascade"); } } diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTablesBootstrap.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTablesBootstrap.java index 36841bae99..f5dd043a9c 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTablesBootstrap.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTablesBootstrap.java @@ -68,7 +68,7 @@ public void testAcidTablesBootstrapDuringIncremental() throws Throwable { WarehouseInstance.Tuple bootstrapDump = prepareDataAndDump(primaryDbName, dumpWithoutAcidClause); LOG.info(testName.getMethodName() + ": loading dump without acid tables."); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, bootstrapDump.dumpLocation); verifyLoadExecution(replicatedDbName, bootstrapDump.lastReplicationId, false); // Take a incremental dump with acid table bootstrap @@ -77,7 +77,7 @@ public void testAcidTablesBootstrapDuringIncremental() throws Throwable { LOG.info(testName.getMethodName() + ": incremental dump and load dump with acid table bootstrap."); WarehouseInstance.Tuple incrementalDump = primary.run("use " + primaryDbName) .dump(primaryDbName, dumpWithAcidBootstrapClause); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, incrementalDump.dumpLocation); verifyIncLoad(replicatedDbName, incrementalDump.lastReplicationId); // Ckpt should be set on bootstrapped tables. replica.verifyIfCkptSetForTables(replicatedDbName, acidTableNames, incrementalDump.dumpLocation); @@ -90,7 +90,7 @@ public void testAcidTablesBootstrapDuringIncremental() throws Throwable { "bootstrap."); WarehouseInstance.Tuple inc2Dump = primary.run("use " + primaryDbName) .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, inc2Dump.dumpLocation); verifyInc2Load(replicatedDbName, inc2Dump.lastReplicationId); } @@ -99,7 +99,7 @@ public void testRetryAcidTablesBootstrapFromDifferentDump() throws Throwable { WarehouseInstance.Tuple bootstrapDump = prepareDataAndDump(primaryDbName, dumpWithoutAcidClause); LOG.info(testName.getMethodName() + ": loading dump without acid tables."); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, bootstrapDump.dumpLocation); verifyLoadExecution(replicatedDbName, bootstrapDump.lastReplicationId, false); prepareIncAcidData(primaryDbName); @@ -129,7 +129,7 @@ public Boolean apply(@Nullable CallerArguments args) { try { LOG.info(testName.getMethodName() + ": loading first incremental dump with acid table bootstrap (will fail)"); - replica.loadFailure(replicatedDbName, primaryDbName); + replica.loadFailure(replicatedDbName, incDump.dumpLocation); callerVerifier.assertInjectionsPerformed(true, false); } finally { InjectableBehaviourObjectStore.resetAlterTableModifier(); @@ -149,7 +149,7 @@ public Boolean apply(@Nullable CallerArguments args) { LOG.info(testName.getMethodName() + ": trying to load second incremental dump with wrong bootstrap dump " + " specified for cleaning ACID tables. 
Should fail."); - replica.loadFailure(replicatedDbName, primaryDbName, loadWithClause); + replica.loadFailure(replicatedDbName, inc2Dump.dumpLocation, loadWithClause); // Set previously failed bootstrap dump to clean-up. Now, new bootstrap should overwrite the old one. loadWithClause = Collections.singletonList( @@ -159,7 +159,7 @@ public Boolean apply(@Nullable CallerArguments args) { LOG.info(testName.getMethodName() + ": trying to load second incremental dump with correct bootstrap dump " + "specified for cleaning ACID tables. Should succeed."); - replica.load(replicatedDbName, primaryDbName, loadWithClause); + replica.load(replicatedDbName, inc2Dump.dumpLocation, loadWithClause); verifyInc2Load(replicatedDbName, inc2Dump.lastReplicationId); // Once the REPL LOAD is successful, the this config should be unset or else, the subsequent REPL LOAD @@ -170,7 +170,7 @@ public Boolean apply(@Nullable CallerArguments args) { LOG.info(testName.getMethodName() + ": trying to load second incremental dump (with acid bootstrap) again." + " Should succeed."); - replica.load(replicatedDbName, primaryDbName, loadWithClause); + replica.load(replicatedDbName, inc2Dump.dumpLocation, loadWithClause); verifyInc2Load(replicatedDbName, inc2Dump.lastReplicationId); } @@ -178,7 +178,7 @@ public Boolean apply(@Nullable CallerArguments args) { public void retryIncBootstrapAcidFromDifferentDumpWithoutCleanTablesConfig() throws Throwable { WarehouseInstance.Tuple bootstrapDump = prepareDataAndDump(primaryDbName, dumpWithoutAcidClause); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, bootstrapDump.dumpLocation); prepareIncAcidData(primaryDbName); prepareIncNonAcidData(primaryDbName); @@ -186,10 +186,10 @@ public void retryIncBootstrapAcidFromDifferentDumpWithoutCleanTablesConfig() thr .dump(primaryDbName, dumpWithAcidBootstrapClause); WarehouseInstance.Tuple inc2Dump = primary.run("use " + primaryDbName) .dump(primaryDbName, dumpWithAcidBootstrapClause); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, incDump.dumpLocation); // Re-bootstrapping from different bootstrap dump without clean tables config should fail. - replica.loadFailure(replicatedDbName, primaryDbName, Collections.emptyList(), + replica.loadFailure(replicatedDbName, inc2Dump.dumpLocation, Collections.emptyList(), ErrorMsg.REPL_BOOTSTRAP_LOAD_PATH_NOT_VALID.getErrorCode()); } @@ -199,7 +199,7 @@ public void testAcidTablesBootstrapDuringIncrementalWithOpenTxnsTimeout() throws WarehouseInstance.Tuple bootstrapDump = prepareDataAndDump(primaryDbName, dumpWithoutAcidClause); LOG.info(testName.getMethodName() + ": loading dump without acid tables."); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, bootstrapDump.dumpLocation); // Open concurrent transactions, create data for incremental and take an incremental dump // with ACID table bootstrap. 
@@ -232,7 +232,7 @@ public void testAcidTablesBootstrapDuringIncrementalWithOpenTxnsTimeout() throws // tables t1 and t2 HiveConf replicaConf = replica.getConf(); LOG.info(testName.getMethodName() + ": loading incremental dump with ACID bootstrap."); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, incDump.dumpLocation); verifyIncLoad(replicatedDbName, incDump.lastReplicationId); // Verify if HWM is properly set after REPL LOAD verifyNextId(tables, replicatedDbName, replicaConf); @@ -257,7 +257,7 @@ public void testBootstrapAcidTablesDuringIncrementalWithConcurrentWrites() throw WarehouseInstance.Tuple bootstrapDump = prepareDataAndDump(primaryDbName, dumpWithoutAcidClause); LOG.info(testName.getMethodName() + ": loading dump without acid tables."); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, bootstrapDump.dumpLocation); // Create incremental data for incremental load with bootstrap of ACID prepareIncNonAcidData(primaryDbName); @@ -315,7 +315,7 @@ public void run() { // write. So concurrent writes won't be dumped. LOG.info(testName.getMethodName() + ": loading incremental dump containing bootstrapped ACID tables."); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, incDump.dumpLocation); verifyIncLoad(replicatedDbName, incDump.lastReplicationId); // Next Incremental should include the concurrent writes @@ -324,7 +324,7 @@ public void run() { WarehouseInstance.Tuple inc2Dump = primary.dump(primaryDbName); LOG.info(testName.getMethodName() + ": loading second normal incremental dump from event id = " + incDump.lastReplicationId); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, inc2Dump.dumpLocation); verifyInc2Load(replicatedDbName, inc2Dump.lastReplicationId); } } diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java index ff1de9e396..eb8a8995c7 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java @@ -77,7 +77,7 @@ public static void classLevelSetup() throws Exception { @Test public void testCreateFunctionIncrementalReplication() throws Throwable { WarehouseInstance.Tuple bootStrapDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootStrapDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(bootStrapDump.lastReplicationId); @@ -89,7 +89,7 @@ public void testCreateFunctionIncrementalReplication() throws Throwable { WarehouseInstance.Tuple incrementalDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, incrementalDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(incrementalDump.lastReplicationId) .run("SHOW FUNCTIONS LIKE '" + replicatedDbName + "%'") @@ -97,7 +97,7 @@ public void testCreateFunctionIncrementalReplication() throws Throwable { replicatedDbName + ".testFunctionTwo" }); // Test the idempotent behavior of CREATE FUNCTION - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, incrementalDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(incrementalDump.lastReplicationId) 
.run("SHOW FUNCTIONS LIKE '" + replicatedDbName + "%'") @@ -141,7 +141,7 @@ public Boolean apply(CallerArguments args) { List withConfigs = Arrays.asList("'hive.repl.approx.max.load.tasks'='1'", "'hive.in.repl.test.files.sorted'='true'"); try { - replica.loadFailure(replicatedDbName, primaryDbName, withConfigs); + replica.loadFailure(replicatedDbName, tuple.dumpLocation, withConfigs); callerVerifier.assertInjectionsPerformed(true, false); } finally { InjectableBehaviourObjectStore.resetCallerVerifier(); // reset the behaviour @@ -175,7 +175,7 @@ public Boolean apply(CallerArguments args) { try { // Retry with same dump with which it was already loaded should resume the bootstrap load. // This time, it completes by adding just the function f2 - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); callerVerifier.assertInjectionsPerformed(true, false); } finally { InjectableBehaviourObjectStore.resetCallerVerifier(); // reset the behaviour @@ -196,7 +196,7 @@ public void testDropFunctionIncrementalReplication() throws Throwable { + ".testFunctionAnother as 'hivemall.tools.string.StopwordUDF' " + "using jar 'ivy://io.github.myui:hivemall:0.4.0-2'"); WarehouseInstance.Tuple bootStrapDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootStrapDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(bootStrapDump.lastReplicationId); @@ -204,14 +204,14 @@ public void testDropFunctionIncrementalReplication() throws Throwable { WarehouseInstance.Tuple incrementalDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, incrementalDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(incrementalDump.lastReplicationId) .run("SHOW FUNCTIONS LIKE '%testfunctionanother%'") .verifyResult(null); // Test the idempotent behavior of DROP FUNCTION - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, incrementalDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(incrementalDump.lastReplicationId) .run("SHOW FUNCTIONS LIKE '%testfunctionanother%'") @@ -225,7 +225,7 @@ public void testBootstrapFunctionReplication() throws Throwable { + "using jar 'ivy://io.github.myui:hivemall:0.4.0-2'"); WarehouseInstance.Tuple bootStrapDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootStrapDump.dumpLocation) .run("SHOW FUNCTIONS LIKE '" + replicatedDbName + "%'") .verifyResult(replicatedDbName + ".testFunction"); } @@ -241,7 +241,7 @@ public void testCreateFunctionWithFunctionBinaryJarsOnHDFS() throws Throwable { WarehouseInstance.Tuple tuple = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .run("SHOW FUNCTIONS LIKE '" + replicatedDbName + "%'") .verifyResult(replicatedDbName + ".anotherFunction"); @@ -262,7 +262,7 @@ public void testCreateFunctionWithFunctionBinaryJarsOnHDFS() throws Throwable { @Test public void testIncrementalCreateFunctionWithFunctionBinaryJarsOnHDFS() throws Throwable { WarehouseInstance.Tuple bootStrapDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootStrapDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(bootStrapDump.lastReplicationId); @@ -275,7 +275,7 @@ public void 
testIncrementalCreateFunctionWithFunctionBinaryJarsOnHDFS() throws T WarehouseInstance.Tuple tuple = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .run("SHOW FUNCTIONS LIKE '" + replicatedDbName + "%'") .verifyResult(replicatedDbName + ".anotherFunction"); @@ -354,7 +354,7 @@ public void testMultipleStagesOfReplicationLoadTask() throws Throwable { List withClause = Collections.singletonList( "'" + HiveConf.ConfVars.REPL_APPROX_MAX_LOAD_TASKS.varname + "'='1'"); - replica.load(replicatedDbName, primaryDbName, withClause) + replica.load(replicatedDbName, tuple.dumpLocation, withClause) .run("use " + replicatedDbName) .run("show tables") .verifyResults(new String[] { "t1", "t2", "t3" }) @@ -382,7 +382,7 @@ public void testParallelExecutionOfReplicationBootStrapLoad() throws Throwable { .dump(primaryDbName); replica.hiveConf.setBoolVar(HiveConf.ConfVars.EXECPARALLEL, true); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .run("use " + replicatedDbName) .run("repl status " + replicatedDbName) .verifyResult(tuple.lastReplicationId) @@ -403,7 +403,7 @@ public void testMetadataBootstrapDump() throws Throwable { .run("insert into table1 values (1,2)") .dump(primaryDbName, Collections.singletonList("'hive.repl.dump.metadata.only'='true'")); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .run("use " + replicatedDbName) .run("show tables") .verifyResults(new String[] { "acid_table", "table1" }) @@ -422,7 +422,7 @@ public void testIncrementalMetadataReplication() throws Throwable { .run("insert into table1 values (1,2)") .dump(primaryDbName, Collections.singletonList("'hive.repl.dump.metadata.only'='true'")); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootstrapTuple.dumpLocation) .run("use " + replicatedDbName) .run("show tables") .verifyResults(new String[] { "table1", "table2", "table3" }) @@ -440,7 +440,7 @@ public void testIncrementalMetadataReplication() throws Throwable { "repl dump " + primaryDbName + " with ('hive.repl.dump.metadata.only'='true')" ); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, incrementalOneTuple.dumpLocation) .run("use " + replicatedDbName) .run("show tables") .verifyResults(new String[] { "renamed_table1", "table2", "table3", "table4" }) @@ -458,7 +458,7 @@ public void testIncrementalMetadataReplication() throws Throwable { .dumpWithCommand("repl dump " + primaryDbName + " with ('hive.repl.dump.metadata.only'='true')" ); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, secondIncremental.dumpLocation) .run("use " + replicatedDbName) .run("show tables") .verifyResults(new String[] { "table2", "table3", "table4" }) @@ -496,7 +496,7 @@ public void testNonReplDBMetadataReplication() throws Throwable { .run("insert into table1 values (1,2)") .dump(dbName, Collections.singletonList("'hive.repl.dump.metadata.only'='true'")); - replica.load(replicatedDbName, dbName) + replica.load(replicatedDbName, tuple.dumpLocation) .run("use " + replicatedDbName) .run("show tables") .verifyResults(new String[]{"table1", "table2", "table3"}) @@ -510,7 +510,7 @@ public void testNonReplDBMetadataReplication() throws Throwable { .run("create table table4 (i int, j int)") .dump(dbName, Collections.singletonList("'hive.repl.dump.metadata.only'='true'")); - replica.load(replicatedDbName, dbName) + 
replica.load(replicatedDbName, tuple.dumpLocation) .run("use " + replicatedDbName) .run("show tables") .verifyResults(new String[] { "renamed_table1", "table2", "table3", "table4" }) @@ -563,12 +563,126 @@ public void testBootStrapDumpOfWarehouse() throws Throwable { // Reset ckpt and last repl ID keys to empty set for allowing bootstrap load replica.run("show databases") .verifyFailure(new String[] { primaryDbName, dbOne, dbTwo }) - .run("alter database default set dbproperties ('hive.repl.ckpt.key'='', 'repl.last.id'='')"); - try { - replica.load("", "`*`"); - } catch (SemanticException e) { - assertEquals("REPL LOAD * is not supported", e.getMessage()); - } + .run("alter database default set dbproperties ('hive.repl.ckpt.key'='', 'repl.last.id'='')") + .load("", tuple.dumpLocation) + .run("show databases") + .verifyResults(new String[] { "default", primaryDbName, dbOne, dbTwo }) + .run("use " + primaryDbName) + .run("show tables") + .verifyResults(new String[] { "t1" }) + .run("use " + dbOne) + .run("show tables") + .verifyResults(new String[] { "t1" }) + .run("use " + dbTwo) + .run("show tables") + .verifyResults(new String[] { "t1" }) + .verifyReplTargetProperty(primaryDbName) + .verifyReplTargetProperty(dbOne) + .verifyReplTargetProperty(dbTwo); + + /* + Start of cleanup + */ + + replica.run("drop database " + primaryDbName + " cascade"); + replica.run("drop database " + dbOne + " cascade"); + replica.run("drop database " + dbTwo + " cascade"); + + /* + End of cleanup + */ + } + + @Test + public void testIncrementalDumpOfWarehouse() throws Throwable { + String randomOne = RandomStringUtils.random(10, true, false); + String randomTwo = RandomStringUtils.random(10, true, false); + String dbOne = primaryDbName + randomOne; + primary.run("alter database default set dbproperties ('" + SOURCE_OF_REPLICATION + "' = '1, 2, 3')"); + WarehouseInstance.Tuple bootstrapTuple = primary + .run("use " + primaryDbName) + .run("create table t1 (i int, j int)") + .run("create database " + dbOne + " WITH DBPROPERTIES ( '" + + SOURCE_OF_REPLICATION + "' = '1,2,3')") + .run("use " + dbOne) + .run("create table t1 (i int, j int) partitioned by (load_date date) " + + "clustered by(i) into 2 buckets stored as orc tblproperties ('transactional'='true') ") + .dump("`*`", Collections.singletonList("'hive.repl.dump.metadata.only'='true'")); + + String dbTwo = primaryDbName + randomTwo; + WarehouseInstance.Tuple incrementalTuple = primary + .run("create database " + dbTwo + " WITH DBPROPERTIES ( '" + + SOURCE_OF_REPLICATION + "' = '1,2,3')") + .run("use " + dbTwo) + .run("create table t1 (i int, j int)") + .run("use " + dbOne) + .run("create table t2 (a int, b int)") + .dump("`*`", Arrays.asList("'hive.repl.dump.metadata.only'='true'")); + + /* + Due to the limitation that we can only have one instance of Persistence Manager Factory in a JVM + we are not able to create multiple embedded derby instances for two different MetaStore instances. 
+ */ + + primary.run("drop database " + primaryDbName + " cascade"); + primary.run("drop database " + dbOne + " cascade"); + primary.run("drop database " + dbTwo + " cascade"); + + /* + End of additional steps + */ + + // Reset ckpt and last repl ID keys to empty set for allowing bootstrap load + replica.run("show databases") + .verifyFailure(new String[] { primaryDbName, dbOne, dbTwo }) + .run("alter database default set dbproperties ('hive.repl.ckpt.key'='', 'repl.last.id'='')") + .load("", bootstrapTuple.dumpLocation) + .run("show databases") + .verifyResults(new String[] { "default", primaryDbName, dbOne }) + .run("use " + primaryDbName) + .run("show tables") + .verifyResults(new String[] { "t1" }) + .run("use " + dbOne) + .run("show tables") + .verifyResults(new String[] { "t1" }) + .verifyReplTargetProperty(primaryDbName) + .verifyReplTargetProperty(dbOne) + .verifyReplTargetProperty(dbTwo); + + assertTrue(ReplUtils.isFirstIncPending(replica.getDatabase("default").getParameters())); + assertTrue(ReplUtils.isFirstIncPending(replica.getDatabase(primaryDbName).getParameters())); + assertTrue(ReplUtils.isFirstIncPending(replica.getDatabase(dbOne).getParameters())); + + replica.load("", incrementalTuple.dumpLocation) + .run("show databases") + .verifyResults(new String[] { "default", primaryDbName, dbOne, dbTwo }) + .run("use " + dbTwo) + .run("show tables") + .verifyResults(new String[] { "t1" }) + .run("use " + dbOne) + .run("show tables") + .verifyResults(new String[] { "t1", "t2" }) + .verifyReplTargetProperty(primaryDbName) + .verifyReplTargetProperty(dbOne) + .verifyReplTargetProperty(dbTwo); + + assertFalse(ReplUtils.isFirstIncPending(replica.getDatabase("default").getParameters())); + assertFalse(ReplUtils.isFirstIncPending(replica.getDatabase(primaryDbName).getParameters())); + assertFalse(ReplUtils.isFirstIncPending(replica.getDatabase(dbOne).getParameters())); + assertFalse(ReplUtils.isFirstIncPending(replica.getDatabase(dbTwo).getParameters())); + + /* + Start of cleanup + */ + + replica.run("drop database " + primaryDbName + " cascade"); + replica.run("drop database " + dbOne + " cascade"); + replica.run("drop database " + dbTwo + " cascade"); + + /* + End of cleanup + */ + } @Test @@ -588,7 +702,7 @@ public void testReplLoadFromSourceUsingWithClause() throws Throwable { .dump(primaryDbName); // Run load on primary itself - primary.load(replicatedDbName, primaryDbName, withConfigs) + primary.load(replicatedDbName, bootstrapTuple.dumpLocation, withConfigs) .status(replicatedDbName, withConfigs) .verifyResult(bootstrapTuple.lastReplicationId); @@ -613,7 +727,7 @@ public void testReplLoadFromSourceUsingWithClause() throws Throwable { .dump(primaryDbName, Collections.emptyList()); // Run load on primary itself - primary.load(replicatedDbName, primaryDbName, withConfigs) + primary.load(replicatedDbName, incrementalOneTuple.dumpLocation, withConfigs) .status(replicatedDbName, withConfigs) .verifyResult(incrementalOneTuple.lastReplicationId); @@ -642,7 +756,7 @@ public void testReplLoadFromSourceUsingWithClause() throws Throwable { .dump(primaryDbName, Collections.emptyList()); // Run load on primary itself - primary.load(replicatedDbName, primaryDbName, withConfigs) + primary.load(replicatedDbName, secondIncremental.dumpLocation, withConfigs) .status(replicatedDbName, withConfigs) .verifyResult(secondIncremental.lastReplicationId); @@ -676,7 +790,7 @@ public void testIncrementalReplWithEventsBatchHavingDropCreateTable() throws Thr WarehouseInstance.Tuple bootstrapTuple = 
primary.dump(primaryDbName); // Bootstrap load in replica - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootstrapTuple.dumpLocation) .status(replicatedDbName) .verifyResult(bootstrapTuple.lastReplicationId); @@ -701,7 +815,7 @@ public void testIncrementalReplWithEventsBatchHavingDropCreateTable() throws Thr .dump(primaryDbName, Collections.emptyList()); // First incremental load - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, firstIncremental.dumpLocation) .status(replicatedDbName) .verifyResult(firstIncremental.lastReplicationId) .run("use " + replicatedDbName) @@ -713,7 +827,7 @@ public void testIncrementalReplWithEventsBatchHavingDropCreateTable() throws Thr .verifyResults(new String[] {"1"}); // Second incremental load - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, secondIncremental.dumpLocation) .status(replicatedDbName) .verifyResult(secondIncremental.lastReplicationId) .run("use " + replicatedDbName) @@ -731,7 +845,7 @@ public void testIncrementalReplWithDropAndCreateTableDifferentPartitionTypeAndIn WarehouseInstance.Tuple bootstrapTuple = primary.dump(primaryDbName); // Bootstrap load in replica - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootstrapTuple.dumpLocation) .status(replicatedDbName) .verifyResult(bootstrapTuple.lastReplicationId); @@ -759,7 +873,7 @@ public void testIncrementalReplWithDropAndCreateTableDifferentPartitionTypeAndIn .dump(primaryDbName, Collections.emptyList()); // First incremental load - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, firstIncremental.dumpLocation) .status(replicatedDbName) .verifyResult(firstIncremental.lastReplicationId) .run("use " + replicatedDbName) @@ -771,7 +885,7 @@ public void testIncrementalReplWithDropAndCreateTableDifferentPartitionTypeAndIn .verifyResults(new String[] { "3" }); // Second incremental load - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, secondIncremental.dumpLocation) .status(replicatedDbName) .verifyResult(secondIncremental.lastReplicationId) .run("use " + replicatedDbName) @@ -824,7 +938,7 @@ public void testShouldDumpMetaDataForNonNativeTableIfSetMeataDataOnly() throws T .dump(primaryDbName, Collections.singletonList("'hive.repl.dump.metadata.only'='true'")); // Bootstrap load in replica - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootstrapTuple.dumpLocation) .status(replicatedDbName) .verifyResult(bootstrapTuple.lastReplicationId) .run("use " + replicatedDbName) @@ -849,13 +963,13 @@ private void verifyIfSrcOfReplPropMissing(Map props) { public void testIncrementalDumpEmptyDumpDirectory() throws Throwable { WarehouseInstance.Tuple tuple = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .status(replicatedDbName) .verifyResult(tuple.lastReplicationId); tuple = primary.dump(primaryDbName, Collections.emptyList()); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .status(replicatedDbName) .verifyResult(tuple.lastReplicationId); @@ -866,26 +980,26 @@ public void testIncrementalDumpEmptyDumpDirectory() throws Throwable { .dump(primaryDbName, Collections.emptyList()); // Incremental load to existing database with empty dump directory should set the repl id to the last event at src. 
- replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .status(replicatedDbName) .verifyResult(tuple.lastReplicationId); // Bootstrap load from an empty dump directory should return empty load directory error. tuple = primary.dump("someJunkDB", Collections.emptyList()); try { - replica.runCommand("REPL LOAD someJunkDB into someJunkDB"); + replica.runCommand("REPL LOAD someJunkDB from '" + tuple.dumpLocation + "'"); assert false; } catch (CommandProcessorException e) { assertTrue(e.getMessage().toLowerCase().contains("semanticException no data to load in path".toLowerCase())); } - // Bootstrap load from an empty dump directory should return empty load directory error. Since we have repl status - //check on target + // Incremental load to a non-existent database should return a 'database does not exist' error. tuple = primary.dump("someJunkDB"); try { - replica.runCommand("REPL LOAD someJunkDB into someJunkDB "); + replica.runCommand("REPL LOAD someJunkDB from '" + tuple.dumpLocation + "'"); } catch (CommandProcessorException e) { - assertTrue(e.getMessage().toLowerCase().contains("semanticException no data to load in path".toLowerCase())); + assertTrue(e.getMessage().toLowerCase().contains( + "org.apache.hadoop.hive.ql.ddl.DDLTask. Database does not exist: someJunkDB".toLowerCase())); } primary.run(" drop database if exists " + testDbName + " cascade"); @@ -895,7 +1009,7 @@ public void testIncrementalDumpMultiIteration() throws Throwable { WarehouseInstance.Tuple bootstrapTuple = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootstrapTuple.dumpLocation) .status(replicatedDbName) .verifyResult(bootstrapTuple.lastReplicationId); @@ -908,7 +1022,7 @@ public void testIncrementalDumpMultiIteration() throws Throwable { .run("insert into table3 partition(country='india') values(3)") .dump(primaryDbName, Collections.emptyList()); - replica.load(replicatedDbName, primaryDbName, + replica.load(replicatedDbName, incremental.dumpLocation, Collections.singletonList("'hive.repl.approx.max.load.tasks'='10'")) .status(replicatedDbName) .verifyResult(incremental.lastReplicationId) @@ -933,7 +1047,7 @@ public void testIncrementalDumpMultiIteration() throws Throwable { FileStatus[] fileStatus = fs.listStatus(path); int numEvents = fileStatus.length - 1; //one is metadata file - replica.load(replicatedDbName, primaryDbName, + replica.load(replicatedDbName, incremental.dumpLocation, Collections.singletonList("'hive.repl.approx.max.load.tasks'='1'")) .run("use " + replicatedDbName) .run("show tables") @@ -953,7 +1067,7 @@ public void testIfCkptAndSourceOfReplPropsIgnoredByReplDump() throws Throwable { .dump(primaryDbName); // Bootstrap Repl A -> B - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuplePrimary.dumpLocation) .run("repl status " + replicatedDbName) .verifyResult(tuplePrimary.lastReplicationId) .run("show tblproperties t1('custom.property')") @@ -965,12 +1079,12 @@ public void testIfCkptAndSourceOfReplPropsIgnoredByReplDump() throws Throwable { // do a empty incremental load to allow dump of replicatedDbName WarehouseInstance.Tuple temp = primary.dump(primaryDbName, Collections.emptyList()); - replica.load(replicatedDbName, primaryDbName); // first successful incremental load. + replica.load(replicatedDbName, temp.dumpLocation); // first successful incremental load. 
// Bootstrap Repl B -> C WarehouseInstance.Tuple tupleReplica = replica.dump(replicatedDbName); String replDbFromReplica = replicatedDbName + "_dupe"; - replica.load(replDbFromReplica, replicatedDbName) + replica.load(replDbFromReplica, tupleReplica.dumpLocation) .run("use " + replDbFromReplica) .run("repl status " + replDbFromReplica) .verifyResult(tupleReplica.lastReplicationId) @@ -999,7 +1113,7 @@ public void testIfCkptAndSourceOfReplPropsIgnoredByReplDump() throws Throwable { .dump(primaryDbName, Collections.emptyList()); // Incremental Repl A -> B with alters on db/table/partition - WarehouseInstance.Tuple tupleReplicaInc = replica.load(replicatedDbName, primaryDbName) + WarehouseInstance.Tuple tupleReplicaInc = replica.load(replicatedDbName, tuplePrimaryInc.dumpLocation) .run("repl status " + replicatedDbName) .verifyResult(tuplePrimaryInc.lastReplicationId) .dump(replicatedDbName, Collections.emptyList()); @@ -1013,7 +1127,7 @@ public void testIfCkptAndSourceOfReplPropsIgnoredByReplDump() throws Throwable { verifyIfCkptPropMissing(india.getParameters()); // Incremental Repl B -> C with alters on db/table/partition - replica.load(replDbFromReplica, replicatedDbName) + replica.load(replDbFromReplica, tupleReplicaInc.dumpLocation) .run("use " + replDbFromReplica) .run("repl status " + replDbFromReplica) .verifyResult(tupleReplicaInc.lastReplicationId) @@ -1044,7 +1158,7 @@ public void testIfCkptPropIgnoredByExport() throws Throwable { // Bootstrap Repl A -> B and then export table t1 String path = "hdfs:///tmp/" + replicatedDbName + "/"; String exportPath = "'" + path + "1/'"; - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuplePrimary.dumpLocation) .run("repl status " + replicatedDbName) .verifyResult(tuplePrimary.lastReplicationId) .run("use " + replicatedDbName) @@ -1085,7 +1199,7 @@ public void testIfBootstrapReplLoadFailWhenRetryAfterBootstrapComplete() throws .run("insert into table t2 partition(country='us') values ('sfo')") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .run("use " + replicatedDbName) .run("repl status " + replicatedDbName) .verifyResult(tuple.lastReplicationId) @@ -1098,12 +1212,12 @@ public void testIfBootstrapReplLoadFailWhenRetryAfterBootstrapComplete() throws replica.verifyIfCkptSet(replicatedDbName, tuple.dumpLocation); // Retry with same dump with which it was already loaded also fails. - replica.loadFailure(replicatedDbName, primaryDbName); + replica.loadFailure(replicatedDbName, tuple.dumpLocation); // Retry from same dump when the database is empty is also not allowed. replica.run("drop table t1") .run("drop table t2") - .loadFailure(replicatedDbName, primaryDbName); + .loadFailure(replicatedDbName, tuple.dumpLocation); } @Test @@ -1143,7 +1257,7 @@ public Boolean apply(CallerArguments args) { // Trigger bootstrap dump which just creates table t1 and other tables (t2, t3) and constraints not loaded. List withConfigs = Arrays.asList("'hive.repl.approx.max.load.tasks'='1'"); try { - replica.loadFailure(replicatedDbName, primaryDbName, withConfigs); + replica.loadFailure(replicatedDbName, tuple.dumpLocation, withConfigs); callerVerifier.assertInjectionsPerformed(true, false); } finally { InjectableBehaviourObjectStore.resetCallerVerifier(); // reset the behaviour @@ -1179,7 +1293,7 @@ public Boolean apply(CallerArguments args) { try { // Retry with same dump with which it was already loaded should resume the bootstrap load. 
// This time, it fails when try to load the foreign key constraints. All other constraints are loaded. - replica.loadFailure(replicatedDbName, primaryDbName, withConfigs); + replica.loadFailure(replicatedDbName, tuple.dumpLocation, withConfigs); callerVerifier.assertInjectionsPerformed(true, false); } finally { InjectableBehaviourObjectStore.resetCallerVerifier(); // reset the behaviour @@ -1217,7 +1331,7 @@ public Boolean apply(CallerArguments args) { try { // Retry with same dump with which it was already loaded should resume the bootstrap load. // This time, it completes by adding just foreign key constraints for table t2. - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); callerVerifier.assertInjectionsPerformed(true, false); } finally { InjectableBehaviourObjectStore.resetCallerVerifier(); // reset the behaviour @@ -1268,7 +1382,7 @@ public Boolean apply(List ptns) { // Make sure that there's some order in which the objects are loaded. List withConfigs = Arrays.asList("'hive.repl.approx.max.load.tasks'='1'", "'hive.in.repl.test.files.sorted'='true'"); - replica.loadFailure(replicatedDbName, primaryDbName, withConfigs); + replica.loadFailure(replicatedDbName, tuple.dumpLocation, withConfigs); InjectableBehaviourObjectStore.setAlterPartitionsBehaviour(null); // reset the behaviour alterPartitionStub.assertInjectionsPerformed(true, false); @@ -1300,7 +1414,7 @@ public Boolean apply(@Nullable CallerArguments args) { try { // Retry with same dump with which it was already loaded should resume the bootstrap load. // This time, it completes by adding remaining partitions and function. - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); callerVerifier.assertInjectionsPerformed(false, false); } finally { InjectableBehaviourObjectStore.resetCallerVerifier(); // reset the behaviour @@ -1327,7 +1441,7 @@ public void testMoveOptimizationBootstrapReplLoadRetryAfterFailure() throws Thro .dump(primaryDbName); testMoveOptimization(primaryDbName, replicatedDbName, replicatedDbName_CM, "t2", - "ADD_PARTITION"); + "ADD_PARTITION", tuple); } @Test @@ -1340,8 +1454,8 @@ public void testMoveOptimizationIncrementalFailureAfterCopyReplace() throws Thro .run("insert into table t2 partition(country='india') values ('bangalore')") .run("create table t1 (place string) partitioned by (country string)") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, withConfigs); - replica.load(replicatedDbName_CM, primaryDbName, withConfigs); + replica.load(replicatedDbName, tuple.dumpLocation, withConfigs); + replica.load(replicatedDbName_CM, tuple.dumpLocation, withConfigs); replica.run("alter database " + replicatedDbName + " set DBPROPERTIES ('" + SOURCE_OF_REPLICATION + "' = '1,2,3')") .run("alter database " + replicatedDbName_CM + " set DBPROPERTIES ('" + SOURCE_OF_REPLICATION + "' = '1,2,3')"); @@ -1349,7 +1463,7 @@ public void testMoveOptimizationIncrementalFailureAfterCopyReplace() throws Thro .run("insert overwrite table t1 select * from t2") .dump(primaryDbName, Collections.emptyList()); - testMoveOptimization(primaryDbName, replicatedDbName, replicatedDbName_CM, "t1", "ADD_PARTITION"); + testMoveOptimization(primaryDbName, replicatedDbName, replicatedDbName_CM, "t1", "ADD_PARTITION", tuple); } @Test @@ -1357,24 +1471,24 @@ public void testMoveOptimizationIncrementalFailureAfterCopy() throws Throwable { List withConfigs = Collections.singletonList("'hive.repl.enable.move.optimization'='true'"); 
String replicatedDbName_CM = replicatedDbName + "_CM"; - primary.run("use " + primaryDbName) + WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName) .run("create table t2 (place string) partitioned by (country string)") .run("ALTER TABLE t2 ADD PARTITION (country='india')") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, withConfigs); - replica.load(replicatedDbName_CM, primaryDbName, withConfigs); + replica.load(replicatedDbName, tuple.dumpLocation, withConfigs); + replica.load(replicatedDbName_CM, tuple.dumpLocation, withConfigs); replica.run("alter database " + replicatedDbName + " set DBPROPERTIES ('" + SOURCE_OF_REPLICATION + "' = '1,2,3')") .run("alter database " + replicatedDbName_CM + " set DBPROPERTIES ('" + SOURCE_OF_REPLICATION + "' = '1,2,3')"); - WarehouseInstance.Tuple tuple = primary.run("use " + primaryDbName) + tuple = primary.run("use " + primaryDbName) .run("insert into table t2 partition(country='india') values ('bangalore')") .dump(primaryDbName, Collections.emptyList()); - testMoveOptimization(primaryDbName, replicatedDbName, replicatedDbName_CM, "t2", "INSERT"); + testMoveOptimization(primaryDbName, replicatedDbName, replicatedDbName_CM, "t2", "INSERT", tuple); } private void testMoveOptimization(String primaryDb, String replicaDb, String replicatedDbName_CM, - String tbl, String eventType) throws Throwable { + String tbl, String eventType, WarehouseInstance.Tuple tuple) throws Throwable { List withConfigs = Collections.singletonList("'hive.repl.enable.move.optimization'='true'"); @@ -1396,13 +1510,13 @@ public Boolean apply(NotificationEvent entry) { InjectableBehaviourObjectStore.setAddNotificationModifier(callerVerifier); try { - replica.loadFailure(replicaDb, primaryDbName, withConfigs); + replica.loadFailure(replicaDb, tuple.dumpLocation, withConfigs); } finally { InjectableBehaviourObjectStore.resetAddNotificationModifier(); } callerVerifier.assertInjectionsPerformed(true, false); - replica.load(replicaDb, primaryDbName, withConfigs); + replica.load(replicaDb, tuple.dumpLocation, withConfigs); replica.run("use " + replicaDb) .run("select country from " + tbl + " where country == 'india'") @@ -1413,13 +1527,13 @@ public Boolean apply(NotificationEvent entry) { InjectableBehaviourObjectStore.setAddNotificationModifier(callerVerifier); try { - replica.loadFailure(replicatedDbName_CM, primaryDbName, withConfigs); + replica.loadFailure(replicatedDbName_CM, tuple.dumpLocation, withConfigs); } finally { InjectableBehaviourObjectStore.resetAddNotificationModifier(); } callerVerifier.assertInjectionsPerformed(true, false); - replica.load(replicatedDbName_CM, primaryDbName, withConfigs); + replica.load(replicatedDbName_CM, tuple.dumpLocation, withConfigs); replica.run("use " + replicatedDbName_CM) .run("select country from " + tbl + " where country == 'india'") @@ -1459,7 +1573,7 @@ public Boolean apply(@Nullable CallerArguments args) { // again from start. InjectableBehaviourObjectStore.setAlterTableModifier(callerVerifier); try { - replica.loadFailure(replicatedDbName, primaryDbName); + replica.loadFailure(replicatedDbName, tuple.dumpLocation); callerVerifier.assertInjectionsPerformed(true, false); } finally { InjectableBehaviourObjectStore.resetAlterTableModifier(); @@ -1469,7 +1583,7 @@ public Boolean apply(@Nullable CallerArguments args) { // is loaded before t2. So that scope is set to table in first iteration for table t1. In the next iteration, it // loads only remaining partitions of t2, so that the table tracker has no tasks. 
List withConfigs = Arrays.asList("'hive.in.repl.test.files.sorted'='true'"); - replica.load(replicatedDbName, primaryDbName, withConfigs); + replica.load(replicatedDbName, tuple.dumpLocation, withConfigs); replica.run("use " + replicatedDbName) .run("repl status " + replicatedDbName) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java index df304c2607..6a9cf1e4af 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java @@ -108,7 +108,7 @@ public void replicationWithoutExternalTables() throws Throwable { assertFalse(primary.miniDFSCluster.getFileSystem() .exists(new Path(new Path(tuple.dumpLocation, primaryDbName.toLowerCase()), FILE_NAME))); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("repl status " + replicatedDbName) .verifyResult(tuple.lastReplicationId) .run("use " + replicatedDbName) @@ -128,7 +128,7 @@ public void replicationWithoutExternalTables() throws Throwable { assertFalse(primary.miniDFSCluster.getFileSystem() .exists(new Path(tuple.dumpLocation, FILE_NAME))); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables like 't3'") .verifyFailure(new String[] { "t3" }) @@ -155,7 +155,7 @@ public void externalTableReplicationWithDefaultPaths() throws Throwable { List withClauseOptions = externalTableBasePathWithClause(); - replica.load(replicatedDbName, primaryDbName, withClauseOptions) + replica.load(replicatedDbName, tuple.dumpLocation, withClauseOptions) .run("use " + replicatedDbName) .run("show tables like 't1'") .verifyResult("t1") @@ -166,7 +166,8 @@ public void externalTableReplicationWithDefaultPaths() throws Throwable { .run("select country from t2 where country = 'us'") .verifyResult("us") .run("select country from t2 where country = 'france'") - .verifyResult("france"); + .verifyResult("france") + .run("show partitions t2").verifyResults(new String[] {"country=france", "country=india", "country=us"}); // Ckpt should be set on bootstrapped db. 
replica.verifyIfCkptSet(replicatedDbName, tuple.dumpLocation); @@ -184,7 +185,7 @@ public void externalTableReplicationWithDefaultPaths() throws Throwable { assertExternalFileInfo(Arrays.asList("t1", "t2", "t3", "t4"), new Path(tuple.dumpLocation, FILE_NAME)); - replica.load(replicatedDbName, primaryDbName, withClauseOptions) + replica.load(replicatedDbName, tuple.dumpLocation, withClauseOptions) .run("use " + replicatedDbName) .run("show tables like 't3'") .verifyResult("t3") @@ -250,13 +251,13 @@ public void externalTableReplicationWithCustomPaths() throws Throwable { "'distcp.options.update'=''" ); - primary.run("use " + primaryDbName) + WarehouseInstance.Tuple bootstrapTuple = primary.run("use " + primaryDbName) .run("create external table a (i int, j int) " + "row format delimited fields terminated by ',' " + "location '" + externalTableLocation.toUri() + "'") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, bootstrapTuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables like 'a'") .verifyResults(Collections.singletonList("a")) @@ -271,10 +272,10 @@ public void externalTableReplicationWithCustomPaths() throws Throwable { outputStream.write("13,21\n".getBytes()); } - primary.run("create table b (i int)") + WarehouseInstance.Tuple incrementalTuple = primary.run("create table b (i int)") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, incrementalTuple.dumpLocation, loadWithClause) .run("select i From a") .verifyResults(new String[] { "1", "13" }) .run("select j from a") @@ -283,11 +284,11 @@ public void externalTableReplicationWithCustomPaths() throws Throwable { // alter table location to something new. 
externalTableLocation = new Path("/" + testName.getMethodName() + "/" + primaryDbName + "/new_location/a/"); - primary.run("use " + primaryDbName) + incrementalTuple = primary.run("use " + primaryDbName) .run("alter table a set location '" + externalTableLocation + "'") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, incrementalTuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("select i From a") .verifyResults(Collections.emptyList()); @@ -313,7 +314,7 @@ public void externalTableWithPartitions() throws Throwable { assertExternalFileInfo(Collections.singletonList("t2"), new Path(new Path(tuple.dumpLocation, primaryDbName.toLowerCase()), FILE_NAME)); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables like 't2'") .verifyResults(new String[] { "t2" }) @@ -337,7 +338,7 @@ public void externalTableWithPartitions() throws Throwable { assertExternalFileInfo(Collections.singletonList("t2"), new Path(tuple.dumpLocation, FILE_NAME)); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("select distinct(country) from t2") .verifyResults(new String[] { "india", "australia" }) @@ -357,12 +358,12 @@ public void externalTableWithPartitions() throws Throwable { outputStream.write("paris".getBytes()); } - primary.run("use " + primaryDbName) + tuple = primary.run("use " + primaryDbName) .run("ALTER TABLE t2 ADD PARTITION (country='france') LOCATION '" + customPartitionLocation .toString() + "'") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("select place from t2 where country='france'") .verifyResults(new String[] { "paris" }) @@ -372,11 +373,11 @@ public void externalTableWithPartitions() throws Throwable { String tmpLocation = "/tmp/" + System.nanoTime(); primary.miniDFSCluster.getFileSystem().mkdirs(new Path(tmpLocation), new FsPermission("777")); - primary.run("use " + primaryDbName) + tuple = primary.run("use " + primaryDbName) .run("alter table t2 partition (country='france') set location '" + tmpLocation + "'") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("select place from t2 where country='france'") .verifyResults(new String[] {}) @@ -391,19 +392,19 @@ public void externalTableWithPartitions() throws Throwable { String tmpLocation2 = "/tmp/" + System.nanoTime() + "_2"; primary.miniDFSCluster.getFileSystem().mkdirs(new Path(tmpLocation2), new FsPermission("777")); - primary.run("use " + primaryDbName) + tuple = primary.run("use " + primaryDbName) .run("insert into table t2 partition(country='france') values ('lyon')") .run("alter table t2 set location '" + tmpLocation2 + "'") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause); + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause); assertTablePartitionLocation(primaryDbName + ".t2", replicatedDbName + ".t2"); } @Test public void externalTableIncrementalReplication() throws Throwable { WarehouseInstance.Tuple tuple = primary.dumpWithCommand("repl dump " + primaryDbName); - 
replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); Path externalTableLocation = new Path("/" + testName.getMethodName() + "/t1/"); @@ -433,7 +434,7 @@ public void externalTableIncrementalReplication() throws Throwable { } List loadWithClause = externalTableBasePathWithClause(); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables like 't1'") .verifyResult("t1") @@ -454,7 +455,7 @@ public void externalTableIncrementalReplication() throws Throwable { tuple = primary.dump(primaryDbName); assertExternalFileInfo(Collections.singletonList("t1"), new Path(tuple.dumpLocation, FILE_NAME)); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables like 't1'") .verifyResult("t1") @@ -475,7 +476,7 @@ public void externalTableIncrementalReplication() throws Throwable { .run("alter table t1 drop partition (country='us')") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .run("select * From t1") .verifyResults(new String[] {}) .verifyReplTargetProperty(replicatedDbName); @@ -507,7 +508,7 @@ public void bootstrapExternalTablesDuringIncrementalPhase() throws Throwable { assertFalse(primary.miniDFSCluster.getFileSystem() .exists(new Path(new Path(tuple.dumpLocation, primaryDbName.toLowerCase()), FILE_NAME))); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .status(replicatedDbName) .verifyResult(tuple.lastReplicationId) .run("use " + replicatedDbName) @@ -547,7 +548,7 @@ public void bootstrapExternalTablesDuringIncrementalPhase() throws Throwable { tblPath = new Path(dbPath, "t3"); assertTrue(primary.miniDFSCluster.getFileSystem().exists(tblPath)); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .status(replicatedDbName) .verifyResult(tuple.lastReplicationId) .run("use " + replicatedDbName) @@ -601,7 +602,7 @@ public void retryBootstrapExternalTablesFromDifferentDump() throws Throwable { .run("create table t3 as select * from t1") .dump(primaryDbName, dumpWithClause); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tupleBootstrapWithoutExternal.dumpLocation, loadWithClause) .status(replicatedDbName) .verifyResult(tupleBootstrapWithoutExternal.lastReplicationId) .run("use " + replicatedDbName) @@ -639,7 +640,7 @@ public Boolean apply(@Nullable CallerArguments args) { // In the retry, these half baked tables should be dropped and bootstrap should be successful. InjectableBehaviourObjectStore.setAlterTableModifier(callerVerifier); try { - replica.loadFailure(replicatedDbName, primaryDbName, loadWithClause); + replica.loadFailure(replicatedDbName, tupleIncWithExternalBootstrap.dumpLocation, loadWithClause); callerVerifier.assertInjectionsPerformed(true, false); } finally { InjectableBehaviourObjectStore.resetAlterTableModifier(); @@ -658,7 +659,7 @@ public Boolean apply(@Nullable CallerArguments args) { // So, REPL LOAD fails. 
loadWithClause.add("'" + REPL_CLEAN_TABLES_FROM_BOOTSTRAP_CONFIG + "'='" + tupleBootstrapWithoutExternal.dumpLocation + "'"); - replica.loadFailure(replicatedDbName, primaryDbName, loadWithClause); + replica.loadFailure(replicatedDbName, tupleNewIncWithExternalBootstrap.dumpLocation, loadWithClause); loadWithClause.remove("'" + REPL_CLEAN_TABLES_FROM_BOOTSTRAP_CONFIG + "'='" + tupleBootstrapWithoutExternal.dumpLocation + "'"); @@ -668,7 +669,7 @@ public Boolean apply(@Nullable CallerArguments args) { // Verify if bootstrapping with same dump is idempotent and return same result for (int i = 0; i < 2; i++) { - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tupleNewIncWithExternalBootstrap.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables like 't1'") .verifyFailure(new String[]{"t1"}) @@ -719,7 +720,7 @@ public void testExternalTablesIncReplicationWithConcurrentDropTable() throws Thr .run("insert into table t1 values (1)") .dump(primaryDbName, dumpWithClause); - replica.load(replicatedDbName, primaryDbName, loadWithClause); + replica.load(replicatedDbName, tupleBootstrap.dumpLocation, loadWithClause); // Insert a row into "t1" and create another external table using data from "t1". primary.run("use " + primaryDbName) @@ -758,7 +759,7 @@ public Table apply(@Nullable Table table) { // The newly inserted data "2" should be missing in table "t1". But, table t2 should exist and have // inserted data. - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tupleInc.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("select id from t1 order by id") .verifyResult("1") @@ -778,14 +779,14 @@ public void testIncrementalDumpEmptyDumpDirectory() throws Throwable { .run("insert into table t1 values (2)") .dump(primaryDbName, dumpWithClause); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .status(replicatedDbName) .verifyResult(tuple.lastReplicationId); // This looks like an empty dump but it has the ALTER TABLE event created by the previous // dump. We need it here so that the next dump won't have any events. WarehouseInstance.Tuple incTuple = primary.dump(primaryDbName, dumpWithClause); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, incTuple.dumpLocation, loadWithClause) .status(replicatedDbName) .verifyResult(incTuple.lastReplicationId); @@ -800,7 +801,7 @@ public void testIncrementalDumpEmptyDumpDirectory() throws Throwable { Long.valueOf(inc2Tuple.lastReplicationId).longValue()); // Incremental load to existing database with empty dump directory should set the repl id to the last event at src. 
- replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, inc2Tuple.dumpLocation, loadWithClause) .status(replicatedDbName) .verifyResult(inc2Tuple.lastReplicationId); } @@ -820,7 +821,7 @@ public void testExtTableBootstrapDuringIncrementalWithoutAnyEvents() throws Thro .run("insert into table t2 values (1)") .dump(primaryDbName, dumpWithClause); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, bootstrapDump.dumpLocation, loadWithClause) .status(replicatedDbName) .verifyResult(bootstrapDump.lastReplicationId) .run("use " + replicatedDbName) @@ -833,7 +834,7 @@ public void testExtTableBootstrapDuringIncrementalWithoutAnyEvents() throws Thro // This looks like an empty dump but it has the ALTER TABLE event created by the previous // dump. We need it here so that the next dump won't have any events. WarehouseInstance.Tuple incTuple = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, incTuple.dumpLocation) .status(replicatedDbName) .verifyResult(incTuple.lastReplicationId); @@ -843,7 +844,7 @@ public void testExtTableBootstrapDuringIncrementalWithoutAnyEvents() throws Thro WarehouseInstance.Tuple inc2Tuple = primary.run("use " + primaryDbName) .dump(primaryDbName, dumpWithClause); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, inc2Tuple.dumpLocation, loadWithClause) .status(replicatedDbName) .verifyResult(inc2Tuple.lastReplicationId) .run("use " + replicatedDbName) @@ -869,7 +870,7 @@ public void replicationWithTableNameContainsKeywords() throws Throwable { .run("insert into table t2_constraints partition(country='france') values ('paris')") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("repl status " + replicatedDbName) .verifyResult(tuple.lastReplicationId) .run("use " + replicatedDbName) @@ -881,7 +882,7 @@ public void replicationWithTableNameContainsKeywords() throws Throwable { .verifyResults(new String[] {"1", "2"}) .verifyReplTargetProperty(replicatedDbName); - primary.run("use " + primaryDbName) + tuple = primary.run("use " + primaryDbName) .run("create external table t3_bootstrap (id int)") .run("insert into table t3_bootstrap values (10)") .run("insert into table t3_bootstrap values (20)") @@ -890,7 +891,7 @@ public void replicationWithTableNameContainsKeywords() throws Throwable { .run("insert into table t4_tables values (20)") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables like 't3_bootstrap'") .verifyResults(new String[] {"t3_bootstrap"}) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java index bf691f331c..098594563c 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java @@ -107,7 +107,7 @@ public void replicationWithoutExternalTables() throws Throwable { assertFalse(primary.miniDFSCluster.getFileSystem() .exists(new 
Path(new Path(tuple.dumpLocation, primaryDbName.toLowerCase()), FILE_NAME))); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("repl status " + replicatedDbName) .verifyResult(tuple.lastReplicationId) .run("use " + replicatedDbName) @@ -127,7 +127,7 @@ public void replicationWithoutExternalTables() throws Throwable { assertFalse(primary.miniDFSCluster.getFileSystem() .exists(new Path(tuple.dumpLocation, FILE_NAME))); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables like 't3'") .verifyFailure(new String[] {"t3"}) @@ -153,7 +153,7 @@ public void externalTableReplicationWithDefaultPaths() throws Throwable { List withClauseOptions = externalTableBasePathWithClause(); - replica.load(replicatedDbName, primaryDbName, withClauseOptions) + replica.load(replicatedDbName, tuple.dumpLocation, withClauseOptions) .run("use " + replicatedDbName) .run("show tables like 't1'") .verifyResult("t1") @@ -164,7 +164,8 @@ public void externalTableReplicationWithDefaultPaths() throws Throwable { .run("select country from t2 where country = 'us'") .verifyResult(null) .run("select country from t2 where country = 'france'") - .verifyResult(null); + .verifyResult(null) + .run("show partitions t2").verifyResults(new String[] {"country=france", "country=india", "country=us"}); // Ckpt should be set on bootstrapped db. replica.verifyIfCkptSet(replicatedDbName, tuple.dumpLocation); @@ -178,7 +179,7 @@ public void externalTableReplicationWithDefaultPaths() throws Throwable { // verify that the external table info is written correctly for incremental assertFalseExternalFileInfo(new Path(tuple.dumpLocation, FILE_NAME)); - replica.load(replicatedDbName, primaryDbName, withClauseOptions) + replica.load(replicatedDbName, tuple.dumpLocation, withClauseOptions) .run("use " + replicatedDbName) .run("show tables like 't3'") .verifyResult("t3") @@ -217,7 +218,7 @@ public void externalTableReplicationWithCustomPaths() throws Throwable { + "location '" + externalTableLocation.toUri() + "'") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, bootstrapTuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables like 'a'") .verifyResults(Collections.singletonList("a")) @@ -233,7 +234,7 @@ public void externalTableReplicationWithCustomPaths() throws Throwable { WarehouseInstance.Tuple incrementalTuple = primary.run("create table b (i int)") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, incrementalTuple.dumpLocation, loadWithClause) .run("select i From a") .verifyResults(new String[] {}) .run("select j from a") @@ -246,7 +247,7 @@ public void externalTableReplicationWithCustomPaths() throws Throwable { .run("alter table a set location '" + externalTableLocation + "'") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, incrementalTuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("select i From a") .verifyResults(Collections.emptyList()); @@ -270,13 +271,15 @@ public void externalTableWithPartitions() throws Throwable { assertFalseExternalFileInfo(new Path(new Path(tuple.dumpLocation, primaryDbName.toLowerCase()), FILE_NAME)); - replica.load(replicatedDbName, 
primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables like 't2'") .verifyResults(new String[] {"t2"}) .run("select place from t2") .verifyResults(new String[] {}) - .verifyReplTargetProperty(replicatedDbName); + .verifyReplTargetProperty(replicatedDbName) + .run("show partitions t2") + .verifyResults(new String[] {"country=india"}); // add new data externally, to a partition, but under the table level top directory Path partitionDir = new Path(externalTableLocation, "country=india"); @@ -291,7 +294,7 @@ public void externalTableWithPartitions() throws Throwable { assertFalseExternalFileInfo(new Path(tuple.dumpLocation, FILE_NAME)); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("select distinct(country) from t2") .verifyResults(new String[] {}) @@ -299,6 +302,8 @@ public void externalTableWithPartitions() throws Throwable { .verifyResults(new String[] {}) .run("select place from t2 where country='australia'") .verifyResults(new String[] {}) + .run("show partitions t2") + .verifyResults(new String[] {"country=australia", "country=india"}) .verifyReplTargetProperty(replicatedDbName); Path customPartitionLocation = @@ -316,10 +321,12 @@ public void externalTableWithPartitions() throws Throwable { .toString() + "'") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("select place from t2 where country='france'") .verifyResults(new String[] {}) + .run("show partitions t2") + .verifyResults(new String[] {"country=australia", "country=france", "country=india"}) .verifyReplTargetProperty(replicatedDbName); // change the location of the partition via alter command @@ -330,7 +337,7 @@ public void externalTableWithPartitions() throws Throwable { .run("alter table t2 partition (country='france') set location '" + tmpLocation + "'") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("select place from t2 where country='france'") .verifyResults(new String[] {}) @@ -346,13 +353,13 @@ public void externalTableWithPartitions() throws Throwable { .run("alter table t2 set location '" + tmpLocation2 + "'") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, loadWithClause); + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause); } @Test public void externalTableIncrementalReplication() throws Throwable { WarehouseInstance.Tuple tuple = primary.dumpWithCommand("repl dump " + primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); Path externalTableLocation = new Path("/" + testName.getMethodName() + "/t1/"); @@ -382,7 +389,7 @@ public void externalTableIncrementalReplication() throws Throwable { } List loadWithClause = externalTableBasePathWithClause(); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables like 't1'") .verifyResult("t1") @@ -403,7 +410,7 @@ public void externalTableIncrementalReplication() throws Throwable { tuple = primary.dump(primaryDbName); assertFalseExternalFileInfo(new 
Path(tuple.dumpLocation, FILE_NAME)); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables like 't1'") .verifyResult("t1") @@ -424,7 +431,7 @@ public void externalTableIncrementalReplication() throws Throwable { .run("alter table t1 drop partition (country='us')") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .run("select * From t1") .verifyResults(new String[] {}) .verifyReplTargetProperty(replicatedDbName); @@ -455,7 +462,7 @@ public void bootstrapExternalTablesDuringIncrementalPhase() throws Throwable { assertFalse(primary.miniDFSCluster.getFileSystem() .exists(new Path(new Path(tuple.dumpLocation, primaryDbName.toLowerCase()), FILE_NAME))); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .status(replicatedDbName) .verifyResult(tuple.lastReplicationId) .run("use " + replicatedDbName) @@ -496,7 +503,7 @@ public void bootstrapExternalTablesDuringIncrementalPhase() throws Throwable { tblPath = new Path(dbPath, "t3"); assertTrue(primary.miniDFSCluster.getFileSystem().exists(tblPath)); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .status(replicatedDbName) .verifyResult(tuple.lastReplicationId) .run("use " + replicatedDbName) @@ -542,7 +549,7 @@ public void testExternalTablesIncReplicationWithConcurrentDropTable() throws Thr .run("insert into table t1 values (1)") .dump(primaryDbName, dumpWithClause); - replica.load(replicatedDbName, primaryDbName, loadWithClause); + replica.load(replicatedDbName, tupleBootstrap.dumpLocation, loadWithClause); // Insert a row into "t1" and create another external table using data from "t1". primary.run("use " + primaryDbName) @@ -580,7 +587,7 @@ public Table apply(@Nullable Table table) { // The newly inserted data "2" should be missing in table "t1". But, table t2 should exist and have // inserted data. - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, tupleInc.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("select id from t1 order by id") .verifyResult(null) @@ -600,14 +607,14 @@ public void testIncrementalDumpEmptyDumpDirectory() throws Throwable { .run("insert into table t1 values (2)") .dump(primaryDbName, dumpWithClause); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .status(replicatedDbName) .verifyResult(tuple.lastReplicationId); // This looks like an empty dump but it has the ALTER TABLE event created by the previous // dump. We need it here so that the next dump won't have any events. WarehouseInstance.Tuple incTuple = primary.dump(primaryDbName, dumpWithClause); - replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, incTuple.dumpLocation, loadWithClause) .status(replicatedDbName) .verifyResult(incTuple.lastReplicationId); @@ -622,7 +629,7 @@ public void testIncrementalDumpEmptyDumpDirectory() throws Throwable { Long.valueOf(inc2Tuple.lastReplicationId).longValue()); // Incremental load to existing database with empty dump directory should set the repl id to the last event at src. 
- replica.load(replicatedDbName, primaryDbName, loadWithClause) + replica.load(replicatedDbName, inc2Tuple.dumpLocation, loadWithClause) .status(replicatedDbName) .verifyResult(inc2Tuple.lastReplicationId); } diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java index bcab190d24..15cb985b9a 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java @@ -85,7 +85,6 @@ static void internalBeforeClassSetup(Map overrides, Class clazz) acidConfs.putAll(overrides); primary = new WarehouseInstance(LOG, miniDFSCluster, acidConfs); - acidConfs.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); replica = new WarehouseInstance(LOG, miniDFSCluster, acidConfs); Map overridesForHiveConf1 = new HashMap() {{ put("fs.defaultFS", miniDFSCluster.getFileSystem().getUri().toString()); @@ -94,7 +93,6 @@ static void internalBeforeClassSetup(Map overrides, Class clazz) put("hive.metastore.client.capability.check", "false"); put("hive.stats.autogather", "false"); }}; - overridesForHiveConf1.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); replicaNonAcid = new WarehouseInstance(LOG, miniDFSCluster, overridesForHiveConf1); } @@ -126,7 +124,7 @@ public void tearDown() throws Throwable { @Test public void testAcidTableIncrementalReplication() throws Throwable { WarehouseInstance.Tuple bootStrapDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootStrapDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(bootStrapDump.lastReplicationId); List selectStmtList = new ArrayList<>(); @@ -211,7 +209,7 @@ public void testReplCM() throws Throwable { WarehouseInstance.Tuple incrementalDump; WarehouseInstance.Tuple bootStrapDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootStrapDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(bootStrapDump.lastReplicationId); @@ -219,7 +217,7 @@ public void testReplCM() throws Throwable { tableName, null, false, ReplicationTestUtils.OperationType.REPL_TEST_ACID_INSERT); incrementalDump = primary.dump(primaryDbName); primary.run("drop table " + primaryDbName + "." + tableName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + replica.loadWithoutExplain(replicatedDbName, incrementalDump.dumpLocation) .run("REPL STATUS " + replicatedDbName).verifyResult(incrementalDump.lastReplicationId); verifyResultsInReplicaInt(Lists.newArrayList("select count(*) from " + tableName, "select count(*) from " + tableName + "_nopart"), @@ -229,7 +227,7 @@ public void testReplCM() throws Throwable { tableNameMM, null, true, ReplicationTestUtils.OperationType.REPL_TEST_ACID_INSERT); incrementalDump = primary.dump(primaryDbName); primary.run("drop table " + primaryDbName + "." 
+ tableNameMM); - replica.loadWithoutExplain(replicatedDbName, primaryDbName) + replica.loadWithoutExplain(replicatedDbName, incrementalDump.dumpLocation) .run("REPL STATUS " + replicatedDbName).verifyResult(incrementalDump.lastReplicationId); verifyResultsInReplicaInt(Lists.newArrayList("select count(*) from " + tableNameMM, "select count(*) from " + tableNameMM + "_nopart"), diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigration.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigration.java index 3eab04552e..7fa23b1d8b 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigration.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigration.java @@ -100,6 +100,7 @@ static void internalBeforeClassSetup(Map overrideConfigs) throws put("hive.metastore.disallow.incompatible.col.type.changes", "false"); put("hive.strict.managed.tables", "true"); }}; + replica = new WarehouseInstance(LOG, miniDFSCluster, hiveConfigs); HashMap configsForPrimary = new HashMap() {{ put("fs.defaultFS", fs.getUri().toString()); @@ -117,8 +118,6 @@ static void internalBeforeClassSetup(Map overrideConfigs) throws }}; configsForPrimary.putAll(overrideConfigs); primary = new WarehouseInstance(LOG, miniDFSCluster, configsForPrimary); - hiveConfigs.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); - replica = new WarehouseInstance(LOG, miniDFSCluster, hiveConfigs); } private static Path createAvroSchemaFile(FileSystem fs, Path testPath) throws IOException { @@ -306,7 +305,7 @@ private Path assertTablePath(String replicatedDbName, String tableName) throws E return tablePath; } - private void loadWithFailureInAddNotification(String tbl) throws Throwable { + private void loadWithFailureInAddNotification(String tbl, String dumpLocation) throws Throwable { BehaviourInjection callerVerifier = new BehaviourInjection() { @Nullable @@ -327,7 +326,7 @@ public Boolean apply(@Nullable InjectableBehaviourObjectStore.CallerArguments ar }; InjectableBehaviourObjectStore.setCallerVerifier(callerVerifier); try { - replica.loadFailure(replicatedDbName, primaryDbName); + replica.loadFailure(replicatedDbName, dumpLocation); } finally { InjectableBehaviourObjectStore.resetCallerVerifier(); } @@ -337,49 +336,49 @@ public Boolean apply(@Nullable InjectableBehaviourObjectStore.CallerArguments ar @Test public void testBootstrapLoadMigrationManagedToAcid() throws Throwable { WarehouseInstance.Tuple tuple = prepareDataAndDump(primaryDbName, null); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); verifyLoadExecution(replicatedDbName, tuple.lastReplicationId); } @Test public void testIncrementalLoadMigrationManagedToAcid() throws Throwable { WarehouseInstance.Tuple tuple = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); tuple = prepareDataAndDump(primaryDbName, tuple.lastReplicationId); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); verifyLoadExecution(replicatedDbName, tuple.lastReplicationId); } @Test public void testIncrementalLoadMigrationManagedToAcidFailure() throws Throwable { WarehouseInstance.Tuple tuple = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); 
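The same edit repeats across these test hunks: the load side stops naming the source database and instead points at the concrete dump directory returned by the dump call. A minimal sketch of the pattern, assuming the WarehouseInstance fluent API used in these tests (primaryDbName and replicatedDbName are the usual test fixtures):

    WarehouseInstance.Tuple tuple = primary
        .run("use " + primaryDbName)
        .run("create table t (id int)")
        .run("insert into t values (1)")
        .dump(primaryDbName);                              // tuple carries dumpLocation and lastReplicationId

    replica.load(replicatedDbName, tuple.dumpLocation)     // issues REPL LOAD <db> FROM '<dumpLocation>'
        .run("repl status " + replicatedDbName)
        .verifyResult(tuple.lastReplicationId);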
tuple = prepareDataAndDump(primaryDbName, tuple.lastReplicationId); - loadWithFailureInAddNotification("tacid"); + loadWithFailureInAddNotification("tacid", tuple.dumpLocation); replica.run("use " + replicatedDbName) .run("show tables like tacid") .verifyResult(null); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); verifyLoadExecution(replicatedDbName, tuple.lastReplicationId); } @Test public void testIncrementalLoadMigrationManagedToAcidFailurePart() throws Throwable { WarehouseInstance.Tuple tuple = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); tuple = prepareDataAndDump(primaryDbName, tuple.lastReplicationId); - loadWithFailureInAddNotification("tacidpart"); + loadWithFailureInAddNotification("tacidpart", tuple.dumpLocation); replica.run("use " + replicatedDbName) .run("show tables like tacidpart") .verifyResult(null); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); verifyLoadExecution(replicatedDbName, tuple.lastReplicationId); } @Test public void testIncrementalLoadMigrationManagedToAcidAllOp() throws Throwable { WarehouseInstance.Tuple bootStrapDump = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, bootStrapDump.dumpLocation) .run("REPL STATUS " + replicatedDbName) .verifyResult(bootStrapDump.lastReplicationId); List selectStmtList = new ArrayList<>(); @@ -419,13 +418,13 @@ public void testBootstrapConvertedExternalTableAutoPurgeDataOnDrop() throws Thro .run("insert into avro_tbl partition (country='india') values ('another', 13)") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, bootstrap.dumpLocation); Path dataLocation = assertTablePath(replicatedDbName, "avro_tbl"); WarehouseInstance.Tuple incremental = primary.run("use " + primaryDbName) .run("drop table avro_tbl") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, incremental.dumpLocation); // After drop, the external table data location should be auto deleted as it is converted one. assertFalse(replica.miniDFSCluster.getFileSystem().exists(dataLocation)); @@ -434,15 +433,15 @@ public void testBootstrapConvertedExternalTableAutoPurgeDataOnDrop() throws Thro @Test public void testIncConvertedExternalTableAutoDeleteDataDirOnDrop() throws Throwable { WarehouseInstance.Tuple bootstrap = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, bootstrap.dumpLocation); - primary.run("use " + primaryDbName) + WarehouseInstance.Tuple incremental = primary.run("use " + primaryDbName) .run("create table avro_tbl ROW FORMAT SERDE " + "'org.apache.hadoop.hive.serde2.avro.AvroSerDe' stored as avro " + "tblproperties ('avro.schema.url'='" + avroSchemaFile.toUri().toString() + "')") .run("insert into avro_tbl values ('str', 13)") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, incremental.dumpLocation); // Data location is valid and is under default external warehouse directory. 
Table avroTable = replica.getTable(replicatedDbName, "avro_tbl"); @@ -450,10 +449,10 @@ public void testIncConvertedExternalTableAutoDeleteDataDirOnDrop() throws Throwa Path dataLocation = new Path(avroTable.getSd().getLocation()); assertTrue(replica.miniDFSCluster.getFileSystem().exists(dataLocation)); - primary.run("use " + primaryDbName) + incremental = primary.run("use " + primaryDbName) .run("drop table avro_tbl") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, incremental.dumpLocation); // After drop, the external table data location should be auto deleted as it is converted one. assertFalse(replica.miniDFSCluster.getFileSystem().exists(dataLocation)); @@ -464,7 +463,7 @@ public void testBootstrapLoadMigrationToAcidWithMoveOptimization() throws Throwa List withConfigs = Collections.singletonList("'hive.repl.enable.move.optimization'='true'"); WarehouseInstance.Tuple tuple = prepareDataAndDump(primaryDbName, null); - replica.load(replicatedDbName, primaryDbName, withConfigs); + replica.load(replicatedDbName, tuple.dumpLocation, withConfigs); verifyLoadExecution(replicatedDbName, tuple.lastReplicationId); } @@ -473,9 +472,9 @@ public void testIncrementalLoadMigrationToAcidWithMoveOptimization() throws Thro List withConfigs = Collections.singletonList("'hive.repl.enable.move.optimization'='true'"); WarehouseInstance.Tuple tuple = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tuple.dumpLocation); tuple = prepareDataAndDump(primaryDbName, tuple.lastReplicationId); - replica.load(replicatedDbName, primaryDbName, withConfigs); + replica.load(replicatedDbName, tuple.dumpLocation, withConfigs); verifyLoadExecution(replicatedDbName, tuple.lastReplicationId); } @@ -515,7 +514,7 @@ public void testMigrationWithUpgrade() throws Throwable { .run("create table texternal (id int) ") .run("insert into texternal values (1)") .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName) + replica.load(replicatedDbName, tuple.dumpLocation) .run("use " + replicatedDbName) .run("repl status " + replicatedDbName) .verifyResult(tuple.lastReplicationId) @@ -559,7 +558,7 @@ public void testMigrationWithUpgrade() throws Throwable { withConfigs.add("'hive.repl.include.external.tables'='true'"); withConfigs.add("'hive.distcp.privileged.doAs' = '" + UserGroupInformation.getCurrentUser().getUserName() + "'"); tuple = primary.dump(primaryDbName, withConfigs); - replica.load(replicatedDbName, primaryDbName, withConfigs); + replica.load(replicatedDbName, tuple.dumpLocation, withConfigs); replica.run("use " + replicatedDbName) .run("repl status " + replicatedDbName) .verifyResult(tuple.lastReplicationId) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigrationEx.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigrationEx.java index 1b4833ca6a..425c8cbf42 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigrationEx.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigrationEx.java @@ -26,7 +26,6 @@ import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection; import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId; import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import 
org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils; import org.apache.hadoop.hive.shims.Utils; import org.apache.hive.hcatalog.listener.DbNotificationListener; @@ -81,6 +80,7 @@ static void internalBeforeClassSetup(Map overrideConfigs) throws put("hive.strict.managed.tables", "true"); put("hive.metastore.transactional.event.listeners", ""); }}; + replica = new WarehouseInstance(LOG, miniDFSCluster, hiveConfigs); HashMap configsForPrimary = new HashMap() {{ put("fs.defaultFS", fs.getUri().toString()); @@ -96,8 +96,6 @@ static void internalBeforeClassSetup(Map overrideConfigs) throws }}; configsForPrimary.putAll(overrideConfigs); primary = new WarehouseInstance(LOG, miniDFSCluster, configsForPrimary); - hiveConfigs.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); - replica = new WarehouseInstance(LOG, miniDFSCluster, hiveConfigs); } @AfterClass @@ -188,13 +186,13 @@ public void testConcurrentOpDuringBootStrapDumpCreateTableReplay() throws Throwa // dump with operation after last repl id is fetched. WarehouseInstance.Tuple tuple = dumpWithLastEventIdHacked(2); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); verifyLoadExecution(replicatedDbName, tuple.lastReplicationId); assertTrue(ReplUtils.isFirstIncPending(replica.getDatabase(replicatedDbName).getParameters())); // next incremental dump tuple = primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); verifyLoadExecution(replicatedDbName, tuple.lastReplicationId); assertFalse(ReplUtils.isFirstIncPending(replica.getDatabase(replicatedDbName).getParameters())); } @@ -205,13 +203,13 @@ public void testConcurrentOpDuringBootStrapDumpInsertReplay() throws Throwable { // dump with operation after last repl id is fetched. WarehouseInstance.Tuple tuple = dumpWithLastEventIdHacked(4); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); verifyLoadExecution(replicatedDbName, tuple.lastReplicationId); assertTrue(ReplUtils.isFirstIncPending(replica.getDatabase(replicatedDbName).getParameters())); // next incremental dump tuple = primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); verifyLoadExecution(replicatedDbName, tuple.lastReplicationId); assertFalse(ReplUtils.isFirstIncPending(replica.getDatabase(replicatedDbName).getParameters())); } @@ -223,13 +221,13 @@ public void testTableLevelDumpMigration() throws Throwable { .run("create table t1 (i int, j int)") .dump(primaryDbName+".'t1'"); replica.run("create database " + replicatedDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); assertTrue(ReplUtils.isFirstIncPending(replica.getDatabase(replicatedDbName).getParameters())); tuple = primary.run("use " + primaryDbName) .run("insert into t1 values (1, 2)") .dump(primaryDbName+".'t1'"); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); assertFalse(ReplUtils.isFirstIncPending(replica.getDatabase(replicatedDbName).getParameters())); } @@ -245,7 +243,7 @@ public void testConcurrentOpDuringBootStrapDumpInsertOverwrite() throws Throwabl // dump with operation after last repl id is fetched. 
WarehouseInstance.Tuple tuple = dumpWithLastEventIdHacked(2); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); replica.run("use " + replicatedDbName) .run("show tables") .verifyResults(new String[] {"tacid"}) @@ -259,7 +257,7 @@ public void testConcurrentOpDuringBootStrapDumpInsertOverwrite() throws Throwabl // next incremental dump tuple = primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); replica.run("use " + replicatedDbName) .run("show tables") .verifyResults(new String[] {"tacid"}) @@ -272,7 +270,7 @@ public void testConcurrentOpDuringBootStrapDumpInsertOverwrite() throws Throwabl assertFalse(ReplUtils.isFirstIncPending(replica.getDatabase(replicatedDbName).getParameters())); } - private void loadWithFailureInAddNotification(String tbl) throws Throwable { + private void loadWithFailureInAddNotification(String tbl, String dumpLocation) throws Throwable { BehaviourInjection callerVerifier = new BehaviourInjection() { @Nullable @@ -296,7 +294,7 @@ public Boolean apply(@Nullable InjectableBehaviourObjectStore.CallerArguments ar try { List withClause = Collections.singletonList("'hive.metastore.transactional.event.listeners'='" + DbNotificationListener.class.getCanonicalName() + "'"); - replica.loadFailure(replicatedDbName, primaryDbName, withClause); + replica.loadFailure(replicatedDbName, dumpLocation, withClause); } finally { InjectableBehaviourObjectStore.resetCallerVerifier(); } @@ -309,7 +307,7 @@ public void testIncLoadPenFlagPropAlterDB() throws Throwable { // dump with operation after last repl id is fetched. WarehouseInstance.Tuple tuple = dumpWithLastEventIdHacked(4); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); verifyLoadExecution(replicatedDbName, tuple.lastReplicationId); assertTrue(ReplUtils.isFirstIncPending(replica.getDatabase(replicatedDbName).getParameters())); assertFalse(ReplUtils.isFirstIncPending(primary.getDatabase(primaryDbName).getParameters())); @@ -319,15 +317,15 @@ public void testIncLoadPenFlagPropAlterDB() throws Throwable { .run("create table tbl_temp (fld int)") .dump(primaryDbName); - loadWithFailureInAddNotification("tbl_temp"); + loadWithFailureInAddNotification("tbl_temp", tuple.dumpLocation); Database replDb = replica.getDatabase(replicatedDbName); assertTrue(ReplUtils.isFirstIncPending(replDb.getParameters())); assertFalse(ReplUtils.isFirstIncPending(primary.getDatabase(primaryDbName).getParameters())); assertTrue(replDb.getParameters().get("dummy_key").equalsIgnoreCase("dummy_val")); // next incremental dump - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + tuple = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); assertFalse(ReplUtils.isFirstIncPending(replica.getDatabase(replicatedDbName).getParameters())); } @@ -339,13 +337,13 @@ public void testIncLoadPenFlagWithMoveOptimization() throws Throwable { // dump with operation after last repl id is fetched. 
WarehouseInstance.Tuple tuple = dumpWithLastEventIdHacked(4); - replica.load(replicatedDbName, primaryDbName, withClause); + replica.load(replicatedDbName, tuple.dumpLocation, withClause); verifyLoadExecution(replicatedDbName, tuple.lastReplicationId); assertTrue(ReplUtils.isFirstIncPending(replica.getDatabase(replicatedDbName).getParameters())); // next incremental dump tuple = primary.dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName, withClause); + replica.load(replicatedDbName, tuple.dumpLocation, withClause); assertFalse(ReplUtils.isFirstIncPending(replica.getDatabase(replicatedDbName).getParameters())); } @@ -384,21 +382,21 @@ public void testOnwerPropagation() throws Throwable { // test bootstrap alterUserName("hive"); - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + WarehouseInstance.Tuple tuple = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); verifyUserName("hive"); // test incremental alterUserName("hive1"); - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + tuple = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); verifyUserName("hive1"); } @Test public void testOnwerPropagationInc() throws Throwable { - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + WarehouseInstance.Tuple tuple = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); primary.run("use " + primaryDbName) .run("create table tbl_own (fld int)") @@ -411,8 +409,8 @@ public void testOnwerPropagationInc() throws Throwable { // test incremental when table is getting created in the same load alterUserName("hive"); - primary.dump(primaryDbName); - replica.loadWithoutExplain(replicatedDbName, primaryDbName); + tuple = primary.dump(primaryDbName); + replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); verifyUserName("hive"); } diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestScheduledReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestScheduledReplicationScenarios.java index afb53b8642..c51bec1e27 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestScheduledReplicationScenarios.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestScheduledReplicationScenarios.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.parse; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; @@ -75,9 +76,16 @@ static void internalBeforeClassSetup(Map overrides, }}; acidEnableConf.putAll(overrides); + primary = new WarehouseInstance(LOG, miniDFSCluster, acidEnableConf); - acidEnableConf.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); replica = new WarehouseInstance(LOG, miniDFSCluster, acidEnableConf); + Map overridesForHiveConf1 = new HashMap() {{ + put("fs.defaultFS", miniDFSCluster.getFileSystem().getUri().toString()); + put("hive.support.concurrency", "false"); + put("hive.txn.manager", "org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager"); + put("hive.metastore.client.capability.check", "false"); + }}; + replicaNonAcid = new WarehouseInstance(LOG, miniDFSCluster, overridesForHiveConf1); } @Before @@ -89,15 +97,16 @@ public void setup() throws Throwable { 
public void tearDown() throws Throwable { primary.run("drop database if exists " + primaryDbName + " cascade"); replica.run("drop database if exists " + replicatedDbName + " cascade"); + replicaNonAcid.run("drop database if exists " + replicatedDbName + " cascade"); primary.run("drop database if exists " + primaryDbName + "_extra cascade"); } @Test - public void testAcidTablesReplLoadBootstrapIncr() throws Throwable { + public void testAcidTablesBootstrapIncr() throws Throwable { // Bootstrap primary.run("use " + primaryDbName) .run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc " + - "tblproperties (\"transactional\"=\"true\")") + "tblproperties (\"transactional\"=\"true\")") .run("insert into t1 values(1)") .run("insert into t1 values(2)"); try (ScheduledQueryExecutionService schqS = @@ -107,53 +116,36 @@ public void testAcidTablesReplLoadBootstrapIncr() throws Throwable { primary.run("create scheduled query s1 every 10 minutes as repl dump " + primaryDbName); primary.run("alter scheduled query s1 execute"); Thread.sleep(6000); - replica.run("create scheduled query s2 every 10 minutes as repl load " + primaryDbName + " INTO " - + replicatedDbName); - replica.run("alter scheduled query s2 execute"); - Thread.sleep(20000); + Path dumpRoot = new Path(primary.hiveConf.getVar(HiveConf.ConfVars.REPLDIR), primaryDbName.toLowerCase()); + Path currdumpRoot = new Path(dumpRoot, String.valueOf(next)); + replica.load(replicatedDbName, currdumpRoot.toString()); replica.run("use " + replicatedDbName) .run("show tables like 't1'") .verifyResult("t1") .run("select id from t1 order by id") .verifyResults(new String[]{"1", "2"}); - // First incremental, after bootstrap - primary.run("use " + primaryDbName) - .run("insert into t1 values(3)") - .run("insert into t1 values(4)"); - next++; - ReplDumpWork.injectNextDumpDirForTest(String.valueOf(next)); - primary.run("alter scheduled query s1 execute"); - Thread.sleep(20000); - replica.run("alter scheduled query s2 execute"); - Thread.sleep(20000); - replica.run("use " + replicatedDbName) - .run("show tables like 't1'") - .verifyResult("t1") - .run("select id from t1 order by id") - .verifyResults(new String[]{"1", "2", "3", "4"}); + // First incremental, after bootstrap - // Second incremental primary.run("use " + primaryDbName) - .run("insert into t1 values(5)") - .run("insert into t1 values(6)"); + .run("insert into t1 values(3)") + .run("insert into t1 values(4)"); next++; ReplDumpWork.injectNextDumpDirForTest(String.valueOf(next)); primary.run("alter scheduled query s1 execute"); - Thread.sleep(30000); - replica.run("alter scheduled query s2 execute"); - Thread.sleep(30000); + Thread.sleep(20000); + Path incrdumpRoot = new Path(dumpRoot, String.valueOf(next)); + replica.load(replicatedDbName, incrdumpRoot.toString()); replica.run("use " + replicatedDbName) .run("show tables like 't1'") .verifyResult("t1") .run("select id from t1 order by id") - .verifyResults(new String[]{"1", "2", "3", "4", "5", "6"}) + .verifyResults(new String[]{"1", "2", "3", "4"}) .run("drop table t1"); } finally { primary.run("drop scheduled query s1"); - replica.run("drop scheduled query s2"); } } } diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestStatsReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestStatsReplicationScenarios.java index b2733d143c..44a3805dee 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestStatsReplicationScenarios.java +++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestStatsReplicationScenarios.java @@ -97,21 +97,20 @@ static void internalBeforeClassSetup(Map primaryOverrides, put("fs.defaultFS", miniDFSCluster.getFileSystem().getUri().toString()); put(HiveConf.ConfVars.HIVE_IN_TEST_REPL.varname, "true"); }}; - Map replicatedOverrides = new HashMap<>(); + Map overrides = new HashMap<>(); - replicatedOverrides.putAll(additionalOverrides); - replicatedOverrides.putAll(replicaOverrides); + overrides.putAll(additionalOverrides); + overrides.putAll(replicaOverrides); + replica = new WarehouseInstance(LOG, miniDFSCluster, overrides); // Run with autogather false on primary if requested - Map sourceOverrides = new HashMap<>(); hasAutogather = autogather; additionalOverrides.put(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, autogather ? "true" : "false"); - sourceOverrides.putAll(additionalOverrides); - sourceOverrides.putAll(primaryOverrides); - primary = new WarehouseInstance(LOG, miniDFSCluster, sourceOverrides); - replicatedOverrides.put(MetastoreConf.ConfVars.REPLDIR.getHiveName(), primary.repldDir); - replica = new WarehouseInstance(LOG, miniDFSCluster, replicatedOverrides); + overrides.clear(); + overrides.putAll(additionalOverrides); + overrides.putAll(primaryOverrides); + primary = new WarehouseInstance(LOG, miniDFSCluster, overrides); // Use transactional tables acidTableKindToUse = acidTableKind; @@ -331,14 +330,14 @@ private String dumpLoadVerify(List tableNames, String lastReplicationId, // checkpoint for a table in the middle of list of tables. if (failRetry) { if (lastReplicationId == null) { - failBootstrapLoad(tableNames.size()/2); + failBootstrapLoad(dumpTuple, tableNames.size()/2); } else { - failIncrementalLoad(); + failIncrementalLoad(dumpTuple); } } // Load, possibly a retry - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, dumpTuple.dumpLocation); // Metadata load may not load all the events. if (!metadataOnly) { @@ -364,8 +363,9 @@ private String dumpLoadVerify(List tableNames, String lastReplicationId, /** * Run a bootstrap that will fail. + * @param tuple the location of bootstrap dump */ - private void failBootstrapLoad(int failAfterNumTables) throws Throwable { + private void failBootstrapLoad(WarehouseInstance.Tuple tuple, int failAfterNumTables) throws Throwable { // fail setting ckpt directory property for the second table so that we test the case when // bootstrap load fails after some but not all tables are loaded. BehaviourInjection callerVerifier @@ -391,14 +391,14 @@ public Boolean apply(@Nullable CallerArguments args) { InjectableBehaviourObjectStore.setAlterTableModifier(callerVerifier); try { - replica.loadFailure(replicatedDbName, primaryDbName); + replica.loadFailure(replicatedDbName, tuple.dumpLocation); callerVerifier.assertInjectionsPerformed(true, false); } finally { InjectableBehaviourObjectStore.resetAlterTableModifier(); } } - private void failIncrementalLoad() throws Throwable { + private void failIncrementalLoad(WarehouseInstance.Tuple dumpTuple) throws Throwable { // fail add notification when second update table stats event is encountered. Thus we // test successful application as well as failed application of this event. 
BehaviourInjection callerVerifier @@ -421,7 +421,7 @@ public Boolean apply(NotificationEvent entry) { InjectableBehaviourObjectStore.setAddNotificationModifier(callerVerifier); try { - replica.loadFailure(replicatedDbName, primaryDbName); + replica.loadFailure(replicatedDbName, dumpTuple.dumpLocation); } finally { InjectableBehaviourObjectStore.resetAddNotificationModifier(); } @@ -449,7 +449,7 @@ public Boolean apply(NotificationEvent entry) { InjectableBehaviourObjectStore.setAddNotificationModifier(callerVerifier); try { - replica.loadFailure(replicatedDbName, primaryDbName); + replica.loadFailure(replicatedDbName, dumpTuple.dumpLocation); } finally { InjectableBehaviourObjectStore.resetAddNotificationModifier(); } diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestTableLevelReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestTableLevelReplicationScenarios.java index 0c44100e09..15b6c3dda1 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestTableLevelReplicationScenarios.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestTableLevelReplicationScenarios.java @@ -163,7 +163,7 @@ private String replicateAndVerify(String replPolicy, String oldReplPolicy, Strin // If the policy contains '.'' means its table level replication. verifyTableListForPolicy(tuple.dumpLocation, replPolicy.contains(".'") ? expectedTables : null); - replica.load(replicatedDbName, replPolicy, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables") .verifyResults(expectedTables) @@ -207,7 +207,7 @@ private String replicateAndVerifyClearDump(String replPolicy, String oldReplPoli // If the policy contains '.'' means its table level replication. verifyTableListForPolicy(tuple.dumpLocation, replPolicy.contains(".'") ? 
expectedTables : null); - replica.load(replicatedDbName, replPolicy, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables") .verifyResults(expectedTables) @@ -310,7 +310,7 @@ public void testBasicBootstrapWithIncludeAndExcludeList() throws Throwable { public void testBasicIncrementalWithIncludeList() throws Throwable { WarehouseInstance.Tuple tupleBootstrap = primary.run("use " + primaryDbName) .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tupleBootstrap.dumpLocation); String[] originalNonAcidTables = new String[] {"t1", "t2"}; String[] originalFullAcidTables = new String[] {"t3", "t4"}; @@ -329,7 +329,7 @@ public void testBasicIncrementalWithIncludeList() throws Throwable { public void testBasicIncrementalWithIncludeAndExcludeList() throws Throwable { WarehouseInstance.Tuple tupleBootstrap = primary.run("use " + primaryDbName) .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tupleBootstrap.dumpLocation); String[] originalTables = new String[] {"t1", "t11", "t2", "t3", "t111"}; createTables(originalTables, CreateTableType.NON_ACID); @@ -373,7 +373,7 @@ public void testIncorrectTablePolicyInReplDump() throws Throwable { String replPolicy = primaryDbName; WarehouseInstance.Tuple tupleBootstrap = primary.run("use " + primaryDbName) .dump(primaryDbName); - replica.load(replicatedDbName, primaryDbName); + replica.load(replicatedDbName, tupleBootstrap.dumpLocation); String lastReplId = tupleBootstrap.lastReplicationId; for (String oldReplPolicy : invalidReplPolicies) { failed = false; @@ -504,7 +504,7 @@ public void testBootstrapExternalTablesWithIncludeAndExcludeList() throws Throwa ReplicationTestUtils.assertExternalFileInfo(primary, Arrays.asList("a2"), new Path(new Path(tuple.dumpLocation, primaryDbName.toLowerCase()), FILE_NAME)); - replica.load(replicatedDbName, replPolicy, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables") .verifyResults(replicatedTables) @@ -543,7 +543,7 @@ public void testBootstrapExternalTablesIncrementalPhaseWithIncludeAndExcludeList ReplicationTestUtils.assertExternalFileInfo(primary, Arrays.asList("a2"), new Path(tuple.dumpLocation, FILE_NAME)); - replica.load(replicatedDbName, replPolicy, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables") .verifyResults(incrementalReplicatedTables) @@ -695,7 +695,7 @@ public void testReplacePolicyOnBootstrapExternalTablesIncrementalPhase() throws // Verify if the expected tables are bootstrapped. 
verifyBootstrapDirInIncrementalDump(tuple.dumpLocation, bootstrappedTables); - replica.load(replicatedDbName, replPolicy, loadWithClause) + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause) .run("use " + replicatedDbName) .run("show tables") .verifyResults(incrementalReplicatedTables) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java index 498d59c359..f1eba52648 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java @@ -304,29 +304,21 @@ WarehouseInstance dumpFailure(String dbName) throws Throwable { return this; } - WarehouseInstance load(String replicatedDbName, String primaryDbName) throws Throwable { - StringBuilder replCommand = new StringBuilder("REPL LOAD " + primaryDbName); - if (!StringUtils.isEmpty(replicatedDbName)) { - replCommand.append(" INTO " + replicatedDbName); - } - run("EXPLAIN " + replCommand.toString()); + WarehouseInstance load(String replicatedDbName, String dumpLocation) throws Throwable { + run("EXPLAIN REPL LOAD " + replicatedDbName + " FROM '" + dumpLocation + "'"); printOutput(); - run(replCommand.toString()); + run("REPL LOAD " + replicatedDbName + " FROM '" + dumpLocation + "'"); return this; } - WarehouseInstance loadWithoutExplain(String replicatedDbName, String primaryDbName) throws Throwable { - StringBuilder replCommand = new StringBuilder("REPL LOAD " + primaryDbName); - if (!StringUtils.isEmpty(replicatedDbName)) { - replCommand.append(" INTO " + replicatedDbName); - } - run(replCommand.toString() + " with ('hive.exec.parallel'='true')"); + WarehouseInstance loadWithoutExplain(String replicatedDbName, String dumpLocation) throws Throwable { + run("REPL LOAD " + replicatedDbName + " FROM '" + dumpLocation + "' with ('hive.exec.parallel'='true')"); return this; } - WarehouseInstance load(String replicatedDbName, String primaryDbName, List withClauseOptions) + WarehouseInstance load(String replicatedDbName, String dumpLocation, List withClauseOptions) throws Throwable { - String replLoadCmd = "REPL LOAD " + primaryDbName + " INTO " + replicatedDbName; + String replLoadCmd = "REPL LOAD " + replicatedDbName + " FROM '" + dumpLocation + "'"; if ((withClauseOptions != null) && !withClauseOptions.isEmpty()) { replLoadCmd += " WITH (" + StringUtils.join(withClauseOptions, ",") + ")"; } @@ -346,23 +338,23 @@ WarehouseInstance status(String replicatedDbName, List withClauseOptions return run(replStatusCmd); } - WarehouseInstance loadFailure(String replicatedDbName, String primaryDbName) throws Throwable { - loadFailure(replicatedDbName, primaryDbName, null); + WarehouseInstance loadFailure(String replicatedDbName, String dumpLocation) throws Throwable { + loadFailure(replicatedDbName, dumpLocation, null); return this; } - WarehouseInstance loadFailure(String replicatedDbName, String primaryDbName, List withClauseOptions) + WarehouseInstance loadFailure(String replicatedDbName, String dumpLocation, List withClauseOptions) throws Throwable { - String replLoadCmd = "REPL LOAD " + primaryDbName + " INTO " + replicatedDbName; + String replLoadCmd = "REPL LOAD " + replicatedDbName + " FROM '" + dumpLocation + "'"; if ((withClauseOptions != null) && !withClauseOptions.isEmpty()) { replLoadCmd += " WITH (" + StringUtils.join(withClauseOptions, ",") + ")"; } return 
runFailure(replLoadCmd); } - WarehouseInstance loadFailure(String replicatedDbName, String primaryDbName, List withClauseOptions, + WarehouseInstance loadFailure(String replicatedDbName, String dumpLocation, List withClauseOptions, int errorCode) throws Throwable { - String replLoadCmd = "REPL LOAD " + primaryDbName + " INTO " + replicatedDbName; + String replLoadCmd = "REPL LOAD " + replicatedDbName + " FROM '" + dumpLocation + "'"; if ((withClauseOptions != null) && !withClauseOptions.isEmpty()) { replLoadCmd += " WITH (" + StringUtils.join(withClauseOptions, ",") + ")"; } diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java index dbe282d374..914147f38a 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java @@ -2850,6 +2850,8 @@ public void testGetQueryLogForReplCommands() throws Exception { ResultSet replDumpRslt = stmt.executeQuery("repl dump " + primaryDb + " with ('hive.repl.rootdir' = '" + replDir + "')"); assertTrue(replDumpRslt.next()); + String dumpLocation = replDumpRslt.getString(1); + String lastReplId = replDumpRslt.getString(2); List logs = stmt.getQueryLog(false, 10000); stmt.close(); LOG.info("Query_Log for Bootstrap Dump"); @@ -2863,8 +2865,7 @@ public void testGetQueryLogForReplCommands() throws Exception { // Bootstrap load stmt = (HiveStatement) con.createStatement(); - stmt.execute("repl load " + primaryDb + " into " + replicaDb + - " with ('hive.repl.rootdir' = '" + replDir + "')"); + stmt.execute("repl load " + replicaDb + " from '" + dumpLocation + "'"); logs = stmt.getQueryLog(false, 10000); stmt.close(); LOG.info("Query_Log for Bootstrap Load"); @@ -2888,6 +2889,8 @@ public void testGetQueryLogForReplCommands() throws Exception { replDumpRslt = stmt.executeQuery("repl dump " + primaryDb + " with ('hive.repl.rootdir' = '" + replDir + "')"); assertTrue(replDumpRslt.next()); + dumpLocation = replDumpRslt.getString(1); + lastReplId = replDumpRslt.getString(2); logs = stmt.getQueryLog(false, 10000); stmt.close(); LOG.info("Query_Log for Incremental Dump"); @@ -2901,8 +2904,7 @@ public void testGetQueryLogForReplCommands() throws Exception { // Incremental load stmt = (HiveStatement) con.createStatement(); - stmt.execute("repl load " + primaryDb + " into " + replicaDb + - " with ('hive.repl.rootdir' = '" + replDir + "')"); + stmt.execute("repl load " + replicaDb + " from '" + dumpLocation + "'"); logs = stmt.getQueryLog(false, 10000); LOG.info("Query_Log for Incremental Load"); verifyFetchedLog(logs, expectedIncrementalLoadLogs); @@ -3103,7 +3105,7 @@ public void testReplErrorScenarios() throws Exception { try { // invalid load path - stmt.execute("repl load default into default1"); + stmt.execute("repl load default1 from '/tmp/junk'"); } catch(SQLException e){ assertTrue(e.getErrorCode() == ErrorMsg.REPL_LOAD_PATH_NOT_FOUND.getErrorCode()); } diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java index 7fa67960f1..a7c905be35 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java @@ -29,7 +29,6 @@ import java.lang.reflect.Field; import java.lang.reflect.Modifier; import java.net.URI; -import java.nio.charset.StandardCharsets; 
 import java.sql.Connection;
 import java.sql.DatabaseMetaData;
 import java.sql.DriverManager;
@@ -44,7 +43,6 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.Base64;
 import java.util.concurrent.Callable;
 import java.util.concurrent.CancellationException;
 import java.util.concurrent.ExecutionException;
@@ -1455,8 +1453,7 @@ public void testReplDumpResultSet() throws Exception {
         TestJdbcWithMiniHS2.class.getCanonicalName().toLowerCase().replace('.', '_') + "_"
             + System.currentTimeMillis();
     String testPathName = System.getProperty("test.warehouse.dir", "/tmp") + Path.SEPARATOR + tid;
-    Path testPath = new Path(testPathName + Path.SEPARATOR
-        + Base64.getEncoder().encodeToString(testDbName.toLowerCase().getBytes(StandardCharsets.UTF_8)));
+    Path testPath = new Path(testPathName + Path.SEPARATOR + testDbName);
     FileSystem fs = testPath.getFileSystem(new HiveConf());
     Statement stmt = conDefault.createStatement();
     try {
diff --git a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 3dcd60ecbb..949e57b9ce 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -924,8 +924,12 @@ replDumpStatement
     : KW_REPL KW_DUMP
         (dbPolicy=replDbPolicy)
         (KW_REPLACE oldDbPolicy=replDbPolicy)?
+        (KW_FROM (eventId=Number)
+          (KW_TO (rangeEnd=Number))?
+          (KW_LIMIT (batchSize=Number))?
+        )?
         (KW_WITH replConf=replConfigs)?
-    -> ^(TOK_REPL_DUMP $dbPolicy ^(TOK_REPLACE $oldDbPolicy)? $replConf?)
+    -> ^(TOK_REPL_DUMP $dbPolicy ^(TOK_REPLACE $oldDbPolicy)? ^(TOK_FROM $eventId (TOK_TO $rangeEnd)? (TOK_LIMIT $batchSize)?)? $replConf?)
     ;

 replDbPolicy
@@ -939,10 +943,10 @@ replLoadStatement
 @init { pushMsg("Replication load statement", state); }
 @after { popMsg(state); }
     : KW_REPL KW_LOAD
-    (sourceDbPolicy=replDbPolicy)
-    (KW_INTO dbName=identifier)?
-    (KW_WITH replConf=replConfigs)?
-    -> ^(TOK_REPL_LOAD $sourceDbPolicy ^(TOK_DBNAME $dbName)? $replConf?)
+    (dbName=identifier)?
+    KW_FROM (path=StringLiteral)
+    (KW_WITH replConf=replConfigs)?
+    -> ^(TOK_REPL_LOAD $path ^(TOK_DBNAME $dbName)? $replConf?)
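For reference, the statement shapes the revised grammar accepts, exercised the same way the TestJdbcDriver2 hunks above do. This is only a sketch: the database names, dump path and event-id range are placeholders, and stmt is assumed to be a java.sql.Statement on a HiveServer2 connection.

    // REPL DUMP again takes an optional event range: FROM <event-id> [TO <event-id>] [LIMIT <n>].
    ResultSet rs = stmt.executeQuery(
        "repl dump srcdb from 100 to 200 limit 50 with ('hive.repl.rootdir'='/user/hive/repl')");
    rs.next();
    String dumpLocation = rs.getString(1);   // dump_dir column of the dump result set
    String lastReplId = rs.getString(2);     // last_repl_id column

    // REPL LOAD names the target database and points at that dump directory.
    stmt.execute("repl load tgtdb from '" + dumpLocation + "'");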
; replConfigs diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java index fd069688e3..f5eea15e8b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java @@ -76,14 +76,14 @@ import java.io.IOException; import java.io.Serializable; import java.nio.charset.StandardCharsets; -import java.util.Set; -import java.util.HashSet; -import java.util.List; import java.util.Arrays; import java.util.Collections; -import java.util.Base64; -import java.util.ArrayList; +import java.util.List; import java.util.UUID; +import java.util.HashSet; +import java.util.Set; +import java.util.Comparator; +import java.util.ArrayList; import java.util.concurrent.TimeUnit; import static org.apache.hadoop.hive.ql.exec.repl.ReplExternalTables.Writer; @@ -121,9 +121,7 @@ public String getName() { public int execute() { try { Hive hiveDb = getHive(); - Path dumpRoot = new Path(conf.getVar(HiveConf.ConfVars.REPLDIR), - Base64.getEncoder().encodeToString(work.dbNameOrPattern.toLowerCase() - .getBytes(StandardCharsets.UTF_8.name()))); + Path dumpRoot = new Path(conf.getVar(HiveConf.ConfVars.REPLDIR), work.dbNameOrPattern.toLowerCase()); Path currentDumpPath = new Path(dumpRoot, getNextDumpDir()); DumpMetaData dmd = new DumpMetaData(currentDumpPath, conf); // Initialize ReplChangeManager instance since we will require it to encode file URI. @@ -149,13 +147,14 @@ public int execute() { private Long getEventFromPreviousDumpMetadata(Path dumpRoot) throws IOException, SemanticException { FileStatus[] statuses = dumpRoot.getFileSystem(conf).listStatus(dumpRoot); if (statuses.length > 0) { - FileStatus latestUpdatedStatus = statuses[0]; - for (FileStatus status : statuses) { - if (status.getModificationTime() > latestUpdatedStatus.getModificationTime()) { - latestUpdatedStatus = status; + //sort based on last modified. 
+      Arrays.sort(statuses, new Comparator<FileStatus>() {
+        public int compare(FileStatus f1, FileStatus f2) {
+          return Long.compare(f2.getModificationTime(), f1.getModificationTime());
         }
-      }
-      DumpMetaData dmd = new DumpMetaData(latestUpdatedStatus.getPath(), conf);
+      });
+      FileStatus recentDump = statuses[0];
+      DumpMetaData dmd = new DumpMetaData(recentDump.getPath(), conf);
       if (dmd.isIncrementalDump()) {
         return dmd.getEventTo();
       }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java
index 703eb1159c..2243cb69b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java
@@ -19,7 +19,6 @@
 import org.antlr.runtime.tree.Tree;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.ValidTxnList;
@@ -45,14 +44,10 @@
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.URI;
-import java.nio.charset.StandardCharsets;
-import java.util.Map;
-import java.util.Base64;
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.List;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.List;
+import java.util.Map;
 
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEQUERYID;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY;
@@ -73,8 +68,8 @@
   private ReplScope replScope = new ReplScope();
   private ReplScope oldReplScope = null;
-  // Source DB Name for REPL LOAD
-  private String sourceDbNameOrPattern;
+  // Base path for REPL LOAD
+  private String path;
   // Added conf member to set the REPL command specific config entries without affecting the configs
   // of any other queries running in the session
   private HiveConf conf;
@@ -82,7 +77,6 @@
   // By default, this will be same as that of super class BaseSemanticAnalyzer. But need to obtain again
   // if the Hive configs are received from WITH clause in REPL LOAD or REPL STATUS commands.
   private Hive db;
-  private boolean isTargetAlreadyLoaded;
 
   private static final String dumpSchema = "dump_dir,last_repl_id#string,string";
@@ -286,20 +280,18 @@ private boolean ifEnableMoveOptimization(Path filePath, org.apache.hadoop.conf.C
   }
 
   // REPL LOAD
-  private void initReplLoad(ASTNode ast) throws HiveException {
-    sourceDbNameOrPattern = PlanUtils.stripQuotes(ast.getChild(0).getText());
+  private void initReplLoad(ASTNode ast) throws SemanticException {
+    path = PlanUtils.stripQuotes(ast.getChild(0).getText());
     int numChildren = ast.getChildCount();
     for (int i = 1; i < numChildren; i++) {
       ASTNode childNode = (ASTNode) ast.getChild(i);
       switch (childNode.getToken().getType()) {
-        case TOK_DBNAME:
-          replScope.setDbName(PlanUtils.stripQuotes(childNode.getChild(0).getText()));
-          break;
-        case TOK_REPL_CONFIG:
-          setConfigs((ASTNode) childNode.getChild(0));
-          break;
-        case TOK_REPL_TABLES: //Accept TOK_REPL_TABLES for table level repl.Needn't do anything as dump path needs db only
-          break;
+      case TOK_DBNAME:
+        replScope.setDbName(PlanUtils.stripQuotes(childNode.getChild(0).getText()));
+        break;
+      case TOK_REPL_CONFIG:
+        setConfigs((ASTNode) childNode.getChild(0));
+        break;
       default:
         throw new SemanticException("Unrecognized token in REPL LOAD statement.");
       }
@@ -349,18 +341,25 @@ private void initReplLoad(ASTNode ast) throws HiveException {
   * 36/
   */
   private void analyzeReplLoad(ASTNode ast) throws SemanticException {
-    try {
-      initReplLoad(ast);
-    } catch (HiveException e) {
-      throw new SemanticException(e);
-    }
+    initReplLoad(ast);
 
     // For analyze repl load, we walk through the dir structure available in the path,
     // looking at each db, and then each table, and then setting up the appropriate
     // import job in its place.
     try {
-      assert(sourceDbNameOrPattern != null);
-      Path loadPath = getCurrentLoadPath();
+      assert(path != null);
+      Path loadPath = new Path(path);
+      final FileSystem fs = loadPath.getFileSystem(conf);
+
+      // Make fully qualified path for further use.
+      loadPath = fs.makeQualified(loadPath);
+
+      if (!fs.exists(loadPath)) {
+        // supposed dump path does not exist.
+        LOG.error("File not found " + loadPath.toUri().toString());
+        throw new FileNotFoundException(ErrorMsg.REPL_LOAD_PATH_NOT_FOUND.getMsg());
+      }
+
       // Ths config is set to make sure that in case of s3 replication, move is skipped.
       try {
         Warehouse wh = new Warehouse(conf);
@@ -388,79 +387,27 @@ private void analyzeReplLoad(ASTNode ast) throws SemanticException {
       // At this point, all dump dirs should contain a _dumpmetadata file that
       // tells us what is inside that dumpdir.
-      //If repl status of target is greater than dumps, don't do anything as the load for the latest dump is done
-      if (!isTargetAlreadyLoaded) {
-        DumpMetaData dmd = new DumpMetaData(loadPath, conf);
+      DumpMetaData dmd = new DumpMetaData(loadPath, conf);
 
-        boolean evDump = false;
-        // we will decide what hdfs locations needs to be copied over here as well.
-        if (dmd.isIncrementalDump()) {
-          LOG.debug("{} contains an incremental dump", loadPath);
-          evDump = true;
-        } else {
-          LOG.debug("{} contains an bootstrap dump", loadPath);
-        }
-        ReplLoadWork replLoadWork = new ReplLoadWork(conf, loadPath.toString(), replScope.getDbName(),
-            dmd.getReplScope(),
-            queryState.getLineageState(), evDump, dmd.getEventTo(),
-            dirLocationsToCopy(loadPath, evDump));
-        rootTasks.add(TaskFactory.get(replLoadWork, conf));
+      boolean evDump = false;
+      // we will decide what hdfs locations needs to be copied over here as well.
+      if (dmd.isIncrementalDump()) {
+        LOG.debug("{} contains an incremental dump", loadPath);
+        evDump = true;
+      } else {
+        LOG.debug("{} contains an bootstrap dump", loadPath);
       }
+      ReplLoadWork replLoadWork = new ReplLoadWork(conf, loadPath.toString(), replScope.getDbName(),
+          dmd.getReplScope(),
+          queryState.getLineageState(), evDump, dmd.getEventTo(),
+          dirLocationsToCopy(loadPath, evDump));
+      rootTasks.add(TaskFactory.get(replLoadWork, conf));
     } catch (Exception e) {
       // TODO : simple wrap & rethrow for now, clean up with error codes
       throw new SemanticException(e.getMessage(), e);
     }
   }
 
-  private Path getCurrentLoadPath() throws IOException, SemanticException {
-    Path loadPathBase = new Path(conf.getVar(HiveConf.ConfVars.REPLDIR),
-        Base64.getEncoder().encodeToString(sourceDbNameOrPattern.toLowerCase()
-            .getBytes(StandardCharsets.UTF_8.name())));
-    final FileSystem fs = loadPathBase.getFileSystem(conf);
-
-    // Make fully qualified path for further use.
-    loadPathBase = fs.makeQualified(loadPathBase);
-
-    if (!fs.exists(loadPathBase)) {
-      // supposed dump path does not exist.
-      LOG.error("File not found " + loadPathBase.toUri().toString());
-      throw new FileNotFoundException(ErrorMsg.REPL_LOAD_PATH_NOT_FOUND.getMsg());
-    }
-    FileStatus[] statuses = loadPathBase.getFileSystem(conf).listStatus(loadPathBase);
-    if (statuses.length > 0) {
-      //sort based on last modified. Recent one is at the end
-      Arrays.sort(statuses, new Comparator<FileStatus>() {
-        public int compare(FileStatus f1, FileStatus f2) {
-          return Long.compare(f1.getModificationTime(), f2.getModificationTime());
-        }
-      });
-      if (replScope.getDbName() != null) {
-        String currentReplStatusOfTarget
-            = getReplStatus(replScope.getDbName());
-        if (currentReplStatusOfTarget == null) { //bootstrap
-          return statuses[0].getPath();
-        } else {
-          DumpMetaData latestDump = new DumpMetaData(statuses[statuses.length - 1].getPath(), conf);
-          if (Long.parseLong(currentReplStatusOfTarget.trim()) >= latestDump.getEventTo()) {
-            isTargetAlreadyLoaded = true;
-          } else {
-            for (FileStatus status : statuses) {
-              DumpMetaData dmd = new DumpMetaData(status.getPath(), conf);
-              if (dmd.isIncrementalDump()
-                  && Long.parseLong(currentReplStatusOfTarget.trim()) < dmd.getEventTo()) {
-                return status.getPath();
-              }
-            }
-          }
-        }
-      } else {
-        //If dbname is null(in case of repl load *), can't get repl status of target, return unsupported
-        throw new UnsupportedOperationException("REPL LOAD * is not supported");
-      }
-    }
-    return null;
-  }
-
   private List dirLocationsToCopy(Path loadPath, boolean isIncrementalPhase)
       throws HiveException, IOException {
     List list = new ArrayList<>();
@@ -519,15 +466,10 @@ private void initReplStatus(ASTNode ast) throws SemanticException{
   private void analyzeReplStatus(ASTNode ast) throws SemanticException {
     initReplStatus(ast);
+
     String dbNameOrPattern = replScope.getDbName();
-    String replLastId = getReplStatus(dbNameOrPattern);
-    prepareReturnValues(Collections.singletonList(replLastId), "last_repl_id#string");
-    setFetchTask(createFetchTask("last_repl_id#string"));
-    LOG.debug("ReplicationSemanticAnalyzer.analyzeReplStatus: writing repl.last.id={} out to {} using configuration {}",
-        replLastId, ctx.getResFile(), conf);
-  }
+    String replLastId = null;
 
-  private String getReplStatus(String dbNameOrPattern) throws SemanticException {
     try {
       // Checking for status of a db
       Database database = db.getDatabase(dbNameOrPattern);
@@ -535,14 +477,18 @@ private String getReplStatus(String dbNameOrPattern) throws SemanticException {
         inputs.add(new ReadEntity(database));
         Map params = database.getParameters();
         if (params != null && (params.containsKey(ReplicationSpec.KEY.CURR_STATE_ID.toString()))) {
-          return params.get(ReplicationSpec.KEY.CURR_STATE_ID.toString());
+          replLastId = params.get(ReplicationSpec.KEY.CURR_STATE_ID.toString());
         }
       }
     } catch (HiveException e) {
       throw new SemanticException(e); // TODO : simple wrap & rethrow for now, clean up with error
-      // codes
+                                      // codes
     }
-    return null;
+
+    prepareReturnValues(Collections.singletonList(replLastId), "last_repl_id#string");
+    setFetchTask(createFetchTask("last_repl_id#string"));
+    LOG.debug("ReplicationSemanticAnalyzer.analyzeReplStatus: writing repl.last.id={} out to {}",
+        replLastId, ctx.getResFile(), conf);
   }
 
   private void prepareReturnValues(List values, String schema) throws SemanticException {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java
index 97a1dd31a7..ce8c9a51a9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java
@@ -107,7 +107,7 @@ private PartitionIterable getPartitions() throws SemanticException {
     if (tableSpec != null && tableSpec.tableHandle != null && tableSpec.tableHandle.isPartitioned()) {
       if (tableSpec.specType == TableSpec.SpecType.TABLE_ONLY) {
         // TABLE-ONLY, fetch partitions if regular export, don't if metadata-only
-        if (replicationSpec.isMetadataOnly()) {
+        if (conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY)) {
           return null;
         } else {
           return new PartitionIterable(db, tableSpec.tableHandle, null, conf.getIntVar(
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
index 02a9f155c2..e38a0675b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
@@ -31,7 +31,6 @@
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 
-import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.StatsSetupConst;
@@ -306,15 +305,10 @@ private long parseLong(String fieldName) {
     long result = -1;
     if (params != null) {
-      String val = params.get(fieldName);
-      if (!StringUtils.isBlank(val)) {
-        try {
-          result = Long.parseLong(val);
-        } catch (NumberFormatException e) {
-          // Pass-through. This should not happen and we will LOG it,
-          // but do not fail query.
-          LOG.warn("Error parsing {} value: {}", fieldName, val);
-        }
+      try {
+        result = Long.parseLong(params.get(fieldName));
+      } catch (NumberFormatException e) {
+        result = -1;
       }
     }
     return result;
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseUtils.java
index e91a7ed760..e606713b13 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseUtils.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseUtils.java
@@ -78,7 +78,7 @@ public TestParseUtils(String query, TxnType txnType) {
         {"LOAD DATA LOCAL INPATH './examples/files/kv.txt' "
             + " OVERWRITE INTO TABLE a", TxnType.DEFAULT},
-        {"REPL LOAD a INTO a", TxnType.DEFAULT},
+        {"REPL LOAD a from './examples/files/kv.txt'", TxnType.DEFAULT},
         {"REPL DUMP a", TxnType.DEFAULT},
         {"REPL STATUS a", TxnType.DEFAULT},
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestReplicationSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestReplicationSemanticAnalyzer.java
index 81ab01d301..48b9883cbe 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestReplicationSemanticAnalyzer.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestReplicationSemanticAnalyzer.java
@@ -112,6 +112,40 @@ public void parseTableName() throws ParseException {
       assertDatabase(2, root);
       assertTableName(root);
     }
+
+    @Test
+    public void parseFromEventId() throws ParseException {
+      ASTNode root = parse("repl dump testDb.'test_table' from 100");
+      assertDatabase(3, root);
+      assertTableName(root);
+      assertFromEvent(1, root);
+    }
+
+    @Test
+    public void parseToEventId() throws ParseException {
+      ASTNode root = parse("repl dump testDb.'test_table' from 100 to 200");
+      assertDatabase(3, root);
+      assertTableName(root);
+      ASTNode fromClauseRootNode = assertFromEvent(3, root);
+      assertToEventId(fromClauseRootNode);
+    }
+
+    @Test
+    public void parseLimit() throws ParseException {
+      ASTNode root = parse("repl dump testDb.'test_table' from 100 to 200 limit 10");
+      assertDatabase(3, root);
+      assertTableName(root);
+      ASTNode fromClauseRootNode = assertFromEvent(5, root);
+      assertToEventId(fromClauseRootNode);
+
+      ASTNode child = (ASTNode) fromClauseRootNode.getChild(3);
+      assertEquals("TOK_LIMIT", child.getText());
+      assertEquals(0, child.getChildCount());
+
+      child = (ASTNode) fromClauseRootNode.getChild(4);
+      assertEquals("10", child.getText());
+      assertEquals(0, child.getChildCount());
+    }
   }
 
   public static class ReplDumpWithClause {
@@ -131,26 +165,66 @@ public void parseTableName() throws ParseException {
       assertTableName(root);
       assertWithClause(root, 2);
     }
+
+    @Test
+    public void parseFromEventId() throws ParseException {
+      ASTNode root = parse("repl dump testDb.'test_table' from 100 "
+          + "with ('key.1'='value.1','key.2'='value.2')");
+      assertDatabase(4, root);
+      assertTableName(root);
+      assertFromEvent(1, root);
+      assertWithClause(root, 3);
+    }
+
+    @Test
+    public void parseToEventId() throws ParseException {
+      ASTNode root = parse("repl dump testDb.'test_table' from 100 to 200 "
+          + "with ('key.1'='value.1','key.2'='value.2')");
+      assertDatabase(4, root);
+      assertTableName(root);
+      ASTNode fromClauseRootNode = assertFromEvent(3, root);
+      assertToEventId(fromClauseRootNode);
+      assertWithClause(root, 3);
+    }
+
+    @Test
+    public void parseLimit() throws ParseException {
+      ASTNode root = parse("repl dump testDb.'test_table' from 100 to 200 limit 10 "
+          + "with ('key.1'='value.1','key.2'='value.2')");
+      assertDatabase(4, root);
+      assertTableName(root);
+      ASTNode fromClauseRootNode = assertFromEvent(5, root);
+      assertToEventId(fromClauseRootNode);
+      assertWithClause(root, 3);
+
+      ASTNode child = (ASTNode) fromClauseRootNode.getChild(3);
+      assertEquals("TOK_LIMIT", child.getText());
+      assertEquals(0, child.getChildCount());
+
+      child = (ASTNode) fromClauseRootNode.getChild(4);
+      assertEquals("10", child.getText());
+      assertEquals(0, child.getChildCount());
+    }
   }
 
   public static class ReplLoad {
     @Test
     public void parseFromLocation() throws ParseException {
-      ASTNode root = parse("repl load testDbName");
+      ASTNode root = parse("repl load from '/some/location/in/hdfs/'");
       assertFromLocation(1, root);
     }
 
     @Test
    public void parseTargetDbName() throws ParseException {
-      ASTNode root = parse("repl load testDbName into targetTestDbName");
+      ASTNode root = parse("repl load targetTestDbName from '/some/location/in/hdfs/'");
       assertFromLocation(2, root);
       assertTargetDatabaseName(root);
     }
 
     @Test
     public void parseWithClause() throws ParseException {
-      ASTNode root = parse("repl load testDbName into targetTestDbName"
+      ASTNode root = parse("repl load targetTestDbName from '/some/location/in/hdfs/'"
           + " with ('mapred.job.queue.name'='repl','hive.repl.approx.max.load.tasks'='100')");
       assertFromLocation(3, root);
       assertTargetDatabaseName(root);
@@ -177,7 +251,7 @@ private void assertFromLocation(final int expectedNumberOfChildren, ASTNode root
       assertEquals("TOK_REPL_LOAD", root.getText());
       assertEquals(expectedNumberOfChildren, root.getChildCount());
       ASTNode child = (ASTNode) root.getChild(0);
-      assertEquals("testDbName", child.getText());
+      assertEquals("'/some/location/in/hdfs/'", child.getText());
       assertEquals(0, child.getChildCount());
     }
diff --git a/ql/src/test/queries/clientnegative/repl_dump_requires_admin.q b/ql/src/test/queries/clientnegative/repl_dump_requires_admin.q
index c227b68acb..e1a6153265 100644
--- a/ql/src/test/queries/clientnegative/repl_dump_requires_admin.q
+++ b/ql/src/test/queries/clientnegative/repl_dump_requires_admin.q
@@ -26,7 +26,7 @@
 show role grant user hive_admin_user;
 show tables test_repldump_adminpriv;
 repl dump test_repldump_adminpriv;
-dfs -rmr ${system:test.tmp.dir}/hrepl/dGVzdF9yZXBsZHVtcF9hZG1pbnByaXY=/next;
+dfs -rmr ${system:test.tmp.dir}/hrepl/test_repldump_adminpriv/next;
 
 set user.name=ruser1;
 show tables test_repldump_adminpriv;
diff --git a/ql/src/test/queries/clientnegative/repl_load_requires_admin.q b/ql/src/test/queries/clientnegative/repl_load_requires_admin.q
index 0e52e9d7f1..921b50b0b0 100644
--- a/ql/src/test/queries/clientnegative/repl_load_requires_admin.q
+++ b/ql/src/test/queries/clientnegative/repl_load_requires_admin.q
@@ -29,10 +29,10 @@
 show tables test_replload_adminpriv_src;
 repl dump test_replload_adminpriv_src;
 
 -- repl load as admin should succeed
-repl load test_replload_adminpriv_src into test_replload_adminpriv_tgt1;
+repl load test_replload_adminpriv_tgt1 from '${system:test.tmp.dir}/hrepl/test_replload_adminpriv_src/next/';
 show tables test_replload_adminpriv_tgt1;
 
 set user.name=ruser1;
 -- repl load as non-admin should fail
-repl load test_replload_adminpriv_src into test_replload_adminpriv_tgt2;
+repl load test_replload_adminpriv_tgt2 from '${system:test.tmp.dir}/hrepl/test_replload_adminpriv_src/next';
diff --git a/ql/src/test/queries/clientpositive/repl_load_old_version.q b/ql/src/test/queries/clientpositive/repl_load_old_version.q
new file mode 100644
index 0000000000..11ed75dcce
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/repl_load_old_version.q
@@ -0,0 +1,10 @@
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+REPL LOAD test_db from '../../data/files/repl_dump' with ('hive.exec.parallel'='false');
+use test_db;
+show tables;
+select * from tbl1 order by fld;
+select * from tbl2 order by fld;
+select * from tbl3 order by fld;
+select * from tbl4 order by fld;
+select * from tbl5 order by fld;
+select * from tbl6 order by fld1;
diff --git a/ql/src/test/results/clientnegative/repl_load_requires_admin.q.out b/ql/src/test/results/clientnegative/repl_load_requires_admin.q.out
index 28f9d234d4..1499c39464 100644
--- a/ql/src/test/results/clientnegative/repl_load_requires_admin.q.out
+++ b/ql/src/test/results/clientnegative/repl_load_requires_admin.q.out
@@ -68,9 +68,8 @@ POSTHOOK: query: repl dump test_replload_adminpriv_src
 POSTHOOK: type: REPLDUMP
 POSTHOOK: Input: database:test_replload_adminpriv_src
 #### A masked pattern was here ####
-PREHOOK: query: repl load test_replload_adminpriv_src into test_replload_adminpriv_tgt1
 PREHOOK: type: REPLLOAD
-POSTHOOK: query: repl load test_replload_adminpriv_src into test_replload_adminpriv_tgt1
+#### A masked pattern was here ####
 POSTHOOK: type: REPLLOAD
 PREHOOK: query: show tables test_replload_adminpriv_tgt1
 PREHOOK: type: SHOWTABLES
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java
index ee2ace8cbe..7b325449ce 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/security/TokenStoreDelegationTokenSecretManager.java
@@ -161,10 +161,7 @@ public long renewToken(Token token, String renewer) t
     synchronized (this) {
       super.currentTokens.put(id, tokenInfo);
       try {
-        long res = super.renewToken(token, renewer);
-        this.tokenStore.removeToken(id);
-        this.tokenStore.addToken(id, super.currentTokens.get(id));
-        return res;
+        return super.renewToken(token, renewer);
       } finally {
         super.currentTokens.remove(id);
       }