diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java index 74854db2ab..e417edf25a 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore; import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection; import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments; +import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder; import org.apache.hadoop.hive.ql.ErrorMsg; @@ -32,7 +33,6 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.repl.PathBuilder; import org.apache.hadoop.security.UserGroupInformation; import org.junit.BeforeClass; @@ -198,10 +198,10 @@ public void externalTableReplicationWithDefaultPaths() throws Throwable { private void assertTablePartitionLocation(String sourceTableName, String replicaTableName) throws HiveException { Hive hiveForPrimary = Hive.get(primary.hiveConf); - Table sourceTable = hiveForPrimary.getTable(sourceTableName); + org.apache.hadoop.hive.ql.metadata.Table sourceTable = hiveForPrimary.getTable(sourceTableName); Path sourceLocation = sourceTable.getDataLocation(); Hive hiveForReplica = Hive.get(replica.hiveConf); - Table replicaTable = hiveForReplica.getTable(replicaTableName); + org.apache.hadoop.hive.ql.metadata.Table replicaTable = hiveForReplica.getTable(replicaTableName); Path dataLocation = replicaTable.getDataLocation(); assertEquals(REPLICA_EXTERNAL_BASE + sourceLocation.toUri().getPath(), dataLocation.toUri().getPath()); @@ -712,6 +712,64 @@ public void testExternalTableDataPath() throws Exception { assertTrue(dataPath.toUri().getPath().equalsIgnoreCase("/tmp/tmp1/abc/xyz")); } + @Test + public void testExternalTablesIncReplicationWithConcurrentDropTable() throws Throwable { + List dumpWithClause = Collections.singletonList( + "'" + HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES.varname + "'='true'" + ); + List loadWithClause = externalTableBasePathWithClause(); + WarehouseInstance.Tuple tupleBootstrap = primary.run("use " + primaryDbName) + .run("create external table t1 (id int)") + .run("insert into table t1 values (1)") + .dump(primaryDbName, null, dumpWithClause); + + replica.load(replicatedDbName, tupleBootstrap.dumpLocation, loadWithClause); + + // Insert a row into "t1" and create another external table using data from "t1". + primary.run("use " + primaryDbName) + .run("insert into table t1 values (2)") + .run("create external table t2 as select * from t1"); + + // Inject a behavior so that getTable returns null for table "t1". This ensures the table is + // skipped for data files listing. + BehaviourInjection ptnedTableNuller = new BehaviourInjection() { + @Nullable + @Override + public Table apply(@Nullable Table table) { + LOG.info("Performing injection on table " + table.getTableName()); + if (table.getTableName().equalsIgnoreCase("t1")){ + injectionPathCalled = true; + return null; + } else { + nonInjectedPathCalled = true; + return table; + } + } + }; + InjectableBehaviourObjectStore.setGetTableBehaviour(ptnedTableNuller); + WarehouseInstance.Tuple tupleInc; + try { + // The t1 table will be skipped from data location listing. + tupleInc = primary.dump(primaryDbName, tupleBootstrap.lastReplicationId, dumpWithClause); + ptnedTableNuller.assertInjectionsPerformed(true, true); + } finally { + InjectableBehaviourObjectStore.resetGetTableBehaviour(); // reset the behaviour + } + + // Only table t2 should exist in the data location list file. + assertExternalFileInfo(Collections.singletonList("t2"), + new Path(tupleInc.dumpLocation, FILE_NAME)); + + // The newly inserted data "2" should be missing in table "t1". But, table t2 should exist and have + // inserted data. + replica.load(replicatedDbName, tupleInc.dumpLocation, loadWithClause) + .run("use " + replicatedDbName) + .run("select id from t1 order by id") + .verifyResult("1") + .run("select id from t2 order by id") + .verifyResults(Arrays.asList("1", "2")); + } + private List externalTableBasePathWithClause() throws IOException, SemanticException { Path externalTableLocation = new Path(REPLICA_EXTERNAL_BASE); DistributedFileSystem fileSystem = replica.miniDFSCluster.getFileSystem(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java index 11f8bc321d..2f72e23526 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java @@ -256,19 +256,25 @@ private Long incrementalDump(Path dumpRoot, DumpMetaData dmd, Path cmRoot, Hive try (Writer writer = new Writer(dumpRoot, conf)) { for (String tableName : Utils.matchesTbl(hiveDb, dbName, work.tableNameOrPattern)) { - Table table = hiveDb.getTable(dbName, tableName); + try { + Table table = hiveDb.getTable(dbName, tableName); - // Dump external table locations if required. - if (shouldDumpExternalTableLocation() && - TableType.EXTERNAL_TABLE.equals(table.getTableType())) { - writer.dataLocationDump(table); - } + // Dump external table locations if required. + if (shouldDumpExternalTableLocation() && + TableType.EXTERNAL_TABLE.equals(table.getTableType())) { + writer.dataLocationDump(table); + } - // Dump the table to be bootstrapped if required. - if (shouldBootstrapDumpTable(table)) { - HiveWrapper.Tuple tableTuple = new HiveWrapper(hiveDb, dbName).table(table); - dumpTable(dbName, tableName, validTxnList, dbRoot, bootDumpBeginReplId, hiveDb, - tableTuple); + // Dump the table to be bootstrapped if required. + if (shouldBootstrapDumpTable(table)) { + HiveWrapper.Tuple
tableTuple = new HiveWrapper(hiveDb, dbName).table(table); + dumpTable(dbName, tableName, validTxnList, dbRoot, bootDumpBeginReplId, hiveDb, + tableTuple); + } + } catch (InvalidTableException te) { + // Repl dump shouldn't fail if the table is dropped/renamed while dumping it. + // Just log a debug message and skip it. + LOG.debug(te.getMessage()); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java index 7e33f112a5..2ece658694 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -133,7 +134,17 @@ void dataLocationDump(Table table) PathBuilder.fullyQualifiedHDFSUri(table.getDataLocation(), FileSystem.get(hiveConf)); write(lineFor(table.getTableName(), fullyQualifiedDataLocation, hiveConf)); if (table.isPartitioned()) { - List partitions = Hive.get(hiveConf).getPartitions(table); + List partitions; + try { + partitions = Hive.get(hiveConf).getPartitions(table); + } catch (HiveException e) { + if (e.getCause() instanceof NoSuchObjectException) { + // If table is dropped when dump in progress, just skip partitions data location dump + return; + } + throw e; + } + for (Partition partition : partitions) { boolean partitionLocOutsideTableLoc = !FileUtils.isPathWithinSubtree( partition.getDataLocation(), table.getDataLocation()