diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
index d161841e4f..d1f0def580 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java
@@ -90,6 +90,7 @@
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 
 import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION;
+import org.junit.Assert;
 
 public class TestReplicationScenarios {
@@ -3184,6 +3185,47 @@ public void testLoadCmPathMissing() throws IOException {
     fs.create(path, false);
   }
 
+  @Test
+  public void testDumpWithTableDirMissing() throws IOException {
+    String dbName = createDB(testName.getMethodName(), driver);
+    run("CREATE TABLE " + dbName + ".normal(a int)", driver);
+    run("INSERT INTO " + dbName + ".normal values (1)", driver);
+
+    Path path = new Path(System.getProperty("test.warehouse.dir", ""));
+    path = new Path(path, dbName.toLowerCase() + ".db");
+    path = new Path(path, "normal");
+    FileSystem fs = path.getFileSystem(hconf);
+    fs.delete(path);
+
+    advanceDumpDir();
+    CommandProcessorResponse ret = driver.run("REPL DUMP " + dbName);
+    Assert.assertEquals(ret.getResponseCode(), ErrorMsg.FILE_NOT_FOUND.getErrorCode());
+
+    run("DROP TABLE " + dbName + ".normal", driver);
+    run("drop database " + dbName, true, driver);
+  }
+
+  @Test
+  public void testDumpWithPartitionDirMissing() throws IOException {
+    String dbName = createDB(testName.getMethodName(), driver);
+    run("CREATE TABLE " + dbName + ".normal(a int) PARTITIONED BY (part int)", driver);
+    run("INSERT INTO " + dbName + ".normal partition (part= 124) values (1)", driver);
+
+    Path path = new Path(System.getProperty("test.warehouse.dir",""));
+    path = new Path(path, dbName.toLowerCase()+".db");
+    path = new Path(path, "normal");
+    path = new Path(path, "part=124");
+    FileSystem fs = path.getFileSystem(hconf);
+    fs.delete(path);
+
+    advanceDumpDir();
+    CommandProcessorResponse ret = driver.run("REPL DUMP " + dbName);
+    Assert.assertEquals(ret.getResponseCode(), ErrorMsg.FILE_NOT_FOUND.getErrorCode());
+
+    run("DROP TABLE " + dbName + ".normal", driver);
+    run("drop database " + dbName, true, driver);
+  }
+
   @Test
   public void testDumpNonReplDatabase() throws IOException {
     String dbName = createDBNonRepl(testName.getMethodName(), driver);
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
index a1498cac5c..3ee0747ebd 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java
@@ -32,6 +32,10 @@
 import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments;
 import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection;
 import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.junit.rules.TestName;
 import org.junit.rules.TestRule;
 import org.slf4j.Logger;
@@ -63,10 +67,11 @@
   protected static final Logger LOG = LoggerFactory.getLogger(TestReplicationScenarios.class);
   private static WarehouseInstance primary, replica, replicaNonAcid;
   private String primaryDbName, replicatedDbName;
+  private static HiveConf conf;
 
   @BeforeClass
   public static void classLevelSetup() throws Exception {
-    Configuration conf = new Configuration();
+    conf = new HiveConf(TestReplicationScenariosAcidTables.class);
     conf.set("dfs.client.use.datanode.hostname", "true");
     conf.set("hadoop.proxyuser." + Utils.getUGI().getShortUserName() + ".hosts", "*");
     MiniDFSCluster miniDFSCluster =
@@ -438,4 +443,49 @@ public Boolean apply(@Nullable CallerArguments args) {
         .run("select name from t2 order by name")
         .verifyResults(Arrays.asList("bob", "carl"));
   }
+
+  @Test
+  public void testDumpAcidTableWithPartitionDirMissing() throws Throwable {
+    String dbName = testName.getMethodName();
+    primary.run("CREATE DATABASE " + dbName + " WITH DBPROPERTIES ( '" +
+        SOURCE_OF_REPLICATION + "' = '1,2,3')")
+        .run("CREATE TABLE " + dbName + ".normal (a int) PARTITIONED BY (part int)" +
+            " STORED AS ORC TBLPROPERTIES ('transactional'='true')")
+        .run("INSERT INTO " + dbName + ".normal partition (part= 124) values (1)");
+
+    Path path = new Path(primary.warehouseRoot, dbName.toLowerCase()+".db");
+    path = new Path(path, "normal");
+    path = new Path(path, "part=124");
+    FileSystem fs = path.getFileSystem(conf);
+    fs.delete(path);
+
+    CommandProcessorResponse ret = primary.runCommand("REPL DUMP " + dbName +
+        " with ('hive.repl.dump.include.acid.tables' = 'true')");
+    Assert.assertEquals(ret.getResponseCode(), ErrorMsg.FILE_NOT_FOUND.getErrorCode());
+
+    primary.run("DROP TABLE " + dbName + ".normal");
+    primary.run("drop database " + dbName);
+  }
+
+  @Test
+  public void testDumpAcidTableWithTableDirMissing() throws Throwable {
+    String dbName = testName.getMethodName();
+    primary.run("CREATE DATABASE " + dbName + " WITH DBPROPERTIES ( '" +
+        SOURCE_OF_REPLICATION + "' = '1,2,3')")
+        .run("CREATE TABLE " + dbName + ".normal (a int) " +
+            " STORED AS ORC TBLPROPERTIES ('transactional'='true')")
+        .run("INSERT INTO " + dbName + ".normal values (1)");
+
+    Path path = new Path(primary.warehouseRoot, dbName.toLowerCase()+".db");
+    path = new Path(path, "normal");
+    FileSystem fs = path.getFileSystem(conf);
+    fs.delete(path);
+
+    CommandProcessorResponse ret = primary.runCommand("REPL DUMP " + dbName +
+        " with ('hive.repl.dump.include.acid.tables' = 'true')");
+    Assert.assertEquals(ret.getResponseCode(), ErrorMsg.FILE_NOT_FOUND.getErrorCode());
+
+    primary.run("DROP TABLE " + dbName + ".normal");
+    primary.run("drop database " + dbName);
+  }
 }
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java
index c5f35e2c7d..fbd893b017 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java
@@ -66,6 +66,9 @@
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.junit.Assert;
 
 public class TestReplicationScenariosAcrossInstances {
   @Rule
@@ -1096,7 +1099,9 @@ public Boolean apply(@Nullable CallerArguments args) {
     assertEquals(0, replica.getForeignKeyList(replicatedDbName, "t2").size());
 
     // Retry with different dump should fail.
-    replica.loadFailure(replicatedDbName, tuple2.dumpLocation);
+    CommandProcessorResponse ret = replica.runCommand("REPL LOAD " + replicatedDbName +
+        " FROM '" + tuple2.dumpLocation + "'");
+    Assert.assertEquals(ret.getResponseCode(), ErrorMsg.REPL_BOOTSTRAP_LOAD_PATH_NOT_VALID.getErrorCode());
 
     // Verify if create table is not called on table t1 but called for t2 and t3.
     // Also, allow constraint creation only on t1 and t3. Foreign key creation on t2 fails.
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java
index 26f488ece5..e9875b4bdd 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/WarehouseInstance.java
@@ -77,6 +77,7 @@
   HiveConf hiveConf;
   MiniDFSCluster miniDFSCluster;
   private HiveMetaStoreClient client;
+  public final Path warehouseRoot;
 
   private static int uniqueIdentifier = 0;
 
@@ -90,7 +91,7 @@
     assert miniDFSCluster.isDataNodeUp();
     DistributedFileSystem fs = miniDFSCluster.getFileSystem();
 
-    Path warehouseRoot = mkDir(fs, "/warehouse" + uniqueIdentifier);
+    warehouseRoot = mkDir(fs, "/warehouse" + uniqueIdentifier);
     if (StringUtils.isNotEmpty(keyNameForEncryptedZone)) {
       fs.createEncryptionZone(warehouseRoot, keyNameForEncryptedZone);
     }
@@ -200,6 +201,10 @@ public WarehouseInstance run(String command) throws Throwable {
     return this;
   }
 
+  public CommandProcessorResponse runCommand(String command) throws Throwable {
+    return driver.run(command);
+  }
+
   WarehouseInstance runFailure(String command) throws Throwable {
     CommandProcessorResponse ret = driver.run(command);
     if (ret.getException() == null) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index e0d336a118..7bea5e4215 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -502,7 +502,8 @@
   //if the error message is changed for REPL_EVENTS_MISSING_IN_METASTORE, then need modification in getNextNotification
   //method in HiveMetaStoreClient
   REPL_EVENTS_MISSING_IN_METASTORE(20016, "Notification events are missing in the meta store."),
-  REPL_BOOTSTRAP_LOAD_PATH_NOT_VALID(20017, "Target database is bootstrapped from some other path."),
+  REPL_BOOTSTRAP_LOAD_PATH_NOT_VALID(20017, "Load path {0} not valid as target database is bootstrapped " +
+      "from some other path : {1}."),
   REPL_FILE_MISSING_FROM_SRC_AND_CM_PATH(20018, "File is missing from both source and cm path."),
   REPL_LOAD_PATH_NOT_FOUND(20019, "Load path does not exist."),
   REPL_DATABASE_IS_NOT_SOURCE_OF_REPLICATION(20020,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
index 78a0ba3708..a14802fe41 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
@@ -121,6 +121,10 @@ protected int execute(DriverContext driverContext) {
         lastReplId = incrementalDump(dumpRoot, dmd, cmRoot);
       }
       prepareReturnValues(Arrays.asList(dumpRoot.toUri().toString(), String.valueOf(lastReplId)), dumpSchema);
+    } catch (RuntimeException e) {
+      LOG.error("failed", e);
+      setException(e);
+      return ErrorMsg.getErrorMsg(e.getMessage()).getErrorCode();
     } catch (Exception e) {
       LOG.error("failed", e);
       setException(e);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplUtils.java
index 18a83043a6..e41e4b5ec0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplUtils.java
@@ -16,7 +16,7 @@
  * limitations under the License.
  */
 package org.apache.hadoop.hive.ql.exec.repl;
-
+import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.ql.exec.Task;
@@ -114,9 +114,8 @@ public static boolean replCkptStatus(String dbName, Map<String, String> props, String dumpRoot)
       if (props.get(REPL_CHECKPOINT_KEY).equals(dumpRoot)) {
         return true;
       }
-      throw new InvalidOperationException("REPL LOAD with Dump: " + dumpRoot
-          + " is not allowed as the target DB: " + dbName
-          + " is already bootstrap loaded by another Dump " + props.get(REPL_CHECKPOINT_KEY));
+      throw new InvalidOperationException(ErrorMsg.REPL_BOOTSTRAP_LOAD_PATH_NOT_VALID.format(dumpRoot,
+          props.get(REPL_CHECKPOINT_KEY)));
     }
     return false;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/LoadDatabase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/LoadDatabase.java
index 0270d2afec..7ddae6f97c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/LoadDatabase.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/LoadDatabase.java
@@ -78,7 +78,7 @@ public TaskTracker tasks() throws SemanticException {
       }
       return tracker;
     } catch (Exception e) {
-      throw new SemanticException(e);
+      throw new SemanticException(e.getMessage(), e);
     }
   }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/PartitionExport.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/PartitionExport.java
index d73fc4f336..4ed6b374fb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/PartitionExport.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/PartitionExport.java
@@ -20,6 +20,7 @@
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.PartitionIterable;
 import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
@@ -36,6 +37,8 @@
 import java.util.concurrent.Executors;
 import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.TimeUnit;
+import java.util.LinkedList;
+import java.util.concurrent.Future;
 
 import static org.apache.hadoop.hive.ql.parse.repl.dump.TableExport.Paths;
 
@@ -70,10 +73,11 @@
     this.callersSession = SessionState.get();
   }
 
-  void write(final ReplicationSpec forReplicationSpec) throws InterruptedException {
+  void write(final ReplicationSpec forReplicationSpec) throws InterruptedException, HiveException {
+    List<Future<?>> futures = new LinkedList<>();
     ExecutorService producer = Executors.newFixedThreadPool(1,
         new ThreadFactoryBuilder().setNameFormat("partition-submitter-thread-%d").build());
-    producer.submit(() -> {
+    futures.add(producer.submit(() -> {
       SessionState.setCurrentSessionState(callersSession);
       for (Partition partition : partitionIterable) {
         try {
@@ -83,7 +87,7 @@ void write(final ReplicationSpec forReplicationSpec) throws InterruptedException
               "Error while queuing up the partitions for export of data files", e);
         }
       }
-    });
+    }));
     producer.shutdown();
 
     ThreadFactory namingThreadFactory =
@@ -102,7 +106,7 @@ void write(final ReplicationSpec forReplicationSpec) throws InterruptedException
         continue;
       }
       LOG.debug("scheduling partition dump {}", partition.getName());
-      consumer.submit(() -> {
+      futures.add(consumer.submit(() -> {
         String partitionName = partition.getName();
         String threadName = Thread.currentThread().getName();
         LOG.debug("Thread: {}, start partition dump {}", threadName, partitionName);
@@ -115,11 +119,19 @@ void write(final ReplicationSpec forReplicationSpec) throws InterruptedException
               .export(forReplicationSpec);
           LOG.debug("Thread: {}, finish partition dump {}", threadName, partitionName);
         } catch (Exception e) {
-          throw new RuntimeException("Error while export of data files", e);
+          throw new RuntimeException(e.getMessage(), e);
         }
-      });
+      }));
     }
     consumer.shutdown();
+    for (Future<?> future : futures) {
+      try {
+        future.get();
+      } catch (Exception e) {
+        LOG.error("failed", e.getCause());
+        throw new HiveException(e.getCause().getMessage(), e.getCause());
+      }
+    }
     // may be drive this via configuration as well.
     consumer.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java
index 2df684def4..c0701c5b87 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java
@@ -22,6 +22,7 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.NotificationEvent;
+import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
@@ -38,6 +39,7 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.DataOutputStream;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.Collection;
 import java.util.Collections;
@@ -210,7 +212,11 @@ public static boolean shouldReplicate(NotificationEvent tableForEvent,
   static List<Path> getDataPathList(Path fromPath, ReplicationSpec replicationSpec, HiveConf conf)
           throws IOException {
     if (replicationSpec.isTransactionalTableDump()) {
-      return AcidUtils.getValidDataPaths(fromPath, conf, replicationSpec.getValidWriteIdList());
+      try {
+        return AcidUtils.getValidDataPaths(fromPath, conf, replicationSpec.getValidWriteIdList());
+      } catch (FileNotFoundException e) {
+        throw new IOException(ErrorMsg.FILE_NOT_FOUND.format(e.getMessage()), e);
+      }
     } else {
       return Collections.singletonList(fromPath);
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java
index 070c830f75..a934d812fc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.ql.parse.repl.dump.io;
 
 import java.io.BufferedWriter;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.util.ArrayList;
@@ -46,6 +47,8 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.hadoop.hive.ql.ErrorMsg.FILE_NOT_FOUND;
+
 //TODO: this object is created once to call one method and then immediately destroyed.
 //So it's basically just a roundabout way to pass arguments to a static method. Simplify?
 public class FileOperations {
@@ -158,6 +161,10 @@ private void exportFilesAsList() throws SemanticException, IOException, LoginException {
       }
       done = true;
     } catch (IOException e) {
+      if (e instanceof FileNotFoundException) {
+        logger.error("exporting data files in dir : " + dataPathList + " to " + exportRootDataDir + " failed");
+        throw new FileNotFoundException(FILE_NOT_FOUND.format(e.getMessage()));
+      }
       repeat++;
       logger.info("writeFilesList failed", e);
       if (repeat >= FileUtils.MAX_IO_ERROR_RETRY) {