diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java index 72da2f1ba3..0b7b82ed05 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder; import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.repl.ReplExternalTables; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; @@ -673,6 +674,27 @@ public void retryIncBootstrapExternalTablesFromDifferentDumpWithoutCleanTablesCo ErrorMsg.REPL_BOOTSTRAP_LOAD_PATH_NOT_VALID.getErrorCode()); } + @Test + public void testExternalTableDataPath() throws Exception { + HiveConf conf = primary.getConf(); + Path basePath = new Path("/"); + Path sourcePath = new Path("/abc/xyz"); + Path dataPath = ReplExternalTables.externalTableDataPath(conf, basePath, sourcePath); + assertTrue(dataPath.toUri().getPath().equalsIgnoreCase("/abc/xyz")); + + basePath = new Path("/tmp"); + dataPath = ReplExternalTables.externalTableDataPath(conf, basePath, sourcePath); + assertTrue(dataPath.toUri().getPath().equalsIgnoreCase("/tmp/abc/xyz")); + + basePath = new Path("/tmp/"); + dataPath = ReplExternalTables.externalTableDataPath(conf, basePath, sourcePath); + assertTrue(dataPath.toUri().getPath().equalsIgnoreCase("/tmp/abc/xyz")); + + basePath = new Path("/tmp/tmp1//"); + dataPath = ReplExternalTables.externalTableDataPath(conf, basePath, sourcePath); + 
assertTrue(dataPath.toUri().getPath().equalsIgnoreCase("/tmp/tmp1/abc/xyz")); + } + private List externalTableBasePathWithClause() throws IOException, SemanticException { Path externalTableLocation = new Path(REPLICA_EXTERNAL_BASE); DistributedFileSystem fileSystem = replica.miniDFSCluster.getFileSystem(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java index 015bc2653b..7e33f112a5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java @@ -66,18 +66,31 @@ public static String externalTableLocation(HiveConf hiveConf, String location) t String baseDir = hiveConf.get(HiveConf.ConfVars.REPL_EXTERNAL_TABLE_BASE_DIR.varname); Path basePath = new Path(baseDir); Path currentPath = new Path(location); - String targetPathWithoutSchemeAndAuth = basePath.toUri().getPath() + currentPath.toUri().getPath(); - Path dataLocation; + Path dataLocation = externalTableDataPath(hiveConf, basePath, currentPath); + + LOG.info("Incoming external table location: {} , new location: {}", location, dataLocation.toString()); + return dataLocation.toString(); + } + + public static Path externalTableDataPath(HiveConf hiveConf, Path basePath, Path sourcePath) + throws SemanticException { + String baseUriPath = basePath.toUri().getPath(); + String sourceUriPath = sourcePath.toUri().getPath(); + + // If "/" is the input for the base directory, then we should use the exact same path as the source, or else append + // the source path under the base directory. + String targetPathWithoutSchemeAndAuth + = "/".equalsIgnoreCase(baseUriPath) ? 
sourceUriPath : (baseUriPath + sourceUriPath); + Path dataPath; try { - dataLocation = PathBuilder.fullyQualifiedHDFSUri( + dataPath = PathBuilder.fullyQualifiedHDFSUri( new Path(targetPathWithoutSchemeAndAuth), basePath.getFileSystem(hiveConf) ); } catch (IOException e) { throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e); } - LOG.info("Incoming external table location: {} , new location: {}", location, dataLocation.toString()); - return dataLocation.toString(); + return dataPath; } public static class Writer implements Closeable { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java index 2036d6986f..ab5cb4989f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.repl.ReplDumpWork; +import org.apache.hadoop.hive.ql.exec.repl.ReplExternalTables; import org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.metadata.Hive; @@ -397,12 +398,8 @@ private void analyzeReplLoad(ASTNode ast) throws SemanticException { for (String location : new Reader(conf, loadPath, isIncrementalPhase).sourceLocationsToCopy()) { Path sourcePath = new Path(location); - String targetPathWithoutSchemeAndAuth = basePath.toUri().getPath() + sourcePath.toUri().getPath(); - Path fullyQualifiedTargetUri = PathBuilder.fullyQualifiedHDFSUri( - new Path(targetPathWithoutSchemeAndAuth), - basePath.getFileSystem(conf) - ); - list.add(new DirCopyWork(sourcePath, fullyQualifiedTargetUri)); + Path targetPath = ReplExternalTables.externalTableDataPath(conf, basePath, sourcePath); + list.add(new DirCopyWork(sourcePath, targetPath)); } 
return list; }