diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java index 70e1aa7f3a..919f6a1cc2 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hive.conf.HiveConf; @@ -52,6 +53,7 @@ import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.junit.Assert.assertFalse; public class TestReplicationScenariosAcrossInstances { @Rule @@ -675,4 +677,25 @@ public void testIncrementalReplWithEventsBatchHavingDropCreateTable() throws Thr .run("select id from table2 order by id") .verifyResults(new String[] {"2"}); } + + @Test + public void shouldNotCreateDirectoryForNonNativeTableInDumpDirectory() throws Throwable { + String createTableQuery = + "CREATE TABLE custom_serdes( serde_id bigint COMMENT 'from deserializer', name string " + + "COMMENT 'from deserializer', slib string COMMENT 'from deserializer') " + + "ROW FORMAT SERDE 'org.apache.hive.storage.jdbc.JdbcSerDe' " + + "STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' " + + "WITH SERDEPROPERTIES ('serialization.format'='1') " + + "TBLPROPERTIES ( " + + "'hive.sql.database.type'='METASTORE', " + + "'hive.sql.query'='SELECT \"SERDE_ID\", \"NAME\", \"SLIB\" FROM \"SERDES\"')"; + + WarehouseInstance.Tuple bootstrapTuple = primary + .run("use " + primaryDbName) + 
.run(createTableQuery).dump(primaryDbName, null); + Path cSerdesTableDumpLocation = new Path(bootstrapTuple.dumpLocation + + Path.SEPARATOR + primaryDbName + Path.SEPARATOR + "custom_serdes"); + FileSystem fs = cSerdesTableDumpLocation.getFileSystem(primary.hiveConf); + assertFalse(fs.exists(cSerdesTableDumpLocation)); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java index 119d7923a5..3c6a606b01 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java @@ -50,7 +50,7 @@ protected int execute(DriverContext driverContext) { TableExport.Paths exportPaths = new TableExport.Paths( work.getAstRepresentationForErrorMsg(), work.getExportRootDir(), conf, false); Hive db = getHive(); - LOG.debug("Exporting data to: {}", exportPaths.getExportRootDir()); + LOG.debug("Exporting data to: {}", exportPaths.exportRootDir()); work.acidPostProcess(db); TableExport tableExport = new TableExport(exportPaths, work.getTableSpec(), work.getReplicationSpec(), db, null, conf, work.getMmContext()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java index 2a3986a317..20ff23a46b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java @@ -177,9 +177,10 @@ private boolean shouldExport() { public static class Paths { private final String astRepresentationForErrorMsg; private final HiveConf conf; - private final Path exportRootDir; + // Do not access this field directly; use exportRootDir() instead. 
+ private final Path _exportRootDir; private final FileSystem exportFileSystem; - private boolean writeData; + private boolean writeData, exportRootDirCreated = false; public Paths(String astRepresentationForErrorMsg, Path dbRoot, String tblName, HiveConf conf, boolean shouldWriteData) throws SemanticException { @@ -189,12 +190,9 @@ public Paths(String astRepresentationForErrorMsg, Path dbRoot, String tblName, H Path tableRoot = new Path(dbRoot, tblName); URI exportRootDir = EximUtil.getValidatedURI(conf, tableRoot.toUri().toString()); validateTargetDir(exportRootDir); - this.exportRootDir = new Path(exportRootDir); + this._exportRootDir = new Path(exportRootDir); try { - this.exportFileSystem = this.exportRootDir.getFileSystem(conf); - if (!exportFileSystem.exists(this.exportRootDir) && writeData) { - exportFileSystem.mkdirs(this.exportRootDir); - } + this.exportFileSystem = this._exportRootDir.getFileSystem(conf); } catch (IOException e) { throw new SemanticException(e); } @@ -204,20 +202,37 @@ public Paths(String astRepresentationForErrorMsg, String path, HiveConf conf, boolean shouldWriteData) throws SemanticException { this.astRepresentationForErrorMsg = astRepresentationForErrorMsg; this.conf = conf; - this.exportRootDir = new Path(EximUtil.getValidatedURI(conf, path)); + this._exportRootDir = new Path(EximUtil.getValidatedURI(conf, path)); this.writeData = shouldWriteData; try { - this.exportFileSystem = exportRootDir.getFileSystem(conf); - if (!exportFileSystem.exists(this.exportRootDir) && writeData) { - exportFileSystem.mkdirs(this.exportRootDir); - } + this.exportFileSystem = _exportRootDir.getFileSystem(conf); } catch (IOException e) { throw new SemanticException(e); } } Path partitionExportDir(String partitionName) throws SemanticException { - return exportDir(new Path(exportRootDir, partitionName)); + return exportDir(new Path(exportRootDir(), partitionName)); + } + + /** + * Access to the {@link #_exportRootDir} should only be done via this method + 
* since the creation of the directory is delayed until we figure out if we want + * to write something or not. This is specifically important to prevent empty non-native + * directories being created in repl dump. + */ + public Path exportRootDir() throws SemanticException { + if (!exportRootDirCreated) { + try { + if (!exportFileSystem.exists(this._exportRootDir) && writeData) { + exportFileSystem.mkdirs(this._exportRootDir); + } + exportRootDirCreated = true; + } catch (IOException e) { + throw new SemanticException(e); + } + } + return _exportRootDir; } private Path exportDir(Path exportDir) throws SemanticException { @@ -232,8 +247,8 @@ private Path exportDir(Path exportDir) throws SemanticException { } } - private Path metaDataExportFile() { - return new Path(exportRootDir, EximUtil.METADATA_NAME); + private Path metaDataExportFile() throws SemanticException { + return new Path(exportRootDir(), EximUtil.METADATA_NAME); } /** @@ -241,7 +256,7 @@ private Path metaDataExportFile() { * Partition's data export directory is created within the export semantics of partition. */ private Path dataExportDir() throws SemanticException { - return exportDir(new Path(getExportRootDir(), EximUtil.DATA_PATH_NAME)); + return exportDir(new Path(exportRootDir(), EximUtil.DATA_PATH_NAME)); } /** @@ -274,10 +289,6 @@ private void validateTargetDir(URI rootDirExportFile) throws SemanticException { throw new SemanticException(astRepresentationForErrorMsg, e); } } - - public Path getExportRootDir() { - return exportRootDir; - } } public static class AuthEntities { @@ -311,7 +322,7 @@ public AuthEntities getAuthEntities() throws SemanticException { authEntities.inputs.add(new ReadEntity(tableSpec.tableHandle)); } } - authEntities.outputs.add(toWriteEntity(paths.getExportRootDir(), conf)); + authEntities.outputs.add(toWriteEntity(paths.exportRootDir(), conf)); } catch (Exception e) { throw new SemanticException(e); }