diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index cd425aa9ab..6dc19e4fd9 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -466,7 +466,8 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         new TimeValidator(TimeUnit.DAYS), "TTL of dump dirs before cleanup."),
     REPL_DUMP_METADATA_ONLY("hive.repl.dump.metadata.only", false,
-        "Indicates whether replication dump only metadata information or data + metadata."),
+        "Indicates whether replication dump only metadata information or data + metadata. \n"
+            + "This config makes the 'hive.repl.dump.include.external.tables' config ineffective."),
     REPL_DUMP_INCLUDE_ACID_TABLES("hive.repl.dump.include.acid.tables", false,
         "Indicates if repl dump should include information about ACID tables. It should be \n"
             + "used in conjunction with 'hive.repl.dump.metadata.only' to enable copying of \n"
@@ -482,6 +483,11 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
     REPL_ADD_RAW_RESERVED_NAMESPACE("hive.repl.add.raw.reserved.namespace", false,
         "For TDE with same encryption keys on source and target, allow Distcp super user to access \n"
             + "the raw bytes from filesystem without decrypting on source and then encrypting on target."),
+    REPL_INCLUDE_EXTERNAL_TABLES("hive.repl.dump.include.external.tables", false,
+        "Indicates if repl dump should include information about external tables. It should be \n"
+            + "used in conjunction with 'hive.repl.dump.metadata.only' set to false. If 'hive.repl.dump.metadata.only' \n"
+            + "is set to true, then this config parameter has no effect as external table metadata is always \n"
+            + "dumped by default."),
     LOCALSCRATCHDIR("hive.exec.local.scratchdir",
         "${system:java.io.tmpdir}" + File.separator + "${system:user.name}",
         "Local scratch space for Hive jobs"),
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestExportImport.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestExportImport.java
index 67b74c2c26..c33b46baf6 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestExportImport.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestExportImport.java
@@ -39,6 +39,7 @@
   protected static final Logger LOG = LoggerFactory.getLogger(TestExportImport.class);
   private static WarehouseInstance srcHiveWarehouse;
   private static WarehouseInstance destHiveWarehouse;
+  private static WarehouseInstance dumpExternalWarehouse;

   @Rule
   public final TestName testName = new TestName();
@@ -55,9 +56,15 @@ public static void classLevelSetup() throws Exception {
     HashMap<String, String> overridesForHiveConf = new HashMap<String, String>() {{
       put(HiveConf.ConfVars.HIVE_IN_TEST.varname, "false");
     }};
+    HashMap<String, String> overridesForHiveConfDump = new HashMap<String, String>() {{
+      put(HiveConf.ConfVars.HIVE_IN_TEST.varname, "false");
+      put(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES.varname, "true");
+    }};
     srcHiveWarehouse = new WarehouseInstance(LOG, miniDFSCluster, overridesForHiveConf);
     destHiveWarehouse = new WarehouseInstance(LOG, miniDFSCluster, overridesForHiveConf);
+    dumpExternalWarehouse =
+        new WarehouseInstance(LOG, miniDFSCluster, overridesForHiveConfDump);
   }

   @AfterClass
@@ -106,6 +113,54 @@ public void dataImportAfterMetadataOnlyImport() throws Throwable {
         .verifyResults(new String[] { "1", "2" });
   }

+  @Test
+  public void testExportExternalTableSetFalse() throws Throwable {
+    String path = "hdfs:///tmp/" + dbName + "/";
path = "hdfs:///tmp/" + dbName + "/"; + String exportMDPath = "'" + path + "1/'"; + String exportDataPath = "'" + path + "2/'"; + String exportDataPathRepl = "'" + path + "3/'"; + srcHiveWarehouse.run("create external table " + dbName + ".t1 (i int)") + .run("insert into table " + dbName + ".t1 values (1),(2)") + .run("export table " + dbName + ".t1 to " + exportMDPath + " for metadata replication('1')") + .run("export table " + dbName + ".t1 to " + exportDataPath) + .runFailure("export table " + dbName + ".t1 to " + exportDataPathRepl + " for replication('2')"); + + destHiveWarehouse.run ("use " + replDbName) + .run("import table " + replDbName + ".t1 from " + exportMDPath) + .run("show tables like 't1'") + .verifyResult("t1") + .run("import table " + replDbName + ".t2 from " + exportDataPath) + .run("select * from " + replDbName + ".t2") + .verifyResults(new String[] { "1", "2" }) + .runFailure("import table " + replDbName + ".t3 from " + exportDataPathRepl) + .run("show tables like 't3'") + .verifyFailure(new String[] {"t3"}); + } + + @Test + public void testExportExternalTableSetTrue() throws Throwable { + String path = "hdfs:///tmp/" + dbName + "/"; + String exportMDPath = "'" + path + "1/'"; + String exportDataPath = "'" + path + "2/'"; + String exportDataPathRepl = "'" + path + "3/'"; + dumpExternalWarehouse.run("create external table " + dbName + ".t1 (i int)") + .run("insert into table " + dbName + ".t1 values (1),(2)") + .run("export table " + dbName + ".t1 to " + exportDataPathRepl + " for replication('2')") + .run("export table " + dbName + ".t1 to " + exportMDPath + " for metadata replication('1')") + .run("export table " + dbName + ".t1 to " + exportDataPath); + + destHiveWarehouse.run ("use " + replDbName) + .run("import table " + replDbName + ".t1 from " + exportMDPath) + .run("show tables like 't1'") + .verifyResult("t1") + .run("import table " + replDbName + ".t2 from " + exportDataPath) + .run("select * from " + replDbName + ".t2") + .verifyResults(new String[] { "1", "2" }) + .run("import table " + replDbName + ".t3 from " + exportDataPathRepl) + .run("select * from " + replDbName + ".t3") + .verifyResults(new String[] { "1", "2" }); + } + @Test public void databaseTheTableIsImportedIntoShouldBeParsedFromCommandLine() throws Throwable { String path = "hdfs:///tmp/" + dbName + "/"; diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java index 182a77277b..1dbab82629 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java @@ -70,7 +70,7 @@ public TestRule replV1BackwardCompat; protected static final Logger LOG = LoggerFactory.getLogger(TestReplicationScenarios.class); - private static WarehouseInstance primary, replica; + private static WarehouseInstance primary, replica, externalDump; private String primaryDbName, replicatedDbName; @BeforeClass @@ -84,8 +84,14 @@ public static void classLevelSetup() throws Exception { put("fs.defaultFS", miniDFSCluster.getFileSystem().getUri().toString()); put(HiveConf.ConfVars.HIVE_IN_TEST_REPL.varname, "true"); }}; + HashMap overridesForHiveConfDump = new HashMap() {{ + put("fs.defaultFS", miniDFSCluster.getFileSystem().getUri().toString()); + 
+      put(HiveConf.ConfVars.HIVE_IN_TEST_REPL.varname, "true");
+      put(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES.varname, "true");
+    }};
     primary = new WarehouseInstance(LOG, miniDFSCluster, overridesForHiveConf);
     replica = new WarehouseInstance(LOG, miniDFSCluster, overridesForHiveConf);
+    externalDump = new WarehouseInstance(LOG, miniDFSCluster, overridesForHiveConfDump);
   }

   @AfterClass
@@ -811,4 +817,68 @@ public void testIfCkptSetForObjectsByBootstrapReplLoad() throws Throwable {
     Partition uk = replica.getPartition(replicatedDbName, "t2", Collections.singletonList("uk"));
     verifyIfCkptSet(uk.getParameters(), tuple.dumpLocation);
   }
+
+  @Test
+  public void testDumpExternalTableSetFalse() throws Throwable {
+    WarehouseInstance.Tuple tuple = primary
+        .run("use " + primaryDbName)
+        .run("create external table t1 (id int)")
+        .run("insert into table t1 values (1)")
+        .run("insert into table t1 values (2)")
+        .run("create external table t2 (place string) partitioned by (country string)")
+        .run("insert into table t2 partition(country='india') values ('bangalore')")
+        .run("insert into table t2 partition(country='us') values ('austin')")
+        .run("insert into table t2 partition(country='france') values ('paris')")
+        .dump(primaryDbName, null);
+
+    replica.load(replicatedDbName, tuple.dumpLocation)
+        .run("repl status " + replicatedDbName)
+        .verifyResult(tuple.lastReplicationId)
+        .run("use " + replicatedDbName)
+        .run("show tables like 't1'")
+        .verifyFailure(new String[] {"t1"})
+        .run("show tables like 't2'")
+        .verifyFailure(new String[] {"t2"});
+
+    tuple = primary.run("use " + primaryDbName)
+        .run("create external table t3 (id int)")
+        .run("insert into table t3 values (10)")
+        .run("insert into table t3 values (20)")
+        .dump("repl dump " + primaryDbName + " from " + tuple.lastReplicationId
+            + " with ('hive.repl.dump.metadata.only'='true')");
+
+    replica.load(replicatedDbName, tuple.dumpLocation)
+        .run("use " + replicatedDbName)
+        .run("show tables like 't3'")
+        .verifyResult("t3")
+        .run("select id from t3 where id = 10")
+        .verifyFailure(new String[] {"10"});
+  }
+
+  @Test
+  public void testDumpExternalTableSetTrue() throws Throwable {
+    WarehouseInstance.Tuple tuple = externalDump
+        .run("use " + primaryDbName)
+        .run("create external table t1 (id int)")
+        .run("insert into table t1 values (1)")
+        .run("insert into table t1 values (2)")
+        .run("create external table t2 (place string) partitioned by (country string)")
+        .run("insert into table t2 partition(country='india') values ('bangalore')")
+        .run("insert into table t2 partition(country='us') values ('austin')")
+        .run("insert into table t2 partition(country='france') values ('paris')")
+        .dump(primaryDbName, null);
+
+    replica.load(replicatedDbName, tuple.dumpLocation)
+        .run("use " + replicatedDbName)
+        .run("show tables like 't1'")
+        .verifyResult("t1")
+        .run("show tables like 't2'")
+        .verifyResult("t2")
+        .run("repl status " + replicatedDbName)
+        .verifyResult(tuple.lastReplicationId)
+        .run("select country from t2 where country = 'us'")
+        .verifyResult("us")
+        .run("select country from t2 where country = 'france'")
+        .verifyResult("france");
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 8baf309e7f..ca64b066d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -199,7 +199,7 @@
   NEED_TABLE_SPECIFICATION(10117, "Table name could be determined; It should be specified "),
   PARTITION_EXISTS(10118, "Partition already exists"),
   TABLE_DATA_EXISTS(10119, "Table exists and contains data files"),
-  INCOMPATIBLE_SCHEMA(10120, "The existing table is not compatible with the import spec. "),
+  INCOMPATIBLE_SCHEMA(10120, "The existing table is not compatible with the Export/Import spec. "),
   EXIM_FOR_NON_NATIVE(10121, "Export/Import cannot be done for a non-native table. "),
   INSERT_INTO_BUCKETIZED_TABLE(10122, "Bucketized tables do not support INSERT INTO:"),
   PARTSPEC_DIFFER_FROM_SCHEMA(10125, "Partition columns in partition specification are "
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java
index 3c6a606b01..078691cd06 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExportTask.java
@@ -55,7 +55,7 @@ protected int execute(DriverContext driverContext) {
       TableExport tableExport = new TableExport(exportPaths, work.getTableSpec(),
           work.getReplicationSpec(), db, null, conf, work.getMmContext());
       if (!tableExport.write()) {
-        throw new SemanticException(ErrorMsg.EXIM_FOR_NON_NATIVE.getMsg());
+        throw new SemanticException(ErrorMsg.INCOMPATIBLE_SCHEMA.getMsg());
       }
     } catch (Exception e) {
       LOG.error("failed", e);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java
index 14572ad8ae..fccac41173 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java
@@ -22,6 +22,7 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.NotificationEvent;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.metadata.Hive;
@@ -176,6 +177,12 @@ public static Boolean shouldReplicate(ReplicationSpec replicationSpec, Table tab
       return false;
     }

+    if (!hiveConf.getBoolVar(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES) &&
+        MetaStoreUtils.isExternalTable(tableHandle.getTTable()) &&
+        !replicationSpec.isMetadataOnly() && replicationSpec.isInReplicationScope()) {
+      return false;
+    }
+
     if (replicationSpec.isInReplicationScope()) {
       boolean isAcidTable = AcidUtils.isTransactionalTable(tableHandle);
       if (isAcidTable) {
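
Not part of the patch: the sketch below restates the gating rule that the Utils.shouldReplicate() change introduces, with the HiveConf, ReplicationSpec, and metastore types reduced to plain booleans so it compiles standalone. The class and method names (ExternalTableDumpRuleSketch, shouldDumpTable) are hypothetical and exist only for illustration.

// Illustrative restatement of the predicate added to Utils.shouldReplicate();
// names here are not part of the Hive codebase.
public class ExternalTableDumpRuleSketch {

  /**
   * @param includeExternalTables value of hive.repl.dump.include.external.tables
   * @param isExternalTable       the table is an EXTERNAL table in the metastore
   * @param isMetadataOnly        the dump runs with hive.repl.dump.metadata.only
   * @param inReplicationScope    the export is part of REPL DUMP rather than a plain EXPORT
   * @return whether the table should be written to the dump
   */
  static boolean shouldDumpTable(boolean includeExternalTables, boolean isExternalTable,
      boolean isMetadataOnly, boolean inReplicationScope) {
    // External tables are skipped only for data + metadata replication dumps;
    // metadata-only dumps and plain EXPORTs are unaffected by the new flag.
    if (!includeExternalTables && isExternalTable && !isMetadataOnly && inReplicationScope) {
      return false;
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(shouldDumpTable(false, true, false, true)); // false: flag off, full repl dump
    System.out.println(shouldDumpTable(true, true, false, true));  // true: flag on
    System.out.println(shouldDumpTable(false, true, true, true));  // true: metadata-only dump
  }
}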