diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java index 81feaf5eec..1ba8003384 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java @@ -167,7 +167,8 @@ public void externalTableReplicationWithDefaultPaths() throws Throwable { .run("select country from t2 where country = 'us'") .verifyResult("us") .run("select country from t2 where country = 'france'") - .verifyResult("france"); + .verifyResult("france") + .run("show partitions t2").verifyResults(new String[] {"country=france", "country=india", "country=us"}); String hiveDumpLocation = tuple.dumpLocation + File.separator + ReplUtils.REPL_HIVE_BASE_DIR; // Ckpt should be set on bootstrapped db. 
@@ -343,6 +344,8 @@ public void externalTableWithPartitions() throws Throwable { .verifyResults(new String[] { "bangalore", "pune", "mumbai" }) .run("select place from t2 where country='australia'") .verifyResults(new String[] { "sydney" }) + .run("show partitions t2") + .verifyResults(new String[] {"country=australia", "country=india"}) .verifyReplTargetProperty(replicatedDbName); Path customPartitionLocation = @@ -364,6 +367,8 @@ public void externalTableWithPartitions() throws Throwable { .run("use " + replicatedDbName) .run("select place from t2 where country='france'") .verifyResults(new String[] { "paris" }) + .run("show partitions t2") + .verifyResults(new String[] {"country=australia", "country=france", "country=india"}) .verifyReplTargetProperty(replicatedDbName); // change the location of the partition via alter command diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java index 624f29b524..c260a7d3bc 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java @@ -166,7 +166,8 @@ public void externalTableReplicationWithDefaultPaths() throws Throwable { .run("select country from t2 where country = 'us'") .verifyResult(null) .run("select country from t2 where country = 'france'") - .verifyResult(null); + .verifyResult(null) + .run("show partitions t2").verifyResults(new String[] {"country=france", "country=india", "country=us"}); // Ckpt should be set on bootstrapped db. 
String hiveDumpLocation = tuple.dumpLocation + File.separator + REPL_HIVE_BASE_DIR; @@ -279,7 +280,9 @@ public void externalTableWithPartitions() throws Throwable { .verifyResults(new String[] {"t2"}) .run("select place from t2") .verifyResults(new String[] {}) - .verifyReplTargetProperty(replicatedDbName); + .verifyReplTargetProperty(replicatedDbName) + .run("show partitions t2") + .verifyResults(new String[] {"country=india"}); // add new data externally, to a partition, but under the table level top directory Path partitionDir = new Path(externalTableLocation, "country=india"); @@ -302,6 +305,8 @@ public void externalTableWithPartitions() throws Throwable { .verifyResults(new String[] {}) .run("select place from t2 where country='australia'") .verifyResults(new String[] {}) + .run("show partitions t2") + .verifyResults(new String[] {"country=australia", "country=india"}) .verifyReplTargetProperty(replicatedDbName); Path customPartitionLocation = @@ -323,6 +328,8 @@ public void externalTableWithPartitions() throws Throwable { .run("use " + replicatedDbName) .run("select place from t2 where country='france'") .verifyResults(new String[] {}) + .run("show partitions t2") + .verifyResults(new String[] {"country=australia", "country=france", "country=india"}) .verifyReplTargetProperty(replicatedDbName); // change the location of the partition via alter command diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java index 92e45b4c57..aa59457119 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java @@ -615,7 +615,7 @@ void dumpTable(String dbName, String tblName, String validTxnList, Path dbRoot, exportPaths, tableSpec, tuple.replicationSpec, hiveDb, distCpDoAsUser, conf, mmCtx).write(false); replLogger.tableLog(tblName, tableSpec.tableHandle.getTableType()); if 
(tableSpec.tableHandle.getTableType().equals(TableType.EXTERNAL_TABLE) - || Utils.shouldDumpMetaDataOnly(tuple.object, conf)) { + || Utils.shouldDumpMetaDataOnly(conf)) { return; } for (ReplPathMapping replPathMapping: replPathMappings) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java index 13e4a8cbee..5c8d0edd77 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java @@ -52,6 +52,7 @@ private boolean needDupCopyCheck = false; //Determine if replication is done using repl or export-import private boolean isRepl = false; + private boolean isMetadataOnlyForExternalTables = false; // Key definitions related to replication. public enum KEY { @@ -279,6 +280,17 @@ public void setIsMetadataOnly(boolean isMetadataOnly){ this.isMetadataOnly = isMetadataOnly; } + /** + * @return true if this statement refers to a metadata-only operation on external tables. + */ + public boolean isMetadataOnlyForExternalTables() { + return isMetadataOnlyForExternalTables; + } + + public void setMetadataOnlyForExternalTables(boolean metadataOnlyForExternalTables) { + isMetadataOnlyForExternalTables = metadataOnlyForExternalTables; + } + /** * @return true if this statement refers to insert-into or insert-overwrite operation. 
*/ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java index f9648c8961..a1f2fb960d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java @@ -57,7 +57,8 @@ public HiveWrapper(Hive db, String dbName, long lastReplId) { public Tuple table(final String tableName, HiveConf conf) throws HiveException { // Column statistics won't be accurate if we are dumping only metadata - boolean getColStats = !Utils.shouldDumpMetaDataOnly(db.getTable(dbName, tableName), conf); + boolean getColStats = !Utils.shouldDumpMetaDataOnlyForExternalTables(db.getTable(dbName, tableName), conf) + && !Utils.shouldDumpMetaDataOnly(conf); return new Tuple<>(functionForSpec, () -> db.getTable(dbName, tableName, true, false, getColStats)); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java index a384c7e7b0..a26b15948c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java @@ -75,11 +75,20 @@ public TableExport(Paths paths, TableSpec tableSpec, ReplicationSpec replication ? 
null : tableSpec; this.replicationSpec = replicationSpec; - if (this.tableSpec != null && this.tableSpec.tableHandle!=null && (this.tableSpec.tableHandle.isView() || - Utils.shouldDumpMetaDataOnly(this.tableSpec.tableHandle, conf))) { - this.replicationSpec.setIsMetadataOnly(true); - - this.tableSpec.tableHandle.setStatsStateLikeNewTable(); + if (this.tableSpec != null && this.tableSpec.tableHandle!=null) { + //If the table is a view, or the metadata-only dump flag (used by DAS) is set to true, + //enable isMetadataOnly + if (this.tableSpec.tableHandle.isView() || Utils.shouldDumpMetaDataOnly(conf)) { + this.tableSpec.tableHandle.setStatsStateLikeNewTable(); + this.replicationSpec.setIsMetadataOnly(true); + } + //If the table is a view, or the metadata-only dump flag for external tables is set to true, + //enable isMetadataOnlyForExternalTables + if (this.tableSpec.tableHandle.isView() + || Utils.shouldDumpMetaDataOnlyForExternalTables(this.tableSpec.tableHandle, conf)) { + this.tableSpec.tableHandle.setStatsStateLikeNewTable(); + this.replicationSpec.setMetadataOnlyForExternalTables(true); + } } this.db = db; this.distCpDoAsUser = distCpDoAsUser; @@ -110,6 +119,7 @@ private PartitionIterable getPartitions() throws SemanticException { if (tableSpec != null && tableSpec.tableHandle != null && tableSpec.tableHandle.isPartitioned()) { if (tableSpec.specType == TableSpec.SpecType.TABLE_ONLY) { // TABLE-ONLY, fetch partitions if regular export, don't if metadata-only + //For metadata only external tables, we still need the partition info if (replicationSpec.isMetadataOnly()) { return null; } else { @@ -315,7 +325,7 @@ public AuthEntities getAuthEntities() throws SemanticException { AuthEntities authEntities = new AuthEntities(); try { // Return if metadata-only - if (replicationSpec.isMetadataOnly()) { + if (replicationSpec.isMetadataOnly() || replicationSpec.isMetadataOnlyForExternalTables()) { return authEntities; } PartitionIterable partitions = getPartitions(); diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java index 6f8912b5f9..5cb6f1b0c1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java @@ -272,9 +272,12 @@ public static boolean shouldReplicate(NotificationEvent tableForEvent, } } - public static boolean shouldDumpMetaDataOnly(Table table, HiveConf conf) { - return conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY) || - (conf.getBoolVar(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES) && + public static boolean shouldDumpMetaDataOnly(HiveConf conf) { + return conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY); + } + + public static boolean shouldDumpMetaDataOnlyForExternalTables(Table table, HiveConf conf) { + return (conf.getBoolVar(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES) && table.getTableType().equals(TableType.EXTERNAL_TABLE) && conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY_FOR_EXTERNAL_TABLE)); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AbstractEventHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AbstractEventHandler.java index 804607705b..2107be137c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AbstractEventHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AbstractEventHandler.java @@ -94,7 +94,7 @@ protected void writeFileEntry(String dbName, Table table, String file, BufferedW throws IOException, LoginException, MetaException, HiveFatalException { HiveConf hiveConf = withinContext.hiveConf; String distCpDoAsUser = hiveConf.getVar(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER); - if (!Utils.shouldDumpMetaDataOnly(table, withinContext.hiveConf)) { + if (!Utils.shouldDumpMetaDataOnlyForExternalTables(table, withinContext.hiveConf)) { Path dataPath = new Path(withinContext.dumpRoot.toString(), 
EximUtil.DATA_PATH_NAME); List filePaths = new ArrayList<>(); String[] decodedURISplits = ReplChangeManager.decodeFileUri(file); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java index aedf69870a..0d5d4eaa47 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java @@ -229,7 +229,8 @@ public void handle(Context withinContext) throws Exception { // If we are not dumping metadata about a table, we shouldn't be dumping basic statistics // as well, since that won't be accurate. So reset them to what they would look like for an // empty table. - if (Utils.shouldDumpMetaDataOnly(qlMdTableAfter, withinContext.hiveConf)) { + if (Utils.shouldDumpMetaDataOnly(withinContext.hiveConf) + || Utils.shouldDumpMetaDataOnlyForExternalTables(qlMdTableAfter, withinContext.hiveConf)) { qlMdTableAfter.setStatsStateLikeNewTable(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java index c8532233a2..7a6ddf9e19 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java @@ -65,7 +65,8 @@ public void handle(Context withinContext) throws Exception { // If we are not dumping data about a table, we shouldn't be dumping basic statistics // as well, since that won't be accurate. So reset them to what they would look like for an // empty table. 
- if (Utils.shouldDumpMetaDataOnly(qlMdTable, withinContext.hiveConf)) { + if (Utils.shouldDumpMetaDataOnly(withinContext.hiveConf) + || Utils.shouldDumpMetaDataOnlyForExternalTables(qlMdTable, withinContext.hiveConf)) { qlMdTable.setStatsStateLikeNewTable(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java index 432dd4452f..ba550e49c0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java @@ -46,9 +46,9 @@ public void handle(Context withinContext) throws Exception { event.getEventType()); return; } - // Statistics without any data does not make sense. - if (withinContext.replicationSpec.isMetadataOnly()) { + if (withinContext.replicationSpec.isMetadataOnly() + || Utils.shouldDumpMetaDataOnlyForExternalTables(new Table(tableObj), withinContext.hiveConf)) { return; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java index 75ee41f636..69d73306fb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java @@ -43,7 +43,8 @@ public void handle(Context withinContext) throws Exception { } // Statistics without data doesn't make sense. - if (withinContext.replicationSpec.isMetadataOnly()) { + if (withinContext.replicationSpec.isMetadataOnly() + || Utils.shouldDumpMetaDataOnlyForExternalTables(qlMdTable, withinContext.hiveConf)) { return; }