diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java index 81feaf5eec..82c46cf866 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java @@ -167,7 +167,8 @@ public void externalTableReplicationWithDefaultPaths() throws Throwable { .run("select country from t2 where country = 'us'") .verifyResult("us") .run("select country from t2 where country = 'france'") - .verifyResult("france"); + .verifyResult("france") + .run("show partitions t2").verifyResults(new String[] {"country=france", "country=india", "country=us"}); String hiveDumpLocation = tuple.dumpLocation + File.separator + ReplUtils.REPL_HIVE_BASE_DIR; // Ckpt should be set on bootstrapped db. diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java index 624f29b524..c260a7d3bc 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java @@ -166,7 +166,8 @@ public void externalTableReplicationWithDefaultPaths() throws Throwable { .run("select country from t2 where country = 'us'") .verifyResult(null) .run("select country from t2 where country = 'france'") - .verifyResult(null); + .verifyResult(null) + .run("show partitions t2").verifyResults(new String[] {"country=france", "country=india", "country=us"}); // Ckpt should be set on bootstrapped db. 
String hiveDumpLocation = tuple.dumpLocation + File.separator + REPL_HIVE_BASE_DIR; @@ -279,7 +280,9 @@ public void externalTableWithPartitions() throws Throwable { .verifyResults(new String[] {"t2"}) .run("select place from t2") .verifyResults(new String[] {}) - .verifyReplTargetProperty(replicatedDbName); + .verifyReplTargetProperty(replicatedDbName) + .run("show partitions t2") + .verifyResults(new String[] {"country=india"}); // add new data externally, to a partition, but under the table level top directory Path partitionDir = new Path(externalTableLocation, "country=india"); @@ -302,6 +305,8 @@ public void externalTableWithPartitions() throws Throwable { .verifyResults(new String[] {}) .run("select place from t2 where country='australia'") .verifyResults(new String[] {}) + .run("show partitions t2") + .verifyResults(new String[] {"country=australia", "country=india"}) .verifyReplTargetProperty(replicatedDbName); Path customPartitionLocation = @@ -323,6 +328,8 @@ public void externalTableWithPartitions() throws Throwable { .run("use " + replicatedDbName) .run("select place from t2 where country='france'") .verifyResults(new String[] {}) + .run("show partitions t2") + .verifyResults(new String[] {"country=australia", "country=france", "country=india"}) .verifyReplTargetProperty(replicatedDbName); // change the location of the partition via alter command diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java index 05a590a189..e28619ad12 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java @@ -129,7 +129,8 @@ public TaskTracker tasks() throws Exception { // existing if (table.isPartitioned()) { List partitionDescs = event.partitionDescriptions(tableDesc); - if 
(!event.replicationSpec().isMetadataOnly() && !partitionDescs.isEmpty()) { + if (!event.replicationSpec().isMetadataOnly() && !event.replicationSpec().isMetadataOnlyForExternalTables() + && !partitionDescs.isEmpty()) { updateReplicationState(initialReplicationState()); if (!forExistingTable(lastReplicatedPartition).hasReplicationState()) { // Add ReplStateLogTask only if no pending table load tasks left for next cycle @@ -205,6 +206,7 @@ private void addPartition(boolean hasMorePartitions, AlterTableAddPartitionDesc ); boolean isOnlyDDLOperation = event.replicationSpec().isMetadataOnly() + || event.replicationSpec().isMetadataOnlyForExternalTables() || (TableType.EXTERNAL_TABLE.equals(table.getTableType()) && !event.replicationSpec().isMigratingToExternalTable() ); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java index 82a30319b5..f3d3775602 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java @@ -191,7 +191,7 @@ private void newTableTasks(ImportTableDesc tblDesc, Task tblRootTask, TableLo } else { tblRootTask.addDependentTask(createTableTask); } - if (replicationSpec.isMetadataOnly()) { + if (replicationSpec.isMetadataOnly() || replicationSpec.isMetadataOnlyForExternalTables()) { tracker.addTask(tblRootTask); return; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 7354a3e7e0..eb1a150127 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -1224,7 +1224,7 @@ private static void createReplImportTasks( dependentTasks = new ArrayList<>(partitionDescs.size()); for 
(AlterTableAddPartitionDesc addPartitionDesc : partitionDescs) { addPartitionDesc.setReplicationSpec(replicationSpec); - if (!replicationSpec.isMetadataOnly()) { + if (!replicationSpec.isMetadataOnly() && !replicationSpec.isMetadataOnlyForExternalTables()) { dependentTasks.add(addSinglePartition(tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId)); } else { @@ -1236,7 +1236,7 @@ private static void createReplImportTasks( addPartitionDesc.getPartitions().get(0).getPartSpec()); } } - } else if (!replicationSpec.isMetadataOnly() + } else if (!replicationSpec.isMetadataOnly() && !replicationSpec.isMetadataOnlyForExternalTables() && !shouldSkipDataCopyInReplScope(tblDesc, replicationSpec)) { x.getLOG().debug("adding dependent CopyWork/MoveWork for table"); dependentTasks = new ArrayList<>(1); @@ -1302,7 +1302,7 @@ private static void createReplImportTasks( } if (ptn == null) { - if (!replicationSpec.isMetadataOnly()){ + if (!replicationSpec.isMetadataOnly() && !replicationSpec.isMetadataOnlyForExternalTables()){ x.getTasks().add(addSinglePartition( tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId)); if (updatedMetadata != null) { @@ -1319,7 +1319,7 @@ private static void createReplImportTasks( // If replicating, then the partition already existing means we need to replace, maybe, if // the destination ptn's repl.last.id is older than the replacement's. 
if (replicationSpec.allowReplacementInto(ptn.getParameters())){ - if (!replicationSpec.isMetadataOnly()){ + if (!replicationSpec.isMetadataOnly() && !replicationSpec.isMetadataOnlyForExternalTables()){ x.getTasks().add(addSinglePartition( tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId)); } else { @@ -1335,7 +1335,8 @@ private static void createReplImportTasks( } } } - if (replicationSpec.isMetadataOnly() && partitionDescs.isEmpty()){ + if ((replicationSpec.isMetadataOnly() || replicationSpec.isMetadataOnlyForExternalTables()) + && partitionDescs.isEmpty()){ // MD-ONLY table alter x.getTasks().add(alterTableTask(tblDesc, x,replicationSpec)); if (lockType == WriteEntity.WriteType.DDL_NO_LOCK){ @@ -1344,7 +1345,7 @@ private static void createReplImportTasks( } } else { x.getLOG().debug("table non-partitioned"); - if (!replicationSpec.isMetadataOnly()) { + if (!replicationSpec.isMetadataOnly() && !replicationSpec.isMetadataOnlyForExternalTables()) { // repl-imports are replace-into unless the event is insert-into loadTable(fromURI, table, replicationSpec.isReplace(), new Path(tblDesc.getLocation()), replicationSpec, x, writeId, stmtId); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java index 13e4a8cbee..6bf92b1df3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java @@ -52,6 +52,7 @@ private boolean needDupCopyCheck = false; //Determine if replication is done using repl or export-import private boolean isRepl = false; + private boolean isMetadataOnlyForExternalTables = false; // Key definitions related to replication. public enum KEY { @@ -279,6 +280,17 @@ public void setIsMetadataOnly(boolean isMetadataOnly){ this.isMetadataOnly = isMetadataOnly; } + /** + * @return true if this statement refers to a metadata-only operation for external tables.
+ */ + public boolean isMetadataOnlyForExternalTables() { + return isMetadataOnlyForExternalTables; + } + + public void setMetadataOnlyForExternalTables(boolean metadataOnlyForExternalTables) { + isMetadataOnlyForExternalTables = metadataOnlyForExternalTables; + } + /** * @return true if this statement refers to insert-into or insert-overwrite operation. */ @@ -379,7 +391,7 @@ public String get(KEY key) { public SCOPE getScope(){ if (isInReplicationScope()){ - if (isMetadataOnly()){ + if (isMetadataOnly() || isMetadataOnlyForExternalTables()){ return SCOPE.MD_ONLY; } else { return SCOPE.REPL; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java index f9648c8961..a1f2fb960d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java @@ -57,7 +57,8 @@ public HiveWrapper(Hive db, String dbName, long lastReplId) { public Tuple table(final String tableName, HiveConf conf) throws HiveException { // Column statistics won't be accurate if we are dumping only metadata - boolean getColStats = !Utils.shouldDumpMetaDataOnly(db.getTable(dbName, tableName), conf); + boolean getColStats = !Utils.shouldDumpMetaDataOnlyForExternalTables(db.getTable(dbName, tableName), conf) + && !Utils.shouldDumpMetaDataOnly(conf); return new Tuple<>(functionForSpec, () -> db.getTable(dbName, tableName, true, false, getColStats)); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java index a384c7e7b0..c0052256d0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java @@ -75,11 +75,16 @@ public TableExport(Paths paths, TableSpec tableSpec, ReplicationSpec replication ? 
null : tableSpec; this.replicationSpec = replicationSpec; - if (this.tableSpec != null && this.tableSpec.tableHandle!=null && (this.tableSpec.tableHandle.isView() || - Utils.shouldDumpMetaDataOnly(this.tableSpec.tableHandle, conf))) { - this.replicationSpec.setIsMetadataOnly(true); - - this.tableSpec.tableHandle.setStatsStateLikeNewTable(); + if (this.tableSpec != null && this.tableSpec.tableHandle!=null) { + if (this.tableSpec.tableHandle.isView() || Utils.shouldDumpMetaDataOnly(conf)) { + this.tableSpec.tableHandle.setStatsStateLikeNewTable(); + this.replicationSpec.setIsMetadataOnly(true); + } + if (this.tableSpec.tableHandle.isView() + || Utils.shouldDumpMetaDataOnlyForExternalTables(this.tableSpec.tableHandle, conf)) { + this.tableSpec.tableHandle.setStatsStateLikeNewTable(); + this.replicationSpec.setMetadataOnlyForExternalTables(true); + } } this.db = db; this.distCpDoAsUser = distCpDoAsUser; @@ -95,7 +100,7 @@ public TableExport(Paths paths, TableSpec tableSpec, ReplicationSpec replication } else if (shouldExport()) { PartitionIterable withPartitions = getPartitions(); writeMetaData(withPartitions); - if (!replicationSpec.isMetadataOnly() + if (!replicationSpec.isMetadataOnly() && !replicationSpec.isMetadataOnlyForExternalTables() && !(replicationSpec.isRepl() && tableSpec.tableHandle.getTableType().equals(TableType.EXTERNAL_TABLE))) { replPathMappings = writeData(withPartitions, isExportTask); } @@ -110,6 +115,7 @@ private PartitionIterable getPartitions() throws SemanticException { if (tableSpec != null && tableSpec.tableHandle != null && tableSpec.tableHandle.isPartitioned()) { if (tableSpec.specType == TableSpec.SpecType.TABLE_ONLY) { // TABLE-ONLY, fetch partitions if regular export, don't if metadata-only + // For metadata-only external tables we still need the partition info, so only isMetadataOnly() is checked here if (replicationSpec.isMetadataOnly()) { return null; } else { @@ -315,7 +321,7 @@ public AuthEntities getAuthEntities() throws SemanticException { AuthEntities authEntities = new
AuthEntities(); try { // Return if metadata-only - if (replicationSpec.isMetadataOnly()) { + if (replicationSpec.isMetadataOnly() || replicationSpec.isMetadataOnlyForExternalTables()) { return authEntities; } PartitionIterable partitions = getPartitions(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java index 6f8912b5f9..222e48eef2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java @@ -189,7 +189,8 @@ public static boolean shouldReplicate(ReplicationSpec replicationSpec, Table tab } // if its metadata only, then dump metadata of non native tables also. - if (tableHandle.isNonNative() && !replicationSpec.isMetadataOnly()) { + if (tableHandle.isNonNative() && !replicationSpec.isMetadataOnly() + && !replicationSpec.isMetadataOnlyForExternalTables()) { return false; } @@ -200,7 +201,7 @@ public static boolean shouldReplicate(ReplicationSpec replicationSpec, Table tab if (MetaStoreUtils.isExternalTable(tableHandle.getTTable())) { boolean shouldReplicateExternalTables = hiveConf.getBoolVar(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES) - || replicationSpec.isMetadataOnly(); + || replicationSpec.isMetadataOnlyForExternalTables(); if (isEventDump) { // Skip dumping of events related to external tables if bootstrap is enabled on it. // Also, skip if current table is included only in new policy but not in old policy. 
@@ -272,9 +273,12 @@ public static boolean shouldReplicate(NotificationEvent tableForEvent, } } - public static boolean shouldDumpMetaDataOnly(Table table, HiveConf conf) { - return conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY) || - (conf.getBoolVar(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES) && + public static boolean shouldDumpMetaDataOnly(HiveConf conf) { + return conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY); + } + + public static boolean shouldDumpMetaDataOnlyForExternalTables(Table table, HiveConf conf) { + return (conf.getBoolVar(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES) && table.getTableType().equals(TableType.EXTERNAL_TABLE) && conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY_FOR_EXTERNAL_TABLE)); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java index aedf69870a..0d5d4eaa47 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java @@ -229,7 +229,8 @@ public void handle(Context withinContext) throws Exception { // If we are not dumping metadata about a table, we shouldn't be dumping basic statistics // as well, since that won't be accurate. So reset them to what they would look like for an // empty table. 
- if (Utils.shouldDumpMetaDataOnly(qlMdTableAfter, withinContext.hiveConf)) { + if (Utils.shouldDumpMetaDataOnly(withinContext.hiveConf) + || Utils.shouldDumpMetaDataOnlyForExternalTables(qlMdTableAfter, withinContext.hiveConf)) { qlMdTableAfter.setStatsStateLikeNewTable(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java index c8532233a2..7a6ddf9e19 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java @@ -65,7 +65,8 @@ public void handle(Context withinContext) throws Exception { // If we are not dumping data about a table, we shouldn't be dumping basic statistics // as well, since that won't be accurate. So reset them to what they would look like for an // empty table. - if (Utils.shouldDumpMetaDataOnly(qlMdTable, withinContext.hiveConf)) { + if (Utils.shouldDumpMetaDataOnly(withinContext.hiveConf) + || Utils.shouldDumpMetaDataOnlyForExternalTables(qlMdTable, withinContext.hiveConf)) { qlMdTable.setStatsStateLikeNewTable(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java index 432dd4452f..53dec86a35 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java @@ -48,7 +48,8 @@ public void handle(Context withinContext) throws Exception { } // Statistics without any data does not make sense. 
- if (withinContext.replicationSpec.isMetadataOnly()) { + if (withinContext.replicationSpec.isMetadataOnly() + || withinContext.replicationSpec.isMetadataOnlyForExternalTables()) { return; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java index 75ee41f636..b310838143 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java @@ -43,7 +43,8 @@ public void handle(Context withinContext) throws Exception { } // Statistics without data doesn't make sense. - if (withinContext.replicationSpec.isMetadataOnly()) { + if (withinContext.replicationSpec.isMetadataOnly() + || withinContext.replicationSpec.isMetadataOnlyForExternalTables()) { return; }