diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java index 45b674e287..172b4ac446 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java @@ -244,7 +244,12 @@ private void addPartition(boolean hasMorePartitions, AddPartitionDesc addPartiti tmpPath, context.hiveConf, false, false ); - Task movePartitionTask = movePartitionTask(table, partSpec, tmpPath, loadFileType); + + Task movePartitionTask = null; + if (loadFileType != LoadFileType.IGNORE) { + // no need to create a move task if the file is moved directly to the target location. + movePartitionTask = movePartitionTask(table, partSpec, tmpPath, loadFileType); + } // Set Checkpoint task as dependant to add partition tasks. So, if same dump is retried for // bootstrap, we skip current partition update. 
@@ -260,10 +265,14 @@ private void addPartition(boolean hasMorePartitions, AddPartitionDesc addPartiti } else { ptnRootTask.addDependentTask(copyTask); } - copyTask.addDependentTask(addPartTask); - addPartTask.addDependentTask(movePartitionTask); - movePartitionTask.addDependentTask(ckptTask); + copyTask.addDependentTask(addPartTask); + if (movePartitionTask != null) { + addPartTask.addDependentTask(movePartitionTask); + movePartitionTask.addDependentTask(ckptTask); + } else { + addPartTask.addDependentTask(ckptTask); + } return ptnRootTask; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java index 82f687b7d6..8538463cc6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java @@ -234,7 +234,7 @@ private String location(ImportTableDesc tblDesc, Database parentDb) LOG.debug("adding dependent CopyWork/AddPart/MoveWork for table " + table.getCompleteName() + " with source location: " - + dataPath.toString() + " and target location " + tmpPath.toString()); + + dataPath.toString() + " and target location " + tgtPath.toString()); Task copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, dataPath, tmpPath, context.hiveConf, false, false); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index c99d9c1b1f..1c3416e527 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -609,6 +609,16 @@ private static ImportTableDesc getBaseCreateTableDescFromTable(String dbName, moveWork.setLoadTableWork(loadTableWork); } + if (loadFileType == LoadFileType.IGNORE) { + // if file is copied directly to the 
target location, then no need of move task in case the operation getting + // replayed is add partition. For insert operations, add partition task is anyways a no-op. + if (x.getEventType() == DumpType.EVENT_INSERT) { + copyTask.addDependentTask(TaskFactory.get(moveWork, x.getConf())); + } else { + copyTask.addDependentTask(addPartTask); + } + return copyTask; + } Task loadPartTask = TaskFactory.get(moveWork, x.getConf()); copyTask.addDependentTask(loadPartTask); addPartTask.addDependentTask(loadPartTask);