diff --git ql/src/java/org/apache/hadoop/hive/ql/Context.java ql/src/java/org/apache/hadoop/hive/ql/Context.java
index 74b4711..668e580 100644
--- ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -39,7 +39,6 @@
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
 
 import java.io.DataInput;
@@ -54,8 +53,6 @@
 import java.util.Random;
 import java.util.concurrent.ConcurrentHashMap;
 
-import javax.security.auth.login.LoginException;
-
 /**
  * Context for Semantic Analyzers. Usage: not reusable - construct a new one for
  * each query should call clear() at end of use to remove temporary folders
@@ -337,7 +334,14 @@ public Path getLocalTmpPath() {
    * external URI to which the tmp data has to be eventually moved
    * @return next available tmp path on the file system corresponding extURI
    */
-  public Path getExternalTmpPath(URI extURI) {
+  public Path getExternalTmpPath(Path path) {
+    URI extURI = path.toUri();
+    if (extURI.getScheme().equals("viewfs")) {
+      // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
+      // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir
+      // on same namespace as tbl dir.
+      return getExtTmpPathRelTo(path.getParent());
+    }
     return new Path(getExternalScratchDir(extURI), EXT_PREFIX + nextPathId());
   }
 
@@ -347,7 +351,8 @@ public Path getExternalTmpPath(URI extURI) {
    * within passed in uri, whereas getExternalTmpPath() ignores passed in path and returns temp
    * path within /tmp
    */
-  public Path getExtTmpPathRelTo(URI uri) {
+  public Path getExtTmpPathRelTo(Path path) {
+    URI uri = path.toUri();
     return new Path (getScratchDir(uri.getScheme(), uri.getAuthority(), !explain,
         uri.getPath() + Path.SEPARATOR + "_" + this.executionId), EXT_PREFIX + nextPathId());
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 7d62f45..921df0c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -1283,7 +1283,7 @@ private int archive(Hive db, AlterTableSimpleDesc simpleDesc,
     // First create the archive in a tmp dir so that if the job fails, the
     // bad files don't pollute the filesystem
     Path tmpPath = new Path(driverContext.getCtx()
-        .getExternalTmpPath(originalDir.toUri()), "partlevel");
+        .getExternalTmpPath(originalDir), "partlevel");
 
     console.printInfo("Creating " + archiveName +
         " for " + originalDir.toString());
@@ -1478,7 +1478,7 @@ private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc)
       throw new HiveException("Haven't found any archive where it should be");
     }
 
-    Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir.toUri());
+    Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir);
 
     try {
       fs = tmpPath.getFileSystem(conf);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
index 8604c5e..ef72039 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
@@ -499,7 +499,7 @@ private void handleSampling(DriverContext context, MapWork mWork, JobConf job, H
       inputPaths.add(new Path(path));
     }
 
-    Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0).toUri());
+    Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0));
     Path partitionFile = new Path(tmpPath, ".partitions");
     ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
     PartitionKeySampler sampler = new PartitionKeySampler();
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 77f56c1..325e5b3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -1697,12 +1697,8 @@ public static Path createMoveTask(Task currTask, boolean
     // generate the temporary file
     // it must be on the same file system as the current destination
     Context baseCtx = parseCtx.getContext();
-    // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
-    // to final location /user/hive/warehouse/ will fail later, so instead pick tmp dir
-    // on same namespace as tbl dir.
-    Path tmpDir = dest.toUri().getScheme().equals("viewfs") ?
-        baseCtx.getExtTmpPathRelTo(dest.toUri()) :
-        baseCtx.getExternalTmpPath(dest.toUri());
+
+    Path tmpDir = baseCtx.getExternalTmpPath(dest);
 
     FileSinkDesc fileSinkDesc = fsOp.getConf();
     // Change all the linked file sink descriptors
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index c0322fb..bcef26b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -969,7 +969,7 @@ private void analyzeTruncateTable(ASTNode ast) throws SemanticException {
       TableDesc tblDesc = Utilities.getTableDesc(table);
       // Write the output to temporary directory and move it to the final location at the end
       // so the operation is atomic.
-      Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc.toUri());
+      Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
       truncateTblDesc.setOutputDir(queryTmpdir);
       LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, partSpec == null ?
           new HashMap() : partSpec);
@@ -1549,7 +1549,7 @@ private void analyzeAlterTablePartMergeFiles(ASTNode tablePartAST, ASTNode ast,
     ddlWork.setNeedLock(true);
     Task mergeTask = TaskFactory.get(ddlWork, conf);
     TableDesc tblDesc = Utilities.getTableDesc(tblObj);
-    Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc.toUri());
+    Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
     mergeDesc.setOutputDir(queryTmpdir);
     LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, partSpec == null ?
         new HashMap() : partSpec);
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
index 83b6304..d86df45 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
@@ -276,7 +276,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
 
   private Task loadTable(URI fromURI, Table table) {
     Path dataPath = new Path(fromURI.toString(), "data");
-    Path tmpPath = ctx.getExternalTmpPath(fromURI);
+    Path tmpPath = ctx.getExternalTmpPath(new Path(fromURI));
     Task copyTask = TaskFactory.get(new CopyWork(dataPath,
         tmpPath, false), conf);
     LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath,
@@ -321,7 +321,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
       LOG.debug("adding dependent CopyWork/AddPart/MoveWork for partition "
          + partSpecToString(partSpec.getPartSpec())
          + " with source location: " + srcLocation);
-      Path tmpPath = ctx.getExternalTmpPath(fromURI);
+      Path tmpPath = ctx.getExternalTmpPath(new Path(fromURI));
      Task copyTask = TaskFactory.get(new CopyWork(new Path(srcLocation),
          tmpPath, false), conf);
      Task addPartTask = TaskFactory.get(new DDLWork(getInputs(),
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d38270c..2d89c4a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1426,7 +1426,7 @@ public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException
           }
           try {
             fname = ctx.getExternalTmpPath(
-                FileUtils.makeQualified(location, conf).toUri()).toString();
+                FileUtils.makeQualified(location, conf)).toString();
           } catch (Exception e) {
             throw new SemanticException(generateErrorMessage(ast,
                 "Error creating temporary folder on: " + location.toString()), e);
@@ -5662,12 +5662,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input)
       if (isNonNativeTable) {
         queryTmpdir = dest_path;
       } else {
-        // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
-        // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir
-        // on same namespace as tbl dir.
-        queryTmpdir = dest_path.toUri().getScheme().equals("viewfs") ?
-            ctx.getExtTmpPathRelTo(dest_path.getParent().toUri()) :
-            ctx.getExternalTmpPath(dest_path.toUri());
+        queryTmpdir = ctx.getExternalTmpPath(dest_path);
       }
       if (dpCtx != null) {
         // set the root of the temporary path where dynamic partition columns will populate
@@ -5780,12 +5775,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input)
       dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri()
          .getAuthority(), partPath.toUri().getPath());
 
-      // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
-      // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir
-      // on same namespace as tbl dir.
-      queryTmpdir = dest_path.toUri().getScheme().equals("viewfs") ?
-          ctx.getExtTmpPathRelTo(dest_path.getParent().toUri()) :
-          ctx.getExternalTmpPath(dest_path.toUri());
+      queryTmpdir = ctx.getExternalTmpPath(dest_path);
       table_desc = Utilities.getTableDesc(dest_tab);
 
       // Add sorting/bucketing if needed
@@ -5842,7 +5832,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input)
 
       try {
         Path qPath = FileUtils.makeQualified(dest_path, conf);
-        queryTmpdir = ctx.getExternalTmpPath(qPath.toUri());
+        queryTmpdir = ctx.getExternalTmpPath(qPath);
       } catch (Exception e) {
         throw new SemanticException("Error creating temporary folder on: "
             + dest_path, e);
@@ -6003,7 +5993,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input)
       // it should be the same as the MoveWork's sourceDir.
       fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName().toString());
       if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
-        String statsTmpLoc = ctx.getExternalTmpPath(queryTmpdir.toUri()).toString();
+        String statsTmpLoc = ctx.getExternalTmpPath(queryTmpdir).toString();
         LOG.info("Set stats collection dir : " + statsTmpLoc);
         conf.set(StatsSetupConst.STATS_TMP_LOC, statsTmpLoc);
       }
@@ -9014,7 +9004,7 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String
       tsDesc.setGatherStats(false);
     } else {
       if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
-        String statsTmpLoc = ctx.getExternalTmpPath(tab.getPath().toUri()).toString();
+        String statsTmpLoc = ctx.getExternalTmpPath(tab.getPath()).toString();
         LOG.info("Set stats collection dir : " + statsTmpLoc);
         conf.set(StatsSetupConst.STATS_TMP_LOC, statsTmpLoc);
       }
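
Reviewer note (not part of the patch): the change centralizes the viewfs special-casing inside Context.getExternalTmpPath(Path), so call sites no longer repeat the scheme check. The standalone sketch below mirrors only that decision logic under stated assumptions: the class name ViewFsTmpPathSketch, the helper pickTmpParent, and the "/tmp/hive-scratch" root are invented stand-ins for Hive's getExternalScratchDir/getExtTmpPathRelTo, and only hadoop-common's Path is assumed on the classpath.

import java.net.URI;
import org.apache.hadoop.fs.Path;

// Hypothetical illustration of the viewfs branch added to Context.getExternalTmpPath(Path).
public class ViewFsTmpPathSketch {

  // Returns the directory under which a temp dir would be created for 'dest'.
  static Path pickTmpParent(Path dest) {
    URI uri = dest.toUri();
    if ("viewfs".equals(uri.getScheme())) {
      // On viewfs, a rename from /tmp/... into the final table directory can cross
      // mount points and fail, so stage next to the destination instead.
      return dest.getParent();
    }
    // Otherwise any scratch location works; "/tmp/hive-scratch" is an assumed root here.
    return new Path("/tmp/hive-scratch");
  }

  public static void main(String[] args) {
    // viewfs destination -> temp dir parent stays in the same namespace as the table dir.
    System.out.println(pickTmpParent(new Path("viewfs://cluster/user/hive/warehouse/t")));
    // hdfs destination -> the shared scratch root is fine.
    System.out.println(pickTmpParent(new Path("hdfs://nn:8020/user/hive/warehouse/t")));
  }
}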