Details
- Type: Bug
- Status: Patch Available
- Priority: Minor
- Resolution: Unresolved
- Affects Version/s: 3.0.0-alpha1
Description
The current implementation of DistributedCacheEmulator.java in gridmix does not trim configuration values before using them. This leads to errors when users set values that contain spaces or newlines.
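For illustration only (not part of the patch), a minimal sketch of the difference between the two Configuration getters; the sample value below is made up, and the property name corresponds to MRJobConfig.CACHE_FILES:

import org.apache.hadoop.conf.Configuration;

public class TrimmedStringsDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    // A hand-edited XML value often carries spaces/newlines around entries.
    conf.set("mapreduce.job.cache.files",
        " hdfs:///cache/a.jar ,\n hdfs:///cache/b.jar ");

    // getStrings() splits on commas but does not trim, so entries can keep
    // surrounding whitespace and break downstream path handling.
    for (String s : conf.getStrings("mapreduce.job.cache.files")) {
      System.out.println("[" + s + "]");
    }

    // getTrimmedStrings() returns the same entries with whitespace removed,
    // so each element is a clean path string.
    for (String s : conf.getTrimmedStrings("mapreduce.job.cache.files")) {
      System.out.println("[" + s + "]");
    }
  }
}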
See the following earlier commits for reference (listing just a few):
HADOOP-6578. Configuration should trim whitespace around a lot of value types
HADOOP-6534. Trim whitespace from directory lists initializing
HDFS-9708. FSNamesystem.initAuditLoggers() doesn't trim classnames
HDFS-2799. Trim fs.checkpoint.dir values.
YARN-3395. FairScheduler: Trim whitespaces when using username for queuename.
YARN-2869. CapacityScheduler should trim sub queue names when parse configuration.
Patch is available against trunk (tested):
index 72027c1..eb12723 100644
--- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java
+++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java
@@ -315,14 +315,14 @@ void updateHDFSDistCacheFilesList(JobStory jobdesc) throws IOException {
     // paths, to be used by this simulated job.
     JobConf jobConf = jobdesc.getJobConf();
 
-    String[] files = jobConf.getStrings(MRJobConfig.CACHE_FILES);
+    String[] files = jobConf.getTrimmedStrings(MRJobConfig.CACHE_FILES);
     if (files != null) {
 
-      String[] fileSizes = jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);
+      String[] fileSizes = jobConf.getTrimmedStrings(MRJobConfig.CACHE_FILES_SIZES);
       String[] visibilities =
-          jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
+          jobConf.getTrimmedStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
       String[] timeStamps =
-          jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
+          jobConf.getTrimmedStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
 
       FileSystem fs = FileSystem.get(conf);
       String user = jobConf.getUser();
@@ -495,7 +495,7 @@ void configureDistCacheFiles(Configuration conf, JobConf jobConf)
       throws IOException {
     if (shouldEmulateDistCacheLoad()) {
 
-      String[] files = jobConf.getStrings(MRJobConfig.CACHE_FILES);
+      String[] files = jobConf.getTrimmedStrings(MRJobConfig.CACHE_FILES);
       if (files != null) {
         // hdfs based distributed cache files to be configured for simulated job
         List<String> cacheFiles = new ArrayList<String>();
@@ -504,10 +504,10 @@ void configureDistCacheFiles(Configuration conf, JobConf jobConf)
         List<String> localCacheFiles = new ArrayList<String>();
 
         String[] visibilities =
-            jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
+            jobConf.getTrimmedStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
         String[] timeStamps =
-            jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
-        String[] fileSizes = jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);
+            jobConf.getTrimmedStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
+        String[] fileSizes = jobConf.getTrimmedStrings(MRJobConfig.CACHE_FILES_SIZES);
 
         String user = jobConf.getUser();
         for (int i = 0; i < files.length; i++) {
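For completeness, a small JUnit-style sketch (not included in the patch; the test class name and sample paths are hypothetical) of how the trimmed getter behaves on a whitespace-laden value:

import static org.junit.Assert.assertArrayEquals;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.junit.Test;

public class TestCacheFilesTrimming {

  @Test
  public void trimmedGetterStripsWhitespace() {
    JobConf jobConf = new JobConf();
    // Simulate a user-provided value containing spaces and a newline.
    jobConf.set(MRJobConfig.CACHE_FILES,
        " hdfs:///cache/a.jar ,\n hdfs:///cache/b.jar ");

    // With the patch, DistributedCacheEmulator reads this via
    // getTrimmedStrings(), so each entry comes back as a clean path.
    assertArrayEquals(
        new String[] {"hdfs:///cache/a.jar", "hdfs:///cache/b.jar"},
        jobConf.getTrimmedStrings(MRJobConfig.CACHE_FILES));
  }
}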