diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index c5428fa..592eb9d 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -22,12 +22,16 @@
 import java.io.IOException;
 import java.net.URI;
 import java.util.BitSet;
 import java.util.List;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.util.Shell;
+
+
 /**
  * Collection of file manipulation utilities common across Hive.
  */
@@ -127,6 +131,12 @@ public final class FileUtils {
   // won't be corrupt, because the full path name in metastore is stored.
   // In that case, Hive will continue to read the old data, but when it creates
   // new partitions, it will use new names.
+  // Note: adding new characters to this list is not fully backward compatible.
+  // For example, if a partition was created with a name containing a special
+  // character that a later Hive version starts escaping via the list below,
+  // then dropping that partition with the newer version fails, because the
+  // newly escaped name no longer matches the name stored in the metastore.
+
   static BitSet charToEscape = new BitSet(128);
   static {
     for (char c = 0; c < ' '; c++) {
@@ -143,7 +153,13 @@ public final class FileUtils {
         '\u0013', '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019',
         '\u001A', '\u001B', '\u001C', '\u001D', '\u001E', '\u001F',
         '"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', '{',
-        '[', ']', '^', ' ','<','>','|'};
+        '[', ']', '^'};
+
+    if (Shell.WINDOWS) {
+      // On Windows, the following chars need to be escaped as well
+      char[] winClist = {' ', '<', '>', '|'};
+      clist = ArrayUtils.addAll(clist, winClist);
+    }
     for (char c : clist) {
       charToEscape.set(c);
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index 6b94105..e34b654 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -59,7 +59,7 @@ public class Context {
   private int resDirFilesNum;
   boolean initialized;
   String originalTracker = null;
-  private final Map<String, ContentSummary> pathToCS = new ConcurrentHashMap<String, ContentSummary>();
+  private final Map<Path, ContentSummary> pathToCS = new ConcurrentHashMap<Path, ContentSummary>();
   // scratch path to use for all non-local (ie.
hdfs) file system tmp folders private final Path nonLocalScratchPath; @@ -275,19 +275,19 @@ public class Context { * @param originalURI uri to localize * @return localized path for map-red intermediate data */ - public String localizeMRTmpFileURI(String originalURI) { - Path o = new Path(originalURI); + public Path localizeMRTmpFileURI(Path originalPath) { + Path mrbase = new Path(getMRScratchDir()); - URI relURI = mrbase.toUri().relativize(o.toUri()); - if (relURI.equals(o.toUri())) { + URI relURI = mrbase.toUri().relativize(originalPath.toUri()); + if (relURI.equals(originalPath.toUri())) { throw new RuntimeException - ("Invalid URI: " + originalURI + ", cannot relativize against" + + ("Invalid URI: " + originalPath + ", cannot relativize against" + mrbase.toString()); } - return getLocalScratchDir(!explain) + Path.SEPARATOR + - relURI.getPath(); + return new Path(getLocalScratchDir(!explain) + Path.SEPARATOR + + relURI.getPath()); } @@ -516,15 +516,15 @@ public class Context { } } - public void addCS(String path, ContentSummary cs) { + public void addCS(Path path, ContentSummary cs) { pathToCS.put(path, cs); } - public ContentSummary getCS(String path) { + public ContentSummary getCS(Path path) { return pathToCS.get(path); } - public Map getPathToCS() { + public Map getPathToCS() { return pathToCS; } @@ -537,10 +537,10 @@ public class Context { * Given a mapping from paths to objects, localize any MR tmp paths * @param map mapping from paths to objects */ - public void localizeKeys(Map map) { - for (Map.Entry entry: map.entrySet()) { - String path = entry.getKey(); - if (isMRTmpFileURI(path)) { + public void localizeKeys(Map map) { + for (Map.Entry entry: map.entrySet()) { + Path path = entry.getKey(); + if (isMRTmpFileURI(path.toUri().toString())) { Object val = entry.getValue(); map.remove(path); map.put(localizeMRTmpFileURI(path), val); @@ -552,12 +552,12 @@ public class Context { * Given a list of paths, localize any MR tmp paths contained therein * @param paths list of paths to be localized */ - public void localizePaths(List paths) { - Iterator iter = paths.iterator(); - List toAdd = new ArrayList (); + public void localizePaths(List paths) { + Iterator iter = paths.iterator(); + List toAdd = new ArrayList (); while(iter.hasNext()) { - String path = iter.next(); - if (isMRTmpFileURI(path)) { + Path path = iter.next(); + if (isMRTmpFileURI(path.toUri().toString())) { iter.remove(); toAdd.add(localizeMRTmpFileURI(path)); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java index b791fd0..dea5fc9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java @@ -190,7 +190,7 @@ public class ExecDriver extends Task implements Serializable, Hadoop protected void createTmpDirs() throws IOException { // fix up outputs - Map> pa = work.getPathToAliases(); + Map> pa = work.getPathToAliases(); if (pa != null) { ArrayList> opList = new ArrayList>(); @@ -744,7 +744,7 @@ public class ExecDriver extends Task implements Serializable, Hadoop /** * Handle a empty/null path for a given alias. 
*/ - private static int addInputPath(String path, JobConf job, MapredWork work, String hiveScratchDir, + private static int addInputPath(Path path, JobConf job, MapredWork work, String hiveScratchDir, int numEmptyPaths, boolean isEmptyPath, String alias) throws Exception { // either the directory does not exist or it is empty assert path == null || isEmptyPath; @@ -766,7 +766,7 @@ public class ExecDriver extends Task implements Serializable, Hadoop } if (nonNative) { - FileInputFormat.addInputPaths(job, path); + FileInputFormat.addInputPath(job, path); LOG.info("Add a non-native table " + path); return numEmptyPaths; } @@ -787,28 +787,28 @@ public class ExecDriver extends Task implements Serializable, Hadoop // toggle the work - LinkedHashMap> pathToAliases = work.getPathToAliases(); + LinkedHashMap> pathToAliases = work.getPathToAliases(); if (isEmptyPath) { assert path != null; - pathToAliases.put(newPath.toUri().toString(), pathToAliases.get(path)); + pathToAliases.put(newPath, pathToAliases.get(path)); pathToAliases.remove(path); } else { assert path == null; ArrayList newList = new ArrayList(); newList.add(alias); - pathToAliases.put(newPath.toUri().toString(), newList); + pathToAliases.put(newPath, newList); } work.setPathToAliases(pathToAliases); - LinkedHashMap pathToPartitionInfo = work.getPathToPartitionInfo(); + LinkedHashMap pathToPartitionInfo = work.getPathToPartitionInfo(); if (isEmptyPath) { - pathToPartitionInfo.put(newPath.toUri().toString(), pathToPartitionInfo.get(path)); + pathToPartitionInfo.put(newPath, pathToPartitionInfo.get(path)); pathToPartitionInfo.remove(path); } else { PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone(); - pathToPartitionInfo.put(newPath.toUri().toString(), pDesc); + pathToPartitionInfo.put(newPath, pDesc); } work.setPathToPartitionInfo(pathToPartitionInfo); @@ -824,16 +824,16 @@ public class ExecDriver extends Task implements Serializable, Hadoop throws Exception { int numEmptyPaths = 0; - Set pathsProcessed = new HashSet(); - List pathsToAdd = new LinkedList(); + Set pathsProcessed = new HashSet(); + List pathsToAdd = new LinkedList(); // AliasToWork contains all the aliases for (String oneAlias : work.getAliasToWork().keySet()) { LOG.info("Processing alias " + oneAlias); - List emptyPaths = new ArrayList(); + List emptyPaths = new ArrayList(); // The alias may not have any path - String path = null; - for (String onefile : work.getPathToAliases().keySet()) { + Path path = null; + for (Path onefile : work.getPathToAliases().keySet()) { List aliases = work.getPathToAliases().get(onefile); if (aliases.contains(oneAlias)) { path = onefile; @@ -856,7 +856,7 @@ public class ExecDriver extends Task implements Serializable, Hadoop } // Create a empty file if the directory is empty - for (String emptyPath : emptyPaths) { + for (Path emptyPath : emptyPaths) { numEmptyPaths = addInputPath(emptyPath, job, work, hiveScratchDir, numEmptyPaths, true, oneAlias); } @@ -877,7 +877,7 @@ public class ExecDriver extends Task implements Serializable, Hadoop setInputPaths(job, pathsToAdd); } - private static void setInputPaths(JobConf job, List pathsToAdd) { + private static void setInputPaths(JobConf job, List pathsToAdd) { Path[] addedPaths = FileInputFormat.getInputPaths(job); List toAddPathList = new ArrayList(); if(addedPaths != null) { @@ -885,8 +885,8 @@ public class ExecDriver extends Task implements Serializable, Hadoop toAddPathList.add(added); } } - for(String toAdd: pathsToAdd) { - toAddPathList.add(new Path(toAdd)); + for(Path toAdd: 
pathsToAdd) { + toAddPathList.add(toAdd); } FileInputFormat.setInputPaths(job, toAddPathList.toArray(new Path[0])); } @@ -905,8 +905,8 @@ public class ExecDriver extends Task implements Serializable, Hadoop protected void localizeMRTmpFilesImpl(Context ctx) { // localize any map-reduce input paths - ctx.localizeKeys((Map) ((Object) work.getPathToAliases())); - ctx.localizeKeys((Map) ((Object) work.getPathToPartitionInfo())); + ctx.localizeKeys((Map) ((Object) work.getPathToAliases())); + ctx.localizeKeys((Map) ((Object) work.getPathToPartitionInfo())); // localize any input paths for maplocal work MapredLocalWork l = work.getMapLocalWork(); @@ -914,8 +914,8 @@ public class ExecDriver extends Task implements Serializable, Hadoop Map m = l.getAliasToFetchWork(); if (m != null) { for (FetchWork fw : m.values()) { - String s = fw.getTblDir(); - if ((s != null) && ctx.isMRTmpFileURI(s)) { + Path s = fw.getTblDir(); + if ((s != null) && ctx.isMRTmpFileURI(s.toUri().toString())) { fw.setTblDir(ctx.localizeMRTmpFileURI(s)); } } @@ -923,7 +923,7 @@ public class ExecDriver extends Task implements Serializable, Hadoop } // fix up outputs - Map> pa = work.getPathToAliases(); + Map> pa = work.getPathToAliases(); if (pa != null) { for (List ls : pa.values()) { for (String a : ls) { @@ -937,7 +937,7 @@ public class ExecDriver extends Task implements Serializable, Hadoop FileSinkDesc fdesc = ((FileSinkOperator) op).getConf(); String s = fdesc.getDirName(); if ((s != null) && ctx.isMRTmpFileURI(s)) { - fdesc.setDirName(ctx.localizeMRTmpFileURI(s)); + fdesc.setDirName(ctx.localizeMRTmpFileURI(new Path(s)).toString()); } ((FileSinkOperator) op).setConf(fdesc); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index 0efbe08..2f40f74 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -227,7 +227,7 @@ public class FetchOperator implements Serializable { } return; } else { - iterPath = FetchWork.convertStringToPathArray(work.getPartDir()).iterator(); + iterPath = work.getPartDir().iterator(); iterPartDesc = work.getPartDesc().iterator(); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java index f8373a3..923759c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java @@ -25,6 +25,7 @@ import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CommandNeedRetryException; @@ -186,12 +187,12 @@ public class FetchTask extends Task implements Serializable { @Override protected void localizeMRTmpFilesImpl(Context ctx) { - String s = work.getTblDir(); - if ((s != null) && ctx.isMRTmpFileURI(s)) { + Path s = work.getTblDir(); + if ((s != null) && ctx.isMRTmpFileURI(s.toUri().toString())) { work.setTblDir(ctx.localizeMRTmpFileURI(s)); } - ArrayList ls = work.getPartDir(); + ArrayList ls = work.getPartDir(); if (ls != null) { ctx.localizePaths(ls); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index 2553931..5fef78f 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -84,7 +84,7 @@ public class MapOperator extends Operator implements Serializable { private Map opCtxMap; private final Set listInputPaths = new HashSet(); - private Map, java.util.ArrayList> operatorToPaths; + private Map, java.util.ArrayList> operatorToPaths; private final Map, MapOpCtx> childrenOpToOpCtxMap = new HashMap, MapOpCtx>(); @@ -92,7 +92,7 @@ public class MapOperator extends Operator implements Serializable { private ArrayList> extraChildrenToClose = null; private static class MapInputPath { - String path; + Path path; String alias; Operator op; @@ -101,7 +101,7 @@ public class MapOperator extends Operator implements Serializable { * @param alias * @param op */ - public MapInputPath(String path, String alias, + public MapInputPath(Path path, String alias, Operator op) { this.path = path; this.alias = alias; @@ -215,7 +215,7 @@ public class MapOperator extends Operator implements Serializable { } private static MapOpCtx initObjectInspector(MapredWork conf, - Configuration hconf, String onefile) throws HiveException, + Configuration hconf, Path onefile) throws HiveException, ClassNotFoundException, InstantiationException, IllegalAccessException, SerDeException { PartitionDesc td = conf.getPathToPartitionInfo().get(onefile); @@ -357,14 +357,13 @@ public class MapOperator extends Operator implements Serializable { ArrayList> children = new ArrayList>(); opCtxMap = new HashMap(); - operatorToPaths = new HashMap, java.util.ArrayList>(); + operatorToPaths = new HashMap, java.util.ArrayList>(); statsMap.put(Counter.DESERIALIZE_ERRORS, deserialize_error_count); try { - for (String onefile : conf.getPathToAliases().keySet()) { + for (Path onefile : conf.getPathToAliases().keySet()) { MapOpCtx opCtx = initObjectInspector(conf, hconf, onefile); - Path onepath = new Path(new Path(onefile).toUri().getPath()); List aliases = conf.getPathToAliases().get(onefile); for (String onealias : aliases) { @@ -375,14 +374,14 @@ public class MapOperator extends Operator implements Serializable { MapInputPath inp = new MapInputPath(onefile, onealias, op); opCtxMap.put(inp, opCtx); if (operatorToPaths.get(op) == null) { - operatorToPaths.put(op, new java.util.ArrayList()); + operatorToPaths.put(op, new java.util.ArrayList()); } operatorToPaths.get(op).add(onefile); op.setParentOperators(new ArrayList>()); op.getParentOperators().add(this); // check for the operators who will process rows coming to this Map // Operator - if (!onepath.toUri().relativize(fpath.toUri()).equals(fpath.toUri())) { + if (!onefile.toUri().relativize(fpath.toUri()).equals(fpath.toUri())) { children.add(op); childrenOpToOpCtxMap.put(op, opCtx); LOG.info("dump " + op.getName() + " " @@ -468,18 +467,17 @@ public class MapOperator extends Operator implements Serializable { Path fpath = new Path((new Path(this.getExecContext().getCurrentInputFile())) .toUri().getPath()); - for (String onefile : conf.getPathToAliases().keySet()) { - Path onepath = new Path(new Path(onefile).toUri().getPath()); + for (Path onepath : conf.getPathToAliases().keySet()) { // check for the operators who will process rows coming to this Map // Operator if (!onepath.toUri().relativize(fpath.toUri()).equals(fpath.toUri())) { - String onealias = conf.getPathToAliases().get(onefile).get(0); + String onealias = conf.getPathToAliases().get(onepath).get(0); Operator op = conf.getAliasToWork().get(onealias); - LOG.info("Processing alias " + 
onealias + " for file " + onefile); + LOG.info("Processing alias " + onealias + " for file " + onepath); - MapInputPath inp = new MapInputPath(onefile, onealias, op); + MapInputPath inp = new MapInputPath(onepath, onealias, op); setInspectorInput(inp); break; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 9d97c16..7e1cfa3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -756,7 +756,7 @@ public final class Utilities { } public static void addMapWork(MapredWork mr, Table tbl, String alias, Operator work) { - mr.addMapWork(tbl.getDataLocation().getPath(), alias, work, new PartitionDesc( + mr.addMapWork(new Path(tbl.getDataLocation().toString()), alias, work, new PartitionDesc( getTableDesc(tbl), (LinkedHashMap) null)); } @@ -1533,13 +1533,13 @@ public final class Utilities { /** * Add new elements to the classpath. * If an element is not in the local file system (like ASV or S3), it is downloaded - * first, and then added to the classpath + * first, and then added to the classpath * @param newPaths * Array of classpath elements */ - public static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths, + public static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths, Configuration conf) throws Exception { - //iterate through the jars and download to local if needed + //iterate through the jars and download to local if needed ArrayList local = new ArrayList(); for(String path : newPaths){ // By default don't convert to unix @@ -1555,7 +1555,7 @@ public final class Utilities { * @param newPaths * Array of classpath elements */ - public static ClassLoader addLocalToClassPath(ClassLoader cloader, String[] newPaths) + public static ClassLoader addLocalToClassPath(ClassLoader cloader, String[] newPaths) throws Exception { URLClassLoader loader = (URLClassLoader) cloader; List curPath = Arrays.asList(loader.getURLs()); @@ -1740,16 +1740,15 @@ public final class Utilities { long[] summary = {0, 0, 0}; - List pathNeedProcess = new ArrayList(); + List pathNeedProcess = new ArrayList(); // Since multiple threads could call this method concurrently, locking // this method will avoid number of threads out of control. synchronized (getInputSummaryLock) { // For each input path, calculate the total size. - for (String path : work.getPathToAliases().keySet()) { - Path p = new Path(path); + for (Path path : work.getPathToAliases().keySet()) { - if (filter != null && !filter.accept(p)) { + if (filter != null && !filter.accept(path)) { continue; } @@ -1767,7 +1766,7 @@ public final class Utilities { } // Process the case when name node call is needed - final Map resultMap = new ConcurrentHashMap(); + final Map resultMap = new ConcurrentHashMap(); ArrayList> results = new ArrayList>(); final ThreadPoolExecutor executor; int maxThreads = ctx.getConf().getInt("mapred.dfsclient.parallelism.max", 0); @@ -1791,9 +1790,7 @@ public final class Utilities { try { Configuration conf = ctx.getConf(); JobConf jobConf = new JobConf(conf); - for (String path : pathNeedProcess) { - final Path p = new Path(path); - final String pathStr = path; + for (final Path path : pathNeedProcess) { // All threads share the same Configuration and JobConf based on the // assumption that they are thread safe if only read operations are // executed. 
It is not stated in Hadoop's javadoc, the sourcce codes @@ -1803,7 +1800,7 @@ public final class Utilities { final Configuration myConf = conf; final JobConf myJobConf = jobConf; final PartitionDesc partDesc = work.getPathToPartitionInfo().get( - p.toString()); + path); Runnable r = new Runnable() { public void run() { try { @@ -1814,20 +1811,20 @@ public final class Utilities { InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache( inputFormatCls, myJobConf); if (inputFormatObj instanceof ContentSummaryInputFormat) { - resultCs = ((ContentSummaryInputFormat) inputFormatObj).getContentSummary(p, + resultCs = ((ContentSummaryInputFormat) inputFormatObj).getContentSummary(path, myJobConf); } else { - FileSystem fs = p.getFileSystem(myConf); - resultCs = fs.getContentSummary(p); + FileSystem fs = path.getFileSystem(myConf); + resultCs = fs.getContentSummary(path); } - resultMap.put(pathStr, resultCs); + resultMap.put(path, resultCs); } catch (IOException e) { // We safely ignore this exception for summary data. // We don't update the cache to protect it from polluting other // usages. The worst case is that IOException will always be // retried for another getInputSummary(), which is fine as // IOException is not considered as a common case. - LOG.info("Cannot get size of " + pathStr + ". Safely ignored."); + LOG.info("Cannot get size of " + path + ". Safely ignored."); } } }; @@ -1859,7 +1856,7 @@ public final class Utilities { executor.shutdown(); } HiveInterruptUtils.checkInterrupted(); - for (Map.Entry entry : resultMap.entrySet()) { + for (Map.Entry entry : resultMap.entrySet()) { ContentSummary cs = entry.getValue(); summary[0] += cs.getLength(); @@ -1879,18 +1876,17 @@ public final class Utilities { } } - public static boolean isEmptyPath(JobConf job, String dirPath, Context ctx) + public static boolean isEmptyPath(JobConf job, Path path, Context ctx) throws Exception { - ContentSummary cs = ctx.getCS(dirPath); + ContentSummary cs = ctx.getCS(path); if (cs != null) { - LOG.info("Content Summary " + dirPath + "length: " + cs.getLength() + " num files: " + LOG.info("Content Summary " + path + "length: " + cs.getLength() + " num files: " + cs.getFileCount() + " num directories: " + cs.getDirectoryCount()); return (cs.getLength() == 0 && cs.getFileCount() == 0 && cs.getDirectoryCount() <= 1); } else { - LOG.info("Content Summary not cached for " + dirPath); + LOG.info("Content Summary not cached for " + path); } - Path p = new Path(dirPath); - return isEmptyPath(job, p); + return isEmptyPath(job, path); } public static boolean isEmptyPath(JobConf job, Path dirPath) throws Exception { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java index c9341c4..bed8fa9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java @@ -26,6 +26,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.QueryPlan; import org.apache.hadoop.hive.ql.exec.TaskRunner; @@ -50,14 +51,14 @@ public class HookContext { private LineageInfo linfo; private UserGroupInformation ugi; private HookType hookType; - final private Map inputPathToContentSummary; + final private Map inputPathToContentSummary; public HookContext(QueryPlan queryPlan, HiveConf conf) throws Exception{ 
- this(queryPlan, conf, new ConcurrentHashMap()); + this(queryPlan, conf, new ConcurrentHashMap()); } public HookContext(QueryPlan queryPlan, HiveConf conf, - Map inputPathToContentSummary) throws Exception { + Map inputPathToContentSummary) throws Exception { this.queryPlan = queryPlan; this.conf = conf; this.inputPathToContentSummary = inputPathToContentSummary; @@ -131,7 +132,7 @@ public class HookContext { this.ugi = ugi; } - public Map getInputPathToContentSummary() { + public Map getInputPathToContentSummary() { return inputPathToContentSummary; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java index 49145b7..ea46150 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java @@ -68,8 +68,7 @@ public class BucketizedHiveInputFormat pathToPartitionInfo = Utilities + Map pathToPartitionInfo = Utilities .getMapRedWork(job).getPathToPartitionInfo(); // extract all the inputFormatClass names for each chunk in the @@ -200,7 +200,7 @@ public class CombineHiveInputFormat pathToPartitionInfo = Utilities + Map pathToPartitionInfo = Utilities .getMapRedWork(getJob()).getPathToPartitionInfo(); // extract all the inputFormatClass names for each chunk in the @@ -259,7 +259,7 @@ public class CombineHiveInputFormat> pathToAliases = mrwork.getPathToAliases(); + Map> pathToAliases = mrwork.getPathToAliases(); Map> aliasToWork = mrwork.getAliasToWork(); CombineFileInputFormatShim combine = ShimLoader.getHadoopShims() @@ -435,8 +435,8 @@ public class CombineHiveInputFormat nameToSamples = mrwork.getNameToSplitSample(); List retLists = new ArrayList(); Map> aliasToSplitList = new HashMap>(); - Map> pathToAliases = mrwork.getPathToAliases(); - Map> pathToAliasesNoScheme = removeScheme(pathToAliases); + Map> pathToAliases = mrwork.getPathToAliases(); + Map> pathToAliasesNoScheme = removeScheme(pathToAliases); // Populate list of exclusive splits for every sampled alias // @@ -503,10 +503,10 @@ public class CombineHiveInputFormat> removeScheme(Map> pathToAliases) { - Map> result = new HashMap>(); - for (Map.Entry > entry : pathToAliases.entrySet()) { - String newKey = new Path(entry.getKey()).toUri().getPath(); + Map> removeScheme(Map> pathToAliases) { + Map> result = new HashMap>(); + for (Map.Entry > entry : pathToAliases.entrySet()) { + Path newKey = new Path(entry.getKey().toUri().getPath()); result.put(newKey, entry.getValue()); } return result; @@ -535,8 +535,7 @@ public class CombineHiveInputFormat pathToPartitionInfo, Path dir, - Map, Map> cacheMap) + Map pathToPartitionInfo, Path dir, + Map, Map> cacheMap) throws IOException { return getPartitionDescFromPathRecursively(pathToPartitionInfo, dir, cacheMap, false); } public static PartitionDesc getPartitionDescFromPathRecursively( - Map pathToPartitionInfo, Path dir, - Map, Map> cacheMap, + Map pathToPartitionInfo, Path dir, + Map, Map> cacheMap, boolean ignoreSchema) throws IOException { PartitionDesc part = doGetPartitionDescFromPath(pathToPartitionInfo, dir); if (part == null - && (ignoreSchema || (dir.toUri().getScheme() == null || dir.toUri().getScheme().trim() - .equals("")))) { + && (ignoreSchema + || (dir.toUri().getScheme() == null || dir.toUri().getScheme().trim() + .equals("")) + || pathsContainNoScheme(pathToPartitionInfo) + ) + + ) { - Map newPathToPartitionInfo = null; + Map newPathToPartitionInfo = null; if (cacheMap != null) 
{ newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo); } if (newPathToPartitionInfo == null) { // still null - newPathToPartitionInfo = new HashMap(); + newPathToPartitionInfo = new HashMap(); populateNewPartitionDesc(pathToPartitionInfo, newPathToPartitionInfo); if (cacheMap != null) { @@ -291,54 +297,110 @@ public final class HiveFileFormatUtils { } } + private static boolean pathsContainNoScheme(Map pathToPartitionInfo) { + + for( Entry pe : pathToPartitionInfo.entrySet()){ + if(pe.getKey().toUri().getScheme() != null){ + return false; + } + } + return true; + + } + private static void populateNewPartitionDesc( - Map pathToPartitionInfo, - Map newPathToPartitionInfo) { - for (Map.Entry entry: pathToPartitionInfo.entrySet()) { - String entryKey = entry.getKey(); + Map pathToPartitionInfo, + Map newPathToPartitionInfo) { + for (Map.Entry entry: pathToPartitionInfo.entrySet()) { + Path newP = entry.getKey(); PartitionDesc partDesc = entry.getValue(); - Path newP = new Path(entryKey); - String pathOnly = newP.toUri().getPath(); - newPathToPartitionInfo.put(pathOnly, partDesc); + newPathToPartitionInfo.put(getPathWithPathOnly(newP), partDesc); } } private static PartitionDesc doGetPartitionDescFromPath( - Map pathToPartitionInfo, Path dir) { + Map pathToPartitionInfo, Path dir) { + + return getValueMatchParentDir(pathToPartitionInfo, dir, true); + } + + /** + * Lookup path p in map, try looking up on parent dir of path if lookup fails. + * If pathOnly is set to true, ignore scheme and authority + * @param map + * @param p + * @param pathOnly + * @return + */ + static VAL getValueMatchParentDir(Map map, Path p, boolean pathOnly){ // We first do exact match, and then do prefix matching. The latter is due to input dir // could be /dir/ds='2001-02-21'/part-03 where part-03 is not part of partition - String dirPath = dir.toUri().getPath(); - PartitionDesc part = pathToPartitionInfo.get(dir.toString()); - if (part == null) { - // LOG.warn("exact match not found, try ripping input path's theme and authority"); - part = pathToPartitionInfo.get(dirPath); + + while(p != null){ + VAL val = map.get(p); + if(val != null) { + return val; + } + //else + //exact match not found, try ripping input path's theme and authority + val = map.get(getPathWithPathOnly(p)); + if(val != null) { + return val; + } + + //now try parent dir + p = p.getParent(); } + return null; + } - if (part == null) { - String dirStr = dir.toString(); - int dirPathIndex = dirPath.lastIndexOf(Path.SEPARATOR); - int dirStrIndex = dirStr.lastIndexOf(Path.SEPARATOR); - while (dirPathIndex >= 0 && dirStrIndex >= 0) { - dirStr = dirStr.substring(0, dirStrIndex); - dirPath = dirPath.substring(0, dirPathIndex); - //first try full match - part = pathToPartitionInfo.get(dirStr); - if (part == null) { - // LOG.warn("exact match not found, try ripping input path's theme and authority"); - part = pathToPartitionInfo.get(dirPath); - } - if (part != null) { - break; - } - dirPathIndex = dirPath.lastIndexOf(Path.SEPARATOR); - dirStrIndex = dirStr.lastIndexOf(Path.SEPARATOR); + /** + * @param p + * @return new Path with scheme and authority of argument p stripped of + */ + static Path getPathWithPathOnly(Path p) { + return new Path(p.toUri().getPath()); + } + + /** + * Check if child is a subdir of parent. 
Try considering only Path + * if childPathOnly is set to true + * @param parent + * @param child + * @param childPathOnly + * @return true if child arg is child of parent arg + */ + static boolean isChildOf(Path parent, Path child, boolean childPathOnly) { + do{ + if(equalsPath(parent, child, childPathOnly)){ + return true; } + child = child.getParent(); + }while(child != null); + + return false; + } + + /** + * Compare path and splitPath, try considering splitPath path part if + * pathOnly is true + * @param path + * @param splitPath + * @param pathOnly + * @return true if equal + */ + static boolean equalsPath(Path path, Path splitPath, boolean pathOnly) { + if(splitPath.equals(path)){ + return true; + } + if(pathOnly && splitPath.toUri().getPath().equals(path.toString())){ + return true; } - return part; + return false; } - private static boolean foundAlias(Map> pathToAliases, - String path) { + private static boolean foundAlias(Map> pathToAliases, + Path path) { List aliases = pathToAliases.get(path); if ((aliases == null) || (aliases.isEmpty())) { return false; @@ -346,37 +408,22 @@ public final class HiveFileFormatUtils { return true; } - private static String getMatchingPath(Map> pathToAliases, + private static Path getMatchingPath(Map> pathToAliases, Path dir) { - // First find the path to be searched - String path = dir.toString(); - if (foundAlias(pathToAliases, path)) { - return path; - } - String dirPath = dir.toUri().getPath(); - if (foundAlias(pathToAliases, dirPath)) { - return dirPath; - } - path = dirPath; - - String dirStr = dir.toString(); - int dirPathIndex = dirPath.lastIndexOf(Path.SEPARATOR); - int dirStrIndex = dirStr.lastIndexOf(Path.SEPARATOR); - while (dirPathIndex >= 0 && dirStrIndex >= 0) { - dirStr = dirStr.substring(0, dirStrIndex); - dirPath = dirPath.substring(0, dirPathIndex); - //first try full match - if (foundAlias(pathToAliases, dirStr)) { - return dirStr; + while(dir != null){ + if(foundAlias(pathToAliases,dir)){ + return dir; } - if (foundAlias(pathToAliases, dirPath)) { - return dirPath; + //path without scheme, authority + Path onlyPath = new Path(dir.toUri().getPath()); + if(foundAlias(pathToAliases, onlyPath)){ + return onlyPath; } - dirPathIndex = dirPath.lastIndexOf(Path.SEPARATOR); - dirStrIndex = dirStr.lastIndexOf(Path.SEPARATOR); + //lookup parent of dir + dir = dir.getParent(); } - return null; + return dir; } /** @@ -386,9 +433,10 @@ public final class HiveFileFormatUtils { * @param dir The path to look for **/ public static List> doGetWorksFromPath( - Map> pathToAliases, + Map> pathToAliases, Map> aliasToWork, Path dir) { - List> opList = + + List> opList = new ArrayList>(); List aliases = doGetAliasesFromPath(pathToAliases, dir); @@ -404,12 +452,12 @@ public final class HiveFileFormatUtils { * @param dir The path to look for **/ public static List doGetAliasesFromPath( - Map> pathToAliases, + Map> pathToAliases, Path dir) { if (pathToAliases == null) { return new ArrayList(); } - String path = getMatchingPath(pathToAliases, dir); + Path path = getMatchingPath(pathToAliases, dir); return pathToAliases.get(path); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 08362a6..5811d6f 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -230,8 +230,7 @@ public class HiveInputFormat nonNative = part.getTableDesc().isNonNative(); } - 
pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath() - .toString(), hsplit.getPath().toUri().getPath(), nonNative); + pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath(), nonNative); InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, cloneJobConf); @@ -248,7 +247,7 @@ public class HiveInputFormat return rr; } - protected Map pathToPartitionInfo; + protected Map pathToPartitionInfo; MapredWork mrwork = null; protected void init(JobConf job) { @@ -325,11 +324,11 @@ public class HiveInputFormat } protected static PartitionDesc getPartitionDescFromPath( - Map pathToPartitionInfo, Path dir) + Map pathToPartitionInfo, Path dir) throws IOException { - PartitionDesc partDesc = pathToPartitionInfo.get(dir.toString()); + PartitionDesc partDesc = pathToPartitionInfo.get(dir); if (partDesc == null) { - partDesc = pathToPartitionInfo.get(dir.toUri().getPath()); + partDesc = pathToPartitionInfo.get(new Path(dir.toUri().getPath())); } if (partDesc == null) { throw new IOException("cannot find dir = " + dir.toString() @@ -370,13 +369,12 @@ public class HiveInputFormat } protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass, - String splitPath, String splitPathWithNoSchema) { - pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath, - splitPathWithNoSchema, false); + Path splitPath) { + pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath, false); } protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass, - String splitPath, String splitPathWithNoSchema, boolean nonNative) { + Path splitPath, boolean nonNative) { if (this.mrwork == null) { init(job); } @@ -386,26 +384,24 @@ public class HiveInputFormat } ArrayList aliases = new ArrayList(); - Iterator>> iterator = this.mrwork + Iterator>> iterator = this.mrwork .getPathToAliases().entrySet().iterator(); while (iterator.hasNext()) { - Entry> entry = iterator.next(); - String key = entry.getKey(); + Entry> entry = iterator.next(); + Path key = entry.getKey(); boolean match; if (nonNative) { // For non-native tables, we need to do an exact match to avoid // HIVE-1903. (The table location contains no files, and the string // representation of its path does not have a trailing slash.) - match = - splitPath.equals(key) || splitPathWithNoSchema.equals(key); + match = HiveFileFormatUtils.equalsPath(key, splitPath, true); } else { // But for native tables, we need to do a prefix match for // subdirectories. (Unlike non-native tables, prefix mixups don't seem // to be a potential problem here since we are always dealing with the // path to something deeper than the table location.) - match = - splitPath.startsWith(key) || splitPathWithNoSchema.startsWith(key); + match = HiveFileFormatUtils.isChildOf(key, splitPath, true); } if (match) { ArrayList list = entry.getValue(); @@ -433,4 +429,6 @@ public class HiveInputFormat } } } + + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/IOPrepareCache.java b/ql/src/java/org/apache/hadoop/hive/ql/io/IOPrepareCache.java index f4a21f8..1cf7721 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/IOPrepareCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/IOPrepareCache.java @@ -21,17 +21,17 @@ package org.apache.hadoop.hive.ql.io; import java.util.HashMap; import java.util.Map; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.plan.PartitionDesc; - /** * IOPrepareCache is used to cache pre-query io-related objects. 
* It should be cleared every time a new query issued. - * + * */ public class IOPrepareCache { - + private static ThreadLocal threadLocalIOPrepareCache = new ThreadLocal(); - + public static IOPrepareCache get() { IOPrepareCache cache = IOPrepareCache.threadLocalIOPrepareCache.get(); if (cache == null) { @@ -41,29 +41,29 @@ public class IOPrepareCache { return cache; } - + public void clear() { if(partitionDescMap != null) { - partitionDescMap.clear(); + partitionDescMap.clear(); } } - - private Map, Map> partitionDescMap; - - public Map, Map> allocatePartitionDescMap() { + + private Map, Map> partitionDescMap; + + public Map, Map> allocatePartitionDescMap() { if (partitionDescMap == null) { - partitionDescMap = new HashMap, Map>(); + partitionDescMap = new HashMap, Map>(); } return partitionDescMap; } - public Map, Map> getPartitionDescMap() { + public Map, Map> getPartitionDescMap() { return partitionDescMap; } public void setPartitionDescMap( - Map, Map> partitionDescMap) { + Map, Map> partitionDescMap) { this.partitionDescMap = partitionDescMap; - } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java index 9ae58f4..c353f71 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java @@ -37,28 +37,28 @@ import org.apache.hadoop.mapred.TextInputFormat; public class SymbolicInputFormat implements ReworkMapredInputFormat { public void rework(HiveConf job, MapredWork work) throws IOException { - Map pathToParts = work.getPathToPartitionInfo(); - List toRemovePaths = new ArrayList(); - Map toAddPathToPart = new HashMap(); - Map> pathToAliases = work.getPathToAliases(); + Map pathToParts = work.getPathToPartitionInfo(); + List toRemovePaths = new ArrayList(); + Map toAddPathToPart = new HashMap(); + Map> pathToAliases = work.getPathToAliases(); - for (Map.Entry pathPartEntry : pathToParts + for (Map.Entry pathPartEntry : pathToParts .entrySet()) { - String path = pathPartEntry.getKey(); + Path path = pathPartEntry.getKey(); PartitionDesc partDesc = pathPartEntry.getValue(); // this path points to a symlink path if (partDesc.getInputFileFormatClass().equals( SymlinkTextInputFormat.class)) { // change to TextInputFormat partDesc.setInputFileFormatClass(TextInputFormat.class); - Path symlinkDir = new Path(path); - FileSystem fileSystem = symlinkDir.getFileSystem(job); - FileStatus fStatus = fileSystem.getFileStatus(symlinkDir); + + FileSystem fileSystem = path.getFileSystem(job); + FileStatus fStatus = fileSystem.getFileStatus(path); FileStatus[] symlinks = null; if (!fStatus.isDir()) { symlinks = new FileStatus[] { fStatus }; } else { - symlinks = fileSystem.listStatus(symlinkDir); + symlinks = fileSystem.listStatus(path); } toRemovePaths.add(path); ArrayList aliases = pathToAliases.remove(path); @@ -74,8 +74,8 @@ public class SymbolicInputFormat implements ReworkMapredInputFormat { while ((line = reader.readLine()) != null) { // no check for the line? How to check? // if the line is invalid for any reason, the job will fail. 
- toAddPathToPart.put(line, partDesc); - pathToAliases.put(line, aliases); + toAddPathToPart.put(new Path(line), partDesc); + pathToAliases.put(new Path(line), aliases); } } finally { org.apache.hadoop.io.IOUtils.closeStream(reader); @@ -85,7 +85,7 @@ public class SymbolicInputFormat implements ReworkMapredInputFormat { } pathToParts.putAll(toAddPathToPart); - for (String toRemove : toRemovePaths) { + for (Path toRemove : toRemovePaths) { pathToParts.remove(toRemove); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java index c4960f9..9012aae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java @@ -38,18 +38,18 @@ public class MergeWork extends MapredWork implements Serializable { private static final long serialVersionUID = 1L; - private List inputPaths; + private List inputPaths; private String outputDir; private boolean hasDynamicPartitions; public MergeWork() { } - public MergeWork(List inputPaths, String outputDir) { + public MergeWork(List inputPaths, String outputDir) { this(inputPaths, outputDir, false); } - public MergeWork(List inputPaths, String outputDir, + public MergeWork(List inputPaths, String outputDir, boolean hasDynamicPartitions) { super(); this.inputPaths = inputPaths; @@ -58,21 +58,21 @@ public class MergeWork extends MapredWork implements Serializable { PartitionDesc partDesc = new PartitionDesc(); partDesc.setInputFileFormatClass(RCFileBlockMergeInputFormat.class); if(this.getPathToPartitionInfo() == null) { - this.setPathToPartitionInfo(new LinkedHashMap()); + this.setPathToPartitionInfo(new LinkedHashMap()); } if(this.getNumReduceTasks() == null) { this.setNumReduceTasks(0); } - for(String path: this.inputPaths) { + for(Path path: this.inputPaths) { this.getPathToPartitionInfo().put(path, partDesc); } } - public List getInputPaths() { + public List getInputPaths() { return inputPaths; } - public void setInputPaths(List inputPaths) { + public void setInputPaths(List inputPaths) { this.inputPaths = inputPaths; } @@ -88,14 +88,17 @@ public class MergeWork extends MapredWork implements Serializable { return RCFileMergeMapper.class; } + @Override public Long getMinSplitSize() { return null; } + @Override public String getInputformat() { return CombineHiveInputFormat.class.getName(); } + @Override public boolean isGatheringStats() { return false; } @@ -123,7 +126,7 @@ public class MergeWork extends MapredWork implements Serializable { super.resolveDynamicPartitionMerge(conf, path, tblDesc, aliases, partDesc); // Add the DP path to the list of input paths - inputPaths.add(path.toString()); + inputPaths.add(path); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java index 0b05b3f..6174a0e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java @@ -459,8 +459,8 @@ public class GenMRFileSink1 implements NodeProcessor { // constructing the default MapredWork MapredWork cplan = GenMapRedUtils.getMapRedWorkFromConf(conf); - cplan.getPathToAliases().put(inputDir, aliases); - cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null)); + cplan.getPathToAliases().put(new Path(inputDir), aliases); + cplan.getPathToPartitionInfo().put(new Path(inputDir), new 
PartitionDesc(tblDesc, null)); cplan.setNumReduceTasks(0); cplan.getAliasToWork().put(inputDir, topOp); cplan.setMapperCannotSpanPartns(true); @@ -489,15 +489,15 @@ public class GenMRFileSink1 implements NodeProcessor { MergeWork work = new MergeWork(inputDirs, finalName, hasDynamicPartitions); - LinkedHashMap> pathToAliases = - new LinkedHashMap>(); - pathToAliases.put(inputDir, (ArrayList) inputDirs.clone()); + LinkedHashMap> pathToAliases = + new LinkedHashMap>(); + pathToAliases.put(new Path(inputDir), (ArrayList) inputDirs.clone()); work.setMapperCannotSpanPartns(true); work.setPathToAliases(pathToAliases); work.setAliasToWork( new LinkedHashMap>()); if (hasDynamicPartitions) { - work.getPathToPartitionInfo().put(inputDir, + work.getPathToPartitionInfo().put(new Path(inputDir), new PartitionDesc(tblDesc, null)); } @@ -690,9 +690,9 @@ public class GenMRFileSink1 implements NodeProcessor { String taskTmpDir = mjCtx.getTaskTmpDir(); TableDesc tt_desc = mjCtx.getTTDesc(); assert plan.getPathToAliases().get(taskTmpDir) == null; - plan.getPathToAliases().put(taskTmpDir, new ArrayList()); + plan.getPathToAliases().put(new Path(taskTmpDir), new ArrayList()); plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - plan.getPathToPartitionInfo().put(taskTmpDir, + plan.getPathToPartitionInfo().put(new Path(taskTmpDir), new PartitionDesc(tt_desc, null)); plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); return dest; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index b51c6fd..4b499a7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -31,6 +31,7 @@ import java.util.Stack; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -152,13 +153,13 @@ public class MapJoinProcessor implements Transform { smallTableAliasList.add(alias); // get input path and remove this alias from pathToAlias // because this file will be fetched by fetch operator - LinkedHashMap> pathToAliases = newWork.getPathToAliases(); + LinkedHashMap> pathToAliases = newWork.getPathToAliases(); // keep record all the input path for this alias - HashSet pathSet = new HashSet(); - HashSet emptyPath = new HashSet(); - for (Map.Entry> entry2 : pathToAliases.entrySet()) { - String path = entry2.getKey(); + HashSet pathSet = new HashSet(); + HashSet emptyPath = new HashSet(); + for (Map.Entry> entry2 : pathToAliases.entrySet()) { + Path path = entry2.getKey(); ArrayList list = entry2.getValue(); if (list.contains(alias)) { // add to path set @@ -173,16 +174,16 @@ public class MapJoinProcessor implements Transform { } } //remove the path, with which no alias associates - for (String path : emptyPath) { + for (Path path : emptyPath) { pathToAliases.remove(path); } // create fetch work FetchWork fetchWork = null; - List partDir = new ArrayList(); + List partDir = new ArrayList(); List partDesc = new ArrayList(); - for (String tablePath : pathSet) { + for (Path tablePath : pathSet) { PartitionDesc partitionDesc = newWork.getPathToPartitionInfo().get(tablePath); // create fetchwork for non partitioned table if (partitionDesc.getPartSpec() == null || partitionDesc.getPartSpec().size() == 0) 
{ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java index bf9f18d..5ef4769 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java @@ -21,11 +21,11 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Stack; -import java.util.Iterator; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -48,7 +49,6 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; -import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; import org.apache.hadoop.hive.serde2.NullStructSerDe; /** @@ -178,7 +178,7 @@ public class MetadataOnlyOptimizer implements PhysicalPlanResolver { */ class MetadataOnlyTaskDispatcher implements Dispatcher { - private PhysicalContext physicalContext; + private final PhysicalContext physicalContext; public MetadataOnlyTaskDispatcher(PhysicalContext context) { super(); @@ -206,10 +206,10 @@ public class MetadataOnlyOptimizer implements PhysicalPlanResolver { return desc; } - private List getPathsForAlias(MapredWork work, String alias) { - List paths = new ArrayList(); + private List getPathsForAlias(MapredWork work, String alias) { + List paths = new ArrayList(); - for (Map.Entry> entry : work.getPathToAliases().entrySet()) { + for (Map.Entry> entry : work.getPathToAliases().entrySet()) { if (entry.getValue().contains(alias)) { paths.add(entry.getKey()); } @@ -223,17 +223,17 @@ public class MetadataOnlyOptimizer implements PhysicalPlanResolver { PartitionDesc aliasPartn = work.getAliasToPartnInfo().get(alias); changePartitionToMetadataOnly(aliasPartn); - List paths = getPathsForAlias(work, alias); - for (String path : paths) { + List paths = getPathsForAlias(work, alias); + for (Path path : paths) { PartitionDesc newPartition = changePartitionToMetadataOnly(work.getPathToPartitionInfo().get( path)); Path fakePath = new Path("file", null, "/fake-path-metadata-only-query-" + newPartition.getTableName() + newPartition.getPartSpec().toString()); work.getPathToPartitionInfo().remove(path); - work.getPathToPartitionInfo().put(fakePath.getName(), newPartition); + work.getPathToPartitionInfo().put(fakePath, newPartition); ArrayList aliases = work.getPathToAliases().remove(path); - work.getPathToAliases().put(fakePath.getName(), aliases); + work.getPathToAliases().put(fakePath, aliases); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 5979c02..3eedf08 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -27,9 +27,9 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; -import java.util.Map.Entry; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -42,6 +42,7 @@ import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -96,6 +97,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1; import org.apache.hadoop.hive.ql.optimizer.GenMROperator; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3; @@ -105,7 +107,6 @@ import org.apache.hadoop.hive.ql.optimizer.GenMRUnion1; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory; import org.apache.hadoop.hive.ql.optimizer.Optimizer; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; @@ -126,6 +127,7 @@ import org.apache.hadoop.hive.ql.plan.ExtractDesc; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.plan.ForwardDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; @@ -148,14 +150,13 @@ import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.UDTFDesc; import org.apache.hadoop.hive.ql.plan.UnionDesc; -import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.ResourceType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; @@ -163,16 +164,15 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.hive.metastore.TableType; /** * Implementation of the semantic analyzer. @@ -6968,7 +6968,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { Table tab = (iter.next()).getValue(); if (!tab.isPartitioned()) { if (qbParseInfo.getDestToWhereExpr().isEmpty()) { - fetch = new FetchWork(tab.getPath().toString(), Utilities + fetch = new FetchWork(tab.getPath(), Utilities .getTableDesc(tab), qb.getParseInfo().getOuterQueryLimit()); noMapRed = true; inputs.add(new ReadEntity(tab)); @@ -7001,7 +7001,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { // If there is any unknown partition, create a map-reduce job for // the filter to prune correctly if ((partsList.getUnknownPartns().size() == 0)) { - List listP = new ArrayList(); + List listP = new ArrayList(); List partP = new ArrayList(); Set parts = partsList.getConfirmedPartns(); @@ -7009,7 +7009,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { while (iterParts.hasNext()) { Partition part = iterParts.next(); - listP.add(part.getPartitionPath().toString()); + listP.add(part.getPartitionPath()); try { partP.add(Utilities.getPartitionDesc(part)); } catch (HiveException e) { @@ -7126,7 +7126,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT); TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat); - fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(), + fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()), resultTab, qb.getParseInfo().getOuterQueryLimit()); fetchTask = (FetchTask) TaskFactory.get(fetch, conf); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java index 5c09789..502a1eb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java @@ -26,8 +26,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -142,14 +140,14 @@ public class ConditionalResolverMergeFiles implements ConditionalResolver, dpCtx.getNumDPCols(), inpFs); // cleanup pathToPartitionInfo - Map ptpi = work.getPathToPartitionInfo(); + Map ptpi = work.getPathToPartitionInfo(); assert ptpi.size() == 1; - String path = ptpi.keySet().iterator().next(); + Path path = 
ptpi.keySet().iterator().next(); TableDesc tblDesc = ptpi.get(path).getTableDesc(); ptpi.remove(path); // the root path is not useful anymore // cleanup pathToAliases - Map> pta = work.getPathToAliases(); + Map> pta = work.getPathToAliases(); assert pta.size() == 1; path = pta.keySet().iterator().next(); ArrayList aliases = pta.get(path); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java index 481a51c..fb4ebfd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java @@ -32,10 +32,10 @@ import org.apache.hadoop.fs.Path; public class FetchWork implements Serializable { private static final long serialVersionUID = 1L; - private String tblDir; + private Path tblDir; private TableDesc tblDesc; - private ArrayList partDir; + private ArrayList partDir; private ArrayList partDesc; private int limit; @@ -49,22 +49,22 @@ public class FetchWork implements Serializable { public FetchWork() { } - public FetchWork(String tblDir, TableDesc tblDesc) { + public FetchWork(Path tblDir, TableDesc tblDesc) { this(tblDir, tblDesc, -1); } - public FetchWork(String tblDir, TableDesc tblDesc, int limit) { + public FetchWork(Path tblDir, TableDesc tblDesc, int limit) { this.tblDir = tblDir; this.tblDesc = tblDesc; this.limit = limit; } - public FetchWork(List partDir, List partDesc) { + public FetchWork(List partDir, List partDesc) { this(partDir, partDesc, -1); } - public FetchWork(List partDir, List partDesc, int limit) { - this.partDir = new ArrayList(partDir); + public FetchWork(List partDir, List partDesc, int limit) { + this.partDir = new ArrayList(partDir); this.partDesc = new ArrayList(partDesc); this.limit = limit; } @@ -80,7 +80,7 @@ public class FetchWork implements Serializable { /** * @return the tblDir */ - public String getTblDir() { + public Path getTblDir() { return tblDir; } @@ -88,14 +88,14 @@ public class FetchWork implements Serializable { * @return the tblDir */ public Path getTblDirPath() { - return new Path(tblDir); + return tblDir; } /** * @param tblDir * the tblDir to set */ - public void setTblDir(String tblDir) { + public void setTblDir(Path tblDir) { this.tblDir = tblDir; } @@ -117,13 +117,13 @@ public class FetchWork implements Serializable { /** * @return the partDir */ - public ArrayList getPartDir() { + public ArrayList getPartDir() { return partDir; } - public List getPartDirPath() { - return FetchWork.convertStringToPathArray(partDir); - } +// public List getPartDirPath() { +// return FetchWork.convertStringToPathArray(partDir); +// } public static List convertPathToStringArray(List paths) { if (paths == null) { @@ -138,24 +138,24 @@ public class FetchWork implements Serializable { return pathsStr; } - public static List convertStringToPathArray(List paths) { - if (paths == null) { - return null; - } - - List pathsStr = new ArrayList(); - for (String path : paths) { - pathsStr.add(new Path(path)); - } - - return pathsStr; - } +// public static List convertStringToPathArray(List paths) { +// if (paths == null) { +// return null; +// } +// +// List pathsStr = new ArrayList(); +// for (String path : paths) { +// pathsStr.add(new Path(path)); +// } +// +// return pathsStr; +// } /** * @param partDir * the partDir to set */ - public void setPartDir(ArrayList partDir) { + public void setPartDir(ArrayList partDir) { this.partDir = partDir; } @@ -208,11 +208,11 @@ public class FetchWork implements Serializable { return "null 
fetchwork"; } - String ret = "partition = "; - for (String part : partDir) { - ret = ret.concat(part); + StringBuilder ret = new StringBuilder("partition = "); + for (Path part : partDir) { + ret.append(part); } - return ret; + return ret.toString(); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java index 2c2a3ec..483e6c6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java @@ -45,9 +45,9 @@ public class MapredWork implements Serializable { // map side work // use LinkedHashMap to make sure the iteration order is // deterministic, to ease testing - private LinkedHashMap> pathToAliases; + private LinkedHashMap> pathToAliases; - private LinkedHashMap pathToPartitionInfo; + private LinkedHashMap pathToPartitionInfo; private LinkedHashMap> aliasToWork; @@ -96,8 +96,8 @@ public class MapredWork implements Serializable { public MapredWork( final String command, - final LinkedHashMap> pathToAliases, - final LinkedHashMap pathToPartitionInfo, + final LinkedHashMap> pathToAliases, + final LinkedHashMap pathToPartitionInfo, final LinkedHashMap> aliasToWork, final TableDesc keyDesc, List tagToValueDesc, final Operator reducer, final Integer numReduceTasks, @@ -129,22 +129,22 @@ public class MapredWork implements Serializable { } @Explain(displayName = "Path -> Alias", normalExplain = false) - public LinkedHashMap> getPathToAliases() { + public LinkedHashMap> getPathToAliases() { return pathToAliases; } public void setPathToAliases( - final LinkedHashMap> pathToAliases) { + final LinkedHashMap> pathToAliases) { this.pathToAliases = pathToAliases; } @Explain(displayName = "Path -> Partition", normalExplain = false) - public LinkedHashMap getPathToPartitionInfo() { + public LinkedHashMap getPathToPartitionInfo() { return pathToPartitionInfo; } public void setPathToPartitionInfo( - final LinkedHashMap pathToPartitionInfo) { + final LinkedHashMap pathToPartitionInfo) { this.pathToPartitionInfo = pathToPartitionInfo; } @@ -249,7 +249,7 @@ public class MapredWork implements Serializable { } @SuppressWarnings("nls") - public void addMapWork(String path, String alias, Operator work, + public void addMapWork(Path path, String alias, Operator work, PartitionDesc pd) { ArrayList curAliases = pathToAliases.get(path); if (curAliases == null) { @@ -315,7 +315,7 @@ public class MapredWork implements Serializable { */ public void deriveExplainAttributes() { if (pathToPartitionInfo != null) { - for (Map.Entry entry : pathToPartitionInfo + for (Map.Entry entry : pathToPartitionInfo .entrySet()) { entry.getValue().deriveBaseFileName(entry.getKey()); } @@ -450,8 +450,8 @@ public class MapredWork implements Serializable { public void resolveDynamicPartitionMerge(HiveConf conf, Path path, TableDesc tblDesc, ArrayList aliases, PartitionDesc partDesc) { - pathToAliases.put(path.toString(), aliases); - pathToPartitionInfo.put(path.toString(), partDesc); + pathToAliases.put(path, aliases); + pathToPartitionInfo.put(path, partDesc); } public List> getAllOperators() { @@ -462,7 +462,7 @@ public class MapredWork implements Serializable { opList.add(getReducer()); } - Map> pa = getPathToAliases(); + Map> pa = getPathToAliases(); if (pa != null) { for (List ls : pa.values()) { for (String a : ls) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 02d10cd..afa8bf1 100644 
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -265,19 +265,18 @@ public class PartitionDesc implements Serializable, Cloneable { * @param path * URI to the partition file */ - void deriveBaseFileName(String path) { + void deriveBaseFileName(Path path) { PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc); if (path == null) { return; } try { - Path p = new Path(path); - baseFileName = p.getName(); + baseFileName = path.getName(); } catch (Exception ex) { // don't really care about the exception. the goal is to capture the // the last component at the minimum - so set to the complete path - baseFileName = path; + baseFileName = path.toUri().getPath(); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index 477955a..fde1c29 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -29,6 +29,7 @@ import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; @@ -86,8 +87,8 @@ public final class PlanUtils { @SuppressWarnings("nls") public static MapredWork getMapRedWork() { try { - return new MapredWork("", new LinkedHashMap>(), - new LinkedHashMap(), + return new MapredWork("", new LinkedHashMap>(), + new LinkedHashMap(), new LinkedHashMap>(), new TableDesc(), new ArrayList(), null, Integer.valueOf(1), null, Hive.get().getConf().getBoolVar( diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java index 9be62b9..a4dad91 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java @@ -188,9 +188,10 @@ public class TestSymlinkTextInputFormat extends TestCase { assertEquals(1, retSplits.length); } catch (Exception e) { e.printStackTrace(); + fail("Caught exception " + e); } finally { if (tblCreated) { - drv.run("drop table text_symlink_text;").getResponseCode(); + drv.run("drop table text_symlink_text").getResponseCode(); } } }
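Caller-side note (not part of the patch): the SemanticAnalyzer hunks above stop flattening locations to String — tab.getPath() and part.getPartitionPath() now feed the Path-based FetchWork constructors directly. A minimal sketch, compiled against the patched signatures shown in the FetchWork hunk; the class and variable names here are illustrative only and assume the patched tree is on the classpath.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

public class FetchWorkConstructionSketch {

  // Non-partitioned case: the table location stays a Path end to end,
  // mirroring "new FetchWork(tab.getPath(), ...)" in the hunk above.
  public static FetchWork tableFetch(Path tableLocation, TableDesc tableDesc, int limit) {
    return new FetchWork(tableLocation, tableDesc, limit);
  }

  // Partitioned case: partition locations are collected as Path objects
  // (previously each one went through toString()) and handed to the
  // List<Path>-based constructor.
  public static FetchWork partitionedFetch(List<Path> partitionLocations,
      List<PartitionDesc> partitionDescs, int limit) {
    List<Path> listP = new ArrayList<Path>(partitionLocations);
    return new FetchWork(listP, partitionDescs, limit);
  }
}

Keeping Path objects avoids the repeated String-to-Path re-parsing the old code did on the consumer side.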
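The ConditionalResolverMergeFiles and MapredWork hunks rely on Path being usable as a map key (Hadoop's Path overrides equals/hashCode based on its URI). A self-contained sketch, not from the patch, of the "pull out the single root entry and re-key it" cleanup shape used by the resolver; the paths and alias values are made up for illustration.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;

public class PathKeyedRekeySketch {

  // Removes the single existing entry (the root dir) and re-adds its value
  // under a new Path key, the same shape as the resolver's cleanup of
  // pathToAliases after a dynamic-partition merge.
  public static <V> void rekeySingleEntry(Map<Path, V> byPath, Path newKey) {
    assert byPath.size() == 1;
    Path oldKey = byPath.keySet().iterator().next();
    V value = byPath.remove(oldKey);
    byPath.put(newKey, value);
  }

  public static void main(String[] args) {
    Map<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<Path, ArrayList<String>>();
    ArrayList<String> aliases = new ArrayList<String>();
    aliases.add("t1");
    pathToAliases.put(new Path("/tmp/hive-scratch/root"), aliases);
    rekeySingleEntry(pathToAliases, new Path("/tmp/hive-scratch/root/dp=1"));
    System.out.println(pathToAliases); // prints {/tmp/hive-scratch/root/dp=1=[t1]}
  }
}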
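Inside FetchWork, tblDir and partDir are now stored as Path, so getTblDirPath() becomes a plain field access and the String/Path conversion helpers are left behind as commented-out dead code rather than deleted. The toString hunk also swaps repeated String concatenation in a loop for a single StringBuilder; a small standalone sketch of that pattern (names are illustrative, not the patch's code):

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.fs.Path;

public class PartitionListToString {

  // Same shape as the new FetchWork.toString(): append each partition Path
  // to one StringBuilder instead of re-concatenating Strings on every pass.
  public static String describe(List<Path> partDirs) {
    StringBuilder ret = new StringBuilder("partition = ");
    for (Path part : partDirs) {
      ret.append(part);
    }
    return ret.toString();
  }

  public static void main(String[] args) {
    System.out.println(describe(Arrays.asList(new Path("/w/t/ds=1"), new Path("/w/t/ds=2"))));
  }
}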
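With MapredWork's pathToAliases and pathToPartitionInfo keyed by Path, and addMapWork/resolveDynamicPartitionMerge taking Path, callers no longer call path.toString() when registering map-side work. A hedged sketch of the registration pattern, reduced to the alias map only (Operator and PartitionDesc are omitted; this is not the patched class itself):

import java.util.ArrayList;
import java.util.LinkedHashMap;

import org.apache.hadoop.fs.Path;

public class PathKeyedMapWorkSketch {

  private final LinkedHashMap<Path, ArrayList<String>> pathToAliases =
      new LinkedHashMap<Path, ArrayList<String>>();

  // Same shape as MapredWork.addMapWork(Path, String, ...): look up the
  // alias list by Path, create it on first use, then record the alias.
  public void addMapWork(Path path, String alias) {
    ArrayList<String> curAliases = pathToAliases.get(path);
    if (curAliases == null) {
      curAliases = new ArrayList<String>();
      pathToAliases.put(path, curAliases);
    }
    curAliases.add(alias);
  }
}

LinkedHashMap is kept so iteration order stays deterministic, matching the comment in the MapredWork source.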
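PartitionDesc.deriveBaseFileName now receives a Path, so the last path component comes straight from Path.getName(), and the fallback keeps only the URI path instead of the full String. A standalone illustration of those two Hadoop Path calls (the location used here is made up):

import org.apache.hadoop.fs.Path;

public class BaseFileNameSketch {
  public static void main(String[] args) {
    Path p = new Path("hdfs://nn:8020/warehouse/t/ds=2013-01-01");
    // Last path component, as used for baseFileName:
    System.out.println(p.getName());          // ds=2013-01-01
    // Fallback used in the catch block: scheme and authority stripped, path kept.
    System.out.println(p.toUri().getPath());  // /warehouse/t/ds=2013-01-01
  }
}

In the TestSymlinkTextInputFormat hunk, the added fail(...) makes a caught exception actually fail the test rather than only printing a stack trace, and the trailing semicolon is dropped from the statement handed to Driver.run, presumably because Driver.run takes the bare statement (the CLI normally strips the terminator before invoking it).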