Index: hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java
===================================================================
--- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java (revision 1553986)
+++ hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java (working copy)
@@ -377,13 +377,13 @@
     org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
     FetchWork work;
     if (tbl.getPartCols().isEmpty()) {
-      work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl));
+      work = new FetchWork(new Path(tbl.getDataLocation()), Utilities.getTableDesc(tbl));
     } else {
       List<Partition> partitions = hive.getPartitions(tbl);
       List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
-      List<String> partLocs = new ArrayList<String>();
+      List<Path> partLocs = new ArrayList<Path>();
       for (Partition part : partitions) {
-        partLocs.add(part.getLocation());
+        partLocs.add(part.getPartitionPath());
         partDesc.add(Utilities.getPartitionDesc(part));
       }
       work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
Index: hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java
===================================================================
--- hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java (revision 1553986)
+++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java (working copy)
@@ -379,15 +379,15 @@
     if (!tbl.getPartCols().isEmpty()) {
       List<Partition> partitions = hive.getPartitions(tbl);
       List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
-      List<String> partLocs = new ArrayList<String>();
+      List<Path> partLocs = new ArrayList<Path>();
       for (Partition part : partitions) {
-        partLocs.add(part.getLocation());
+        partLocs.add(part.getPartitionPath());
         partDesc.add(Utilities.getPartitionDesc(part));
       }
       work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
       work.setLimit(100);
     } else {
-      work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl));
+      work = new FetchWork(new Path(tbl.getDataLocation()), Utilities.getTableDesc(tbl));
     }
     FetchTask task = new FetchTask();
     task.setWork(work);
Index: ql/src/test/results/compiler/plan/join2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join2.q.xml (revision 1553986)
+++ ql/src/test/results/compiler/plan/join2.q.xml (working copy)
@@ -141,9 +141,6 @@
-
-            #### A masked pattern was here ####
-
@@ -935,7 +932,7 @@
             #### A masked pattern was here ####
-            -mr-10002
+            -mr-10001
             org.apache.hadoop.mapred.SequenceFileInputFormat
Index: ql/src/test/results/compiler/plan/input2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input2.q.xml (revision 1553986)
+++ ql/src/test/results/compiler/plan/input2.q.xml (working copy)
@@ -264,7 +264,7 @@
             #### A masked pattern was here ####
-            -ext-10006
+            -ext-10003
             org.apache.hadoop.mapred.TextInputFormat
@@ -421,9 +421,6 @@
-
-            #### A masked pattern was here ####
-
@@ -744,7 +741,7 @@
             #### A masked pattern was here ####
-            -ext-10007
+            -ext-10004
             org.apache.hadoop.mapred.TextInputFormat
@@ -901,9 +898,6 @@
-
-            #### A masked pattern was here ####
-
@@ -1228,7 +1222,7 @@
             #### A masked pattern was here ####
-            -ext-10008
+            -ext-10005
             org.apache.hadoop.mapred.TextInputFormat
@@ -1394,9 +1388,6 @@
-
-            #### A masked pattern was here ####
-
Index: ql/src/test/results/compiler/plan/join3.q.xml
=================================================================== --- ql/src/test/results/compiler/plan/join3.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/join3.q.xml (working copy) @@ -137,9 +137,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/input3.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input3.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/input3.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - -ext-10007 + -ext-10004 org.apache.hadoop.mapred.TextInputFormat @@ -421,9 +421,6 @@ - - #### A masked pattern was here #### - @@ -744,7 +741,7 @@ #### A masked pattern was here #### - -ext-10008 + -ext-10005 org.apache.hadoop.mapred.TextInputFormat @@ -901,9 +898,6 @@ - - #### A masked pattern was here #### - @@ -1228,7 +1222,7 @@ #### A masked pattern was here #### - -ext-10009 + -ext-10006 org.apache.hadoop.mapred.TextInputFormat @@ -1394,9 +1388,6 @@ - - #### A masked pattern was here #### - @@ -1632,7 +1623,7 @@ #### A masked pattern was here #### - -ext-10010 + -ext-10007 org.apache.hadoop.mapred.TextInputFormat Index: ql/src/test/results/compiler/plan/input4.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input4.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/input4.q.xml (working copy) @@ -137,9 +137,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/input5.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input5.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/input5.q.xml (working copy) @@ -137,9 +137,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/input6.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input6.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/input6.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - -ext-10002 + -ext-10001 org.apache.hadoop.mapred.TextInputFormat @@ -421,9 +421,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/input7.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input7.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/input7.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - -ext-10002 + -ext-10001 org.apache.hadoop.mapred.TextInputFormat @@ -421,9 +421,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/input_testsequencefile.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input_testsequencefile.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/input_testsequencefile.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - -ext-10002 + -ext-10001 org.apache.hadoop.mapred.SequenceFileInputFormat @@ -421,9 +421,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/input9.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input9.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/input9.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - 
-ext-10002 + -ext-10001 org.apache.hadoop.mapred.TextInputFormat @@ -421,9 +421,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/groupby1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/groupby1.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/groupby1.q.xml (working copy) @@ -137,9 +137,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/case_sensitivity.q.xml =================================================================== --- ql/src/test/results/compiler/plan/case_sensitivity.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/case_sensitivity.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - -ext-10002 + -ext-10001 org.apache.hadoop.mapred.TextInputFormat @@ -421,9 +421,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/sample2.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample2.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/sample2.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - -ext-10002 + -ext-10001 org.apache.hadoop.mapred.TextInputFormat @@ -421,9 +421,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/sample3.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample3.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/sample3.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - -ext-10002 + -ext-10001 org.apache.hadoop.mapred.TextInputFormat @@ -421,9 +421,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/sample4.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample4.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/sample4.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - -ext-10002 + -ext-10001 org.apache.hadoop.mapred.TextInputFormat @@ -421,9 +421,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/sample5.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample5.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/sample5.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - -ext-10002 + -ext-10001 org.apache.hadoop.mapred.TextInputFormat @@ -421,9 +421,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/sample6.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample6.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/sample6.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - -ext-10002 + -ext-10001 org.apache.hadoop.mapred.TextInputFormat @@ -421,9 +421,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/sample7.q.xml =================================================================== --- ql/src/test/results/compiler/plan/sample7.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/sample7.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - -ext-10002 + -ext-10001 org.apache.hadoop.mapred.TextInputFormat @@ -421,9 
+421,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/join1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/join1.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/join1.q.xml (working copy) @@ -137,9 +137,6 @@ - - #### A masked pattern was here #### - Index: ql/src/test/results/compiler/plan/input1.q.xml =================================================================== --- ql/src/test/results/compiler/plan/input1.q.xml (revision 1553986) +++ ql/src/test/results/compiler/plan/input1.q.xml (working copy) @@ -264,7 +264,7 @@ #### A masked pattern was here #### - -ext-10002 + -ext-10001 org.apache.hadoop.mapred.TextInputFormat @@ -421,9 +421,6 @@ - - #### A masked pattern was here #### - Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (working copy) @@ -26,6 +26,7 @@ import java.util.Set; import java.util.Stack; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -173,10 +174,10 @@ Task currTask, QBParseInfo parseInfo, StatsWork statsWork, Task statsTask) throws SemanticException { String aggregationKey = op.getConf().getStatsAggPrefix(); - List inputPaths = new ArrayList(); + List inputPaths = new ArrayList(); switch (parseInfo.getTableSpec().specType) { case TABLE_ONLY: - inputPaths.add(parseInfo.getTableSpec().tableHandle.getPath().toString()); + inputPaths.add(parseInfo.getTableSpec().tableHandle.getPath()); break; case STATIC_PARTITION: Partition part = parseInfo.getTableSpec().partHandle; @@ -186,7 +187,7 @@ throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_AGGKEY.getMsg( part.getPartitionPath().toString() + e.getMessage())); } - inputPaths.add(part.getPartitionPath().toString()); + inputPaths.add(part.getPartitionPath()); break; default: assert false; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy) @@ -515,10 +515,10 @@ TableDesc tblDesc = fsInputDesc.getTableInfo(); if (tblDesc.getInputFileFormatClass().equals(RCFileInputFormat.class)) { - ArrayList inputDirs = new ArrayList(); + ArrayList inputDirs = new ArrayList(1); if (!hasDynamicPartitions && !isSkewedStoredAsDirs(fsInputDesc)) { - inputDirs.add(inputDir); + inputDirs.add(new Path(inputDir)); } MergeWork work = new MergeWork(inputDirs, finalName, @@ -620,16 +620,15 @@ // find the move task for (Task mvTsk : mvTasks) { MoveWork mvWork = mvTsk.getWork(); - String srcDir = null; + Path srcDir = null; if (mvWork.getLoadFileWork() != null) { - srcDir = mvWork.getLoadFileWork().getSourceDir(); + srcDir = mvWork.getLoadFileWork().getSourcePath(); } else if (mvWork.getLoadTableWork() != null) { - srcDir = mvWork.getLoadTableWork().getSourceDir(); + srcDir = mvWork.getLoadTableWork().getSourcePath(); } - String fsOpDirName = fsOp.getConf().getFinalDirName(); if ((srcDir != null) - && (srcDir.equalsIgnoreCase(fsOpDirName))) { + && (srcDir.equals(new 
Path(fsOp.getConf().getFinalDirName())))) { return mvTsk; } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy) @@ -31,6 +31,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; @@ -172,9 +173,7 @@ ArrayList list = entry2.getValue(); if (list.contains(alias)) { // add to path set - if (!pathSet.contains(path)) { - pathSet.add(path); - } + pathSet.add(path); //remove this alias from the alias list list.remove(alias); if(list.size() == 0) { @@ -189,18 +188,18 @@ // create fetch work FetchWork fetchWork = null; - List partDir = new ArrayList(); + List partDir = new ArrayList(); List partDesc = new ArrayList(); for (String tablePath : pathSet) { PartitionDesc partitionDesc = newWork.getMapWork().getPathToPartitionInfo().get(tablePath); // create fetchwork for non partitioned table if (partitionDesc.getPartSpec() == null || partitionDesc.getPartSpec().size() == 0) { - fetchWork = new FetchWork(tablePath, partitionDesc.getTableDesc()); + fetchWork = new FetchWork(new Path(tablePath), partitionDesc.getTableDesc()); break; } // if table is partitioned,add partDir and partitionDesc - partDir.add(tablePath); + partDir.add(new Path(tablePath)); partDesc.add(partitionDesc); } // create fetchwork for partitioned table Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java (working copy) @@ -232,18 +232,17 @@ inputs.clear(); if (!table.isPartitioned()) { inputs.add(new ReadEntity(table)); - String path = table.getPath().toString(); - FetchWork work = new FetchWork(path, Utilities.getTableDesc(table)); + FetchWork work = new FetchWork(table.getPath(), Utilities.getTableDesc(table)); PlanUtils.configureInputJobPropertiesForStorageHandler(work.getTblDesc()); work.setSplitSample(splitSample); return work; } - List listP = new ArrayList(); + List listP = new ArrayList(); List partP = new ArrayList(); for (Partition partition : partsList.getNotDeniedPartns()) { inputs.add(new ReadEntity(partition)); - listP.add(partition.getPartitionPath().toString()); + listP.add(partition.getPartitionPath()); partP.add(Utilities.getPartitionDesc(partition)); } Table sourceTable = partsList.getSourceTable(); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -694,10 +694,10 @@ tblDesc = Utilities.getTableDesc(partsList.getSourceTable()); localPlan.getAliasToFetchWork().put( alias_id, - new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc, tblDesc)); + new FetchWork(partDir, partDesc, tblDesc)); } else { localPlan.getAliasToFetchWork().put(alias_id, - new FetchWork(tblDir.toString(), 
tblDesc)); + new FetchWork(tblDir, tblDesc)); } plan.setMapLocalWork(localPlan); } @@ -745,7 +745,7 @@ assert localPlan.getAliasToWork().get(alias) == null; assert localPlan.getAliasToFetchWork().get(alias) == null; localPlan.getAliasToWork().put(alias, topOp); - localPlan.getAliasToFetchWork().put(alias, new FetchWork(alias, tt_desc)); + localPlan.getAliasToFetchWork().put(alias, new FetchWork(new Path(alias), tt_desc)); plan.setMapLocalWork(localPlan); } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java (working copy) @@ -26,6 +26,7 @@ import java.util.Set; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; @@ -118,10 +119,10 @@ PartitionDesc partitionInfo = currWork.getAliasToPartnInfo().get(alias); if (fetchWork.getTblDir() != null) { - currWork.mergeAliasedInput(alias, fetchWork.getTblDir(), partitionInfo); + currWork.mergeAliasedInput(alias, fetchWork.getTblDir().toUri().toString(), partitionInfo); } else { - for (String pathDir : fetchWork.getPartDir()) { - currWork.mergeAliasedInput(alias, pathDir, partitionInfo); + for (Path pathDir : fetchWork.getPartDir()) { + currWork.mergeAliasedInput(alias, pathDir.toUri().toString(), partitionInfo); } } } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java (working copy) @@ -308,7 +308,7 @@ localPlan.getAliasToWork().put(small_alias.toString(), tblScan_op2); Path tblDir = new Path(smallTblDirs.get(small_alias)); localPlan.getAliasToFetchWork().put(small_alias.toString(), - new FetchWork(tblDir.toString(), tableDescList.get(small_alias))); + new FetchWork(tblDir, tableDescList.get(small_alias))); } newPlan.setMapLocalWork(localPlan); Index: ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java (working copy) @@ -37,10 +37,10 @@ public class FetchWork implements Serializable { private static final long serialVersionUID = 1L; - private String tblDir; + private Path tblDir; private TableDesc tblDesc; - private ArrayList partDir; + private ArrayList partDir; private ArrayList partDesc; private Operator source; @@ -75,24 +75,24 @@ return rowsComputedFromStats; } - public FetchWork(String tblDir, TableDesc tblDesc) { + public FetchWork(Path tblDir, TableDesc tblDesc) { this(tblDir, tblDesc, -1); } - public FetchWork(String tblDir, TableDesc tblDesc, int limit) { + public FetchWork(Path tblDir, TableDesc tblDesc, int limit) { this.tblDir = tblDir; this.tblDesc = tblDesc; this.limit = limit; } - public FetchWork(List partDir, List partDesc, TableDesc tblDesc) { + public FetchWork(List partDir, List partDesc, TableDesc tblDesc) { this(partDir, partDesc, tblDesc, 
-1); } - public FetchWork(List partDir, List partDesc, + public FetchWork(List partDir, List partDesc, TableDesc tblDesc, int limit) { this.tblDesc = tblDesc; - this.partDir = new ArrayList(partDir); + this.partDir = new ArrayList(partDir); this.partDesc = new ArrayList(partDesc); this.limit = limit; } @@ -124,22 +124,15 @@ /** * @return the tblDir */ - public String getTblDir() { + public Path getTblDir() { return tblDir; } /** - * @return the tblDir - */ - public Path getTblDirPath() { - return new Path(tblDir); - } - - /** * @param tblDir * the tblDir to set */ - public void setTblDir(String tblDir) { + public void setTblDir(Path tblDir) { this.tblDir = tblDir; } @@ -161,45 +154,15 @@ /** * @return the partDir */ - public ArrayList getPartDir() { + public ArrayList getPartDir() { return partDir; } - public List getPartDirPath() { - return FetchWork.convertStringToPathArray(partDir); - } - - public static List convertPathToStringArray(List paths) { - if (paths == null) { - return null; - } - - List pathsStr = new ArrayList(); - for (Path path : paths) { - pathsStr.add(path.toString()); - } - - return pathsStr; - } - - public static List convertStringToPathArray(List paths) { - if (paths == null) { - return null; - } - - List pathsStr = new ArrayList(); - for (String path : paths) { - pathsStr.add(new Path(path)); - } - - return pathsStr; - } - /** * @param partDir * the partDir to set */ - public void setPartDir(ArrayList partDir) { + public void setPartDir(ArrayList partDir) { this.partDir = partDir; } @@ -228,7 +191,7 @@ // Construct a sorted Map of Partition Dir - Partition Descriptor; ordering is based on // patition dir (map key) // Assumption: there is a 1-1 mapping between partition dir and partition descriptor lists - TreeMap partDirToPartSpecMap = new TreeMap(); + TreeMap partDirToPartSpecMap = new TreeMap(); for (int i = 0; i < partDir.size(); i++) { partDirToPartSpecMap.put(partDir.get(i), partDesc.get(i)); } @@ -319,8 +282,8 @@ } String ret = "partition = "; - for (String part : partDir) { - ret = ret.concat(part); + for (Path part : partDir) { + ret = ret.concat(part.toUri().toString()); } return ret; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java (working copy) @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; -import java.util.LinkedHashMap; /** * ArchiveWork. 
@@ -28,9 +27,6 @@ @Explain(displayName = "Map Reduce") public class ArchiveWork implements Serializable { private static final long serialVersionUID = 1L; - private String tableName; - private String dbName; - private LinkedHashMap partSpec; private ArchiveActionType type; public static enum ArchiveActionType { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java (working copy) @@ -23,6 +23,7 @@ import java.util.HashSet; import java.util.List; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.parse.ParseContext; @@ -34,7 +35,7 @@ public class ExplainWork implements Serializable { private static final long serialVersionUID = 1L; - private String resFile; + private Path resFile; private ArrayList> rootTasks; private Task fetchTask; private String astStringTree; @@ -52,7 +53,7 @@ public ExplainWork() { } - public ExplainWork(String resFile, + public ExplainWork(Path resFile, ParseContext pCtx, List> rootTasks, Task fetchTask, @@ -74,11 +75,11 @@ this.pCtx = pCtx; } - public String getResFile() { + public Path getResFile() { return resFile; } - public void setResFile(String resFile) { + public void setResFile(Path resFile) { this.resFile = resFile; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/LoadDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/LoadDesc.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LoadDesc.java (working copy) @@ -37,11 +37,7 @@ this.sourcePath = sourcePath; } - @Explain(displayName = "source", normalExplain = false) - public String getSourceDir() { - return sourcePath.toString(); - } - + @Explain(displayName = "source", normalExplain = false) public Path getSourcePath() { return sourcePath; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java (working copy) @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; -import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -39,7 +38,6 @@ private LoadMultiFilesDesc loadMultiFilesWork; private boolean checkFileFormat; - ArrayList dpSpecPaths; // dynamic partition specified paths -- the root of DP columns /** * ReadEntitites that are passed to the hooks. 
@@ -72,14 +70,6 @@ this.checkFileFormat = checkFileFormat; } - public void setDPSpecPaths(ArrayList dpsp) { - dpSpecPaths = dpsp; - } - - public ArrayList getDPSpecPaths() { - return dpSpecPaths; - } - @Explain(displayName = "tables") public LoadTableDesc getLoadTableWork() { return loadTableWork; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/CopyWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/CopyWork.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CopyWork.java (working copy) @@ -45,21 +45,13 @@ this.toPath = toPath; this.setErrorOnSrcEmpty(errorOnSrcEmpty); } - - @Explain(displayName = "source") - public String getFromPathAsString() { - return fromPath.toUri().toString(); - } - @Explain(displayName = "destination") - public String getToPathAsString() { - return toPath.toUri().toString(); - } - + @Explain(displayName = "source") public Path getFromPath() { return fromPath; } + @Explain(displayName = "destination") public Path getToPath() { return toPath; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java (working copy) @@ -21,6 +21,8 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.fs.Path; + /** * Truncates managed table or partition */ @@ -32,8 +34,8 @@ private String tableName; private Map partSpec; private List columnIndexes; - private String inputDir; - private String outputDir; + private Path inputDir; + private Path outputDir; private ListBucketingCtx lbCtx; public TruncateTableDesc() { @@ -71,19 +73,19 @@ this.columnIndexes = columnIndexes; } - public String getInputDir() { + public Path getInputDir() { return inputDir; } - public void setInputDir(String inputDir) { + public void setInputDir(Path inputDir) { this.inputDir = inputDir; } - public String getOutputDir() { + public Path getOutputDir() { return outputDir; } - public void setOutputDir(String outputDir) { + public void setOutputDir(Path outputDir) { this.outputDir = outputDir; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java (working copy) @@ -32,7 +32,6 @@ implements Serializable { private static final long serialVersionUID = 1L; private boolean replace; - private String tmpDir; private DynamicPartitionCtx dpCtx; private ListBucketingCtx lbCtx; private boolean holdDDLTime; @@ -47,35 +46,34 @@ this.holdDDLTime = false; } - public LoadTableDesc(final Path sourcePath, final String tmpDir, + public LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, final Map partitionSpec, final boolean replace) { super(sourcePath); - init(tmpDir, table, partitionSpec, replace); + init(table, partitionSpec, replace); } - public LoadTableDesc(final Path sourcePath, final String tmpDir, + public LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, final Map partitionSpec) { - this(sourcePath, tmpDir, table, partitionSpec, true); + this(sourcePath, table, partitionSpec, true); } - public LoadTableDesc(final Path sourcePath, final String tmpDir, + public 
LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, final DynamicPartitionCtx dpCtx) { super(sourcePath); this.dpCtx = dpCtx; if (dpCtx != null && dpCtx.getPartSpec() != null && partitionSpec == null) { - init(tmpDir, table, dpCtx.getPartSpec(), true); + init(table, dpCtx.getPartSpec(), true); } else { - init(tmpDir, table, new LinkedHashMap(), true); + init(table, new LinkedHashMap(), true); } } - private void init(final String tmpDir, + private void init( final org.apache.hadoop.hive.ql.plan.TableDesc table, final Map partitionSpec, final boolean replace) { - this.tmpDir = tmpDir; this.table = table; this.partitionSpec = partitionSpec; this.replace = replace; @@ -90,15 +88,6 @@ return holdDDLTime; } - @Explain(displayName = "tmp directory", normalExplain = false) - public String getTmpDir() { - return tmpDir; - } - - public void setTmpDir(final String tmp) { - tmpDir = tmp; - } - @Explain(displayName = "table") public TableDesc getTable() { return table; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java (working copy) @@ -252,7 +252,7 @@ long totalSz = 0; boolean doMerge = false; // list of paths that don't need to merge but need to move to the dest location - List toMove = new ArrayList(); + List toMove = new ArrayList(); for (int i = 0; i < status.length; ++i) { long len = getMergeSize(inpFs, status[i].getPath(), avgConditionSize); if (len >= 0) { @@ -263,7 +263,7 @@ work.resolveDynamicPartitionStoredAsSubDirsMerge(conf, status[i].getPath(), tblDesc, aliases, pDesc); } else { - toMove.add(status[i].getPath().toString()); + toMove.add(status[i].getPath()); } } if (doMerge) { @@ -287,11 +287,7 @@ List targetDirs = new ArrayList(toMove.size()); for (int i = 0; i < toMove.size(); i++) { - String toMoveStr = toMove.get(i); - if (toMoveStr.endsWith(Path.SEPARATOR)) { - toMoveStr = toMoveStr.substring(0, toMoveStr.length() - 1); - } - String[] moveStrSplits = toMoveStr.split(Path.SEPARATOR); + String[] moveStrSplits = toMove.get(i).toUri().toString().split(Path.SEPARATOR); int dpIndex = moveStrSplits.length - dpLbLevel; Path target = targetDir; while (dpIndex < moveStrSplits.length) { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java (working copy) @@ -35,7 +35,7 @@ // list of columns, comma separated private String columns; private String columnTypes; - private List srcDirs; + private transient List srcDirs; static { PTFUtils.makeTransient(LoadMultiFilesDesc.class, "targetDirs"); @@ -43,7 +43,7 @@ public LoadMultiFilesDesc() { } - public LoadMultiFilesDesc(final List sourceDirs, final List targetDir, + public LoadMultiFilesDesc(final List sourceDirs, final List targetDir, final boolean isDfsDir, final String columns, final String columnTypes) { this.srcDirs = sourceDirs; @@ -59,11 +59,11 @@ } @Explain(displayName = "sources") - public List getSourceDirs() { + public List getSourceDirs() { return srcDirs; } - public void setSourceDirs(List srcs) { + public void setSourceDirs(List srcs) { this.srcDirs = srcs; } Index: 
ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java (working copy) @@ -259,7 +259,7 @@ LoadTableDesc loadTableWork; loadTableWork = new LoadTableDesc(new Path(fromURI), - loadTmpPath, Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite); + Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite); Task childTask = TaskFactory.get(new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true), conf); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java (working copy) @@ -149,7 +149,7 @@ resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat); } - FetchWork fetch = new FetchWork(new Path(loadFileDesc.getSourceDir()).toString(), + FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, qb.getParseInfo().getOuterQueryLimit()); fetch.setSource(pCtx.getFetchSource()); fetch.setSink(pCtx.getFetchSink()); @@ -480,7 +480,7 @@ String resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT); TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat); - fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(), + fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), resultTab, qb.getParseInfo().getOuterQueryLimit()); ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, partName, Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java (working copy) @@ -285,7 +285,6 @@ Task copyTask = TaskFactory.get(new CopyWork(dataPath, tmpPath, false), conf); LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath, - ctx.getExternalTmpFileURI(fromURI), Utilities.getTableDesc(table), new TreeMap(), false); Task loadTableTask = TaskFactory.get(new MoveWork(getInputs(), @@ -332,7 +331,6 @@ Task addPartTask = TaskFactory.get(new DDLWork(getInputs(), getOutputs(), addPartitionDesc), conf); LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath, - ctx.getExternalTmpFileURI(fromURI), Utilities.getTableDesc(table), addPartitionDesc.getPartSpec(), true); loadTableWork.setInheritTableSpecs(false); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTablePartMergeFilesDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTablePartMergeFilesDesc.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTablePartMergeFilesDesc.java (working copy) @@ -33,7 +33,7 @@ private HashMap partSpec; private ListBucketingCtx lbCtx; // context for list bucketing. 
- private List inputDir = new ArrayList(); + private List inputDir = new ArrayList(); private Path outputDir = null; public AlterTablePartMergeFilesDesc(String tableName, @@ -68,11 +68,11 @@ this.outputDir = outputDir; } - public List getInputDir() { + public List getInputDir() { return inputDir; } - public void setInputDir(List inputDir) { + public void setInputDir(List inputDir) { this.inputDir = inputDir; } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -5355,8 +5355,7 @@ // Create the work for moving the table // NOTE: specify Dynamic partitions in dest_tab for WriteEntity if (!isNonNativeTable) { - ltd = new LoadTableDesc(new Path(queryTmpdir), ctx.getExternalTmpFileURI(dest_path.toUri()), - table_desc, dpCtx); + ltd = new LoadTableDesc(new Path(queryTmpdir),table_desc, dpCtx); ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())); ltd.setLbCtx(lbCtx); @@ -5439,8 +5438,7 @@ lbCtx = constructListBucketingCtx(dest_part.getSkewedColNames(), dest_part.getSkewedColValues(), dest_part.getSkewedColValueLocationMaps(), dest_part.isStoredAsSubDirectories(), conf); - ltd = new LoadTableDesc(new Path(queryTmpdir), ctx.getExternalTmpFileURI(dest_path.toUri()), - table_desc, dest_part.getSpec()); + ltd = new LoadTableDesc(new Path(queryTmpdir), table_desc, dest_part.getSpec()); ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())); ltd.setLbCtx(lbCtx); @@ -5658,7 +5656,7 @@ if (ltd != null && SessionState.get() != null) { SessionState.get().getLineageState() - .mapDirToFop(ltd.getSourceDir(), (FileSinkOperator) output); + .mapDirToFop(ltd.getSourcePath(), (FileSinkOperator) output); } if (LOG.isDebugEnabled()) { Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java (working copy) @@ -77,7 +77,7 @@ pCtx = ((SemanticAnalyzer)sem).getParseContext(); } - ExplainWork work = new ExplainWork(ctx.getResFile().toString(), + ExplainWork work = new ExplainWork(ctx.getResFile(), pCtx, tasks, fetchTask, Index: ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (working copy) @@ -1000,7 +1000,7 @@ truncateTblDesc.setColumnIndexes(new ArrayList(columnIndexes)); - truncateTblDesc.setInputDir(oldTblPartLoc.toString()); + truncateTblDesc.setInputDir(oldTblPartLoc); addInputsOutputsAlterTable(tableName, partSpec); truncateTblDesc.setLbCtx(lbCtx); @@ -1011,8 +1011,8 @@ // Write the output to temporary directory and move it to the final location at the end // so the operation is atomic. 
String queryTmpdir = ctx.getExternalTmpFileURI(newTblPartLoc.toUri()); - truncateTblDesc.setOutputDir(queryTmpdir); - LoadTableDesc ltd = new LoadTableDesc(new Path(queryTmpdir), queryTmpdir, tblDesc, + truncateTblDesc.setOutputDir(new Path(queryTmpdir)); + LoadTableDesc ltd = new LoadTableDesc(new Path(queryTmpdir), tblDesc, partSpec == null ? new HashMap() : partSpec); ltd.setLbCtx(lbCtx); Task moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), @@ -1534,7 +1534,7 @@ AlterTablePartMergeFilesDesc mergeDesc = new AlterTablePartMergeFilesDesc( tableName, partSpec); - List inputDir = new ArrayList(); + List inputDir = new ArrayList(); Path oldTblPartLoc = null; Path newTblPartLoc = null; Table tblObj = null; @@ -1614,7 +1614,7 @@ "Merge can not perform on archived partitions."); } - inputDir.add(oldTblPartLoc.toString()); + inputDir.add(oldTblPartLoc); mergeDesc.setInputDir(inputDir); @@ -1627,7 +1627,7 @@ TableDesc tblDesc = Utilities.getTableDesc(tblObj); String queryTmpdir = ctx.getExternalTmpFileURI(newTblPartLoc.toUri()); mergeDesc.setOutputDir(new Path(queryTmpdir)); - LoadTableDesc ltd = new LoadTableDesc(new Path(queryTmpdir), queryTmpdir, tblDesc, + LoadTableDesc ltd = new LoadTableDesc(new Path(queryTmpdir), tblDesc, partSpec == null ? new HashMap() : partSpec); ltd.setLbCtx(lbCtx); Task moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), @@ -1948,7 +1948,7 @@ prop.setProperty("columns", colTypes[0]); prop.setProperty("columns.types", colTypes[1]); prop.setProperty(serdeConstants.SERIALIZATION_LIB, LazySimpleSerDe.class.getName()); - FetchWork fetch = new FetchWork(ctx.getResFile().toString(), new TableDesc( + FetchWork fetch = new FetchWork(ctx.getResFile(), new TableDesc( TextInputFormat.class,IgnoreKeyTextOutputFormat.class, prop), -1); fetch.setSerializationNullFormat(" "); return (FetchTask) TaskFactory.get(fetch, conf); Index: ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java (working copy) @@ -22,6 +22,7 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; @@ -42,7 +43,7 @@ * time and is then later used to created the mapping from * movetask to the set of filesink operators. */ - private final Map dirToFop; + private final Map dirToFop; /** * The lineage context index for this query. @@ -59,7 +60,7 @@ * Constructor. */ public LineageState() { - dirToFop = new HashMap(); + dirToFop = new HashMap(); linfo = new LineageInfo(); } @@ -69,7 +70,7 @@ * @param dir The directory name. * @param fop The file sink operator. */ - public void mapDirToFop(String dir, FileSinkOperator fop) { + public void mapDirToFop(Path dir, FileSinkOperator fop) { dirToFop.put(dir, fop); } @@ -80,7 +81,7 @@ * @param dc The associated data container. * @param cols The list of columns. */ - public void setLineage(String dir, DataContainer dc, + public void setLineage(Path dir, DataContainer dc, List cols) { // First lookup the file sink operator from the load work. 
FileSinkOperator fop = dirToFop.get(dir); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (working copy) @@ -193,7 +193,7 @@ PrintStream out = null; try { - Path resFile = new Path(work.getResFile()); + Path resFile = work.getResFile(); OutputStream outS = resFile.getFileSystem(conf).create(resFile); out = new PrintStream(outS); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (working copy) @@ -299,7 +299,7 @@ if (iterPath == null) { if (work.isNotPartitioned()) { if (!tblDataDone) { - currPath = work.getTblDirPath(); + currPath = work.getTblDir(); currTbl = work.getTblDesc(); if (isNativeTable) { FileSystem fs = currPath.getFileSystem(job); @@ -326,7 +326,7 @@ } return; } else { - iterPath = FetchWork.convertStringToPathArray(work.getPartDir()).iterator(); + iterPath = work.getPartDir().iterator(); iterPartDesc = work.getPartDesc().iterator(); } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (working copy) @@ -702,6 +702,20 @@ output.writeString(token.getText()); } } + + private static class PathSerializer extends com.esotericsoftware.kryo.Serializer { + + @Override + public void write(Kryo kryo, Output output, Path path) { + output.writeString(path.toUri().toString()); + } + + @Override + public Path read(Kryo kryo, Input input, Class type) { + return new Path(URI.create(input.readString())); + } + } + private static void serializePlan(Object plan, OutputStream out, Configuration conf, boolean cloningPlan) { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SERIALIZE_PLAN); @@ -843,6 +857,7 @@ kryo.setClassLoader(Thread.currentThread().getContextClassLoader()); kryo.register(java.sql.Date.class, new SqlDateSerializer()); kryo.register(java.sql.Timestamp.class, new TimestampSerializer()); + kryo.register(Path.class, new PathSerializer()); removeField(kryo, Operator.class, "colExprMap"); removeField(kryo, ColumnInfo.class, "objectInspector"); removeField(kryo, MapWork.class, "opParseCtxMap"); @@ -864,6 +879,7 @@ kryo.register(CommonToken.class, new CommonTokenSerializer()); kryo.register(java.sql.Date.class, new SqlDateSerializer()); kryo.register(java.sql.Timestamp.class, new TimestampSerializer()); + kryo.register(Path.class, new PathSerializer()); return kryo; }; }; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (working copy) @@ -205,7 +205,7 @@ LoadFileDesc lfd = work.getLoadFileWork(); if (lfd != null) { Path targetPath = lfd.getTargetDir(); - Path sourcePath = new Path(lfd.getSourceDir()); + Path sourcePath = lfd.getSourcePath(); moveFile(sourcePath, targetPath, lfd.getIsDfsDir()); } @@ -216,7 +216,7 @@ boolean isDfsDir = 
lmfd.getIsDfsDir(); int i = 0; while (i sortCols = null; int numBuckets = -1; Task task = this; - String path = tbd.getSourceDir(); + String path = tbd.getSourcePath().toUri().toString(); // Find the first ancestor of this MoveTask which is some form of map reduce task // (Either standard, local, or a merge) while (task.getParentTasks() != null && task.getParentTasks().size() == 1) { @@ -330,7 +330,7 @@ // condition for merging is not met, see GenMRFileSink1. if (task instanceof MoveTask) { if (((MoveTask)task).getWork().getLoadFileWork() != null) { - path = ((MoveTask)task).getWork().getLoadFileWork().getSourceDir(); + path = ((MoveTask)task).getWork().getLoadFileWork().getSourcePath().toUri().toString(); } } } @@ -354,7 +354,7 @@ // want to isolate any potential issue it may introduce. ArrayList> dp = db.loadDynamicPartitions( - new Path(tbd.getSourceDir()), + tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), @@ -394,7 +394,7 @@ dc = new DataContainer(table.getTTable(), partn.getTPartition()); if (SessionState.get() != null) { - SessionState.get().getLineageState().setLineage(tbd.getSourceDir(), dc, + SessionState.get().getLineageState().setLineage(tbd.getSourcePath(), dc, table.getCols()); } @@ -405,7 +405,7 @@ List partVals = MetaStoreUtils.getPvals(table.getPartCols(), tbd.getPartitionSpec()); db.validatePartitionNameCharacters(partVals); - db.loadPartition(new Path(tbd.getSourceDir()), tbd.getTable().getTableName(), + db.loadPartition(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), tbd.getHoldDDLTime(), tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd)); Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false); @@ -422,7 +422,7 @@ } } if (SessionState.get() != null && dc != null) { - SessionState.get().getLineageState().setLineage(tbd.getSourceDir(), dc, + SessionState.get().getLineageState().setLineage(tbd.getSourcePath(), dc, table.getCols()); } releaseLocks(tbd); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java (working copy) @@ -498,21 +498,22 @@ ArrayList paths = mWork.getPaths(); ArrayList parts = mWork.getPartitionDescs(); - Path onePath = new Path(paths.get(0)); - String tmpPath = context.getCtx().getExternalTmpFileURI(onePath.toUri()); - + List inputPaths = new ArrayList(paths.size()); + for (String path : paths) { + inputPaths.add(new Path(path)); + } + + String tmpPath = context.getCtx().getExternalTmpFileURI(inputPaths.get(0).toUri()); Path partitionFile = new Path(tmpPath, ".partitions"); ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile); - PartitionKeySampler sampler = new PartitionKeySampler(); if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) { console.printInfo("Use sampling data created in previous MR"); // merges sampling data from previous MR and make paritition keys for total sort - for (String path : paths) { - Path inputPath = new Path(path); - FileSystem fs = inputPath.getFileSystem(job); - for (FileStatus status : fs.globStatus(new Path(inputPath, ".sampling*"))) { + for (Path path : inputPaths) { + FileSystem fs = path.getFileSystem(job); + for (FileStatus status : fs.globStatus(new Path(path, ".sampling*"))) { sampler.addSampleFile(status.getPath(), job); } } @@ -524,9 +525,9 @@ 
FetchWork fetchWork; if (!partDesc.isPartitioned()) { assert paths.size() == 1; - fetchWork = new FetchWork(paths.get(0), partDesc.getTableDesc()); + fetchWork = new FetchWork(inputPaths.get(0), partDesc.getTableDesc()); } else { - fetchWork = new FetchWork(paths, parts, partDesc.getTableDesc()); + fetchWork = new FetchWork(inputPaths, parts, partDesc.getTableDesc()); } fetchWork.setSource(ts); Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java (working copy) @@ -44,7 +44,7 @@ private static final long serialVersionUID = 1L; - private List inputPaths; + private transient List inputPaths; private transient Path outputDir; private boolean hasDynamicPartitions; private DynamicPartitionCtx dynPartCtx; @@ -54,11 +54,11 @@ public MergeWork() { } - public MergeWork(List inputPaths, Path outputDir) { + public MergeWork(List inputPaths, Path outputDir) { this(inputPaths, outputDir, false, null); } - public MergeWork(List inputPaths, Path outputDir, + public MergeWork(List inputPaths, Path outputDir, boolean hasDynamicPartitions, DynamicPartitionCtx dynPartCtx) { super(); this.inputPaths = inputPaths; @@ -70,16 +70,16 @@ if(this.getPathToPartitionInfo() == null) { this.setPathToPartitionInfo(new LinkedHashMap()); } - for(String path: this.inputPaths) { - this.getPathToPartitionInfo().put(path, partDesc); + for(Path path: this.inputPaths) { + this.getPathToPartitionInfo().put(path.toUri().toString(), partDesc); } } - public List getInputPaths() { + public List getInputPaths() { return inputPaths; } - public void setInputPaths(List inputPaths) { + public void setInputPaths(List inputPaths) { this.inputPaths = inputPaths; } @@ -133,7 +133,7 @@ super.resolveDynamicPartitionStoredAsSubDirsMerge(conf, path, tblDesc, aliases, partDesc); // Add the DP path to the list of input paths - inputPaths.add(path.toString()); + inputPaths.add(path); } /** @@ -148,18 +148,17 @@ // use sub-dir as inputpath. assert ((this.inputPaths != null) && (this.inputPaths.size() == 1)) : "alter table ... concatenate should only have one directory inside inputpaths"; - String dirName = inputPaths.get(0); - Path dirPath = new Path(dirName); + Path dirPath = inputPaths.get(0); try { FileSystem inpFs = dirPath.getFileSystem(conf); FileStatus[] status = HiveStatsUtils.getFileStatusRecurse(dirPath, listBucketingCtx .getSkewedColNames().size(), inpFs); - List newInputPath = new ArrayList(); + List newInputPath = new ArrayList(); boolean succeed = true; for (int i = 0; i < status.length; ++i) { if (status[i].isDir()) { // Add the lb path to the list of input paths - newInputPath.add(status[i].getPath().toString()); + newInputPath.add(status[i].getPath()); } else { // find file instead of dir. 
dont change inputpath succeed = false; @@ -173,7 +172,7 @@ inputPaths.addAll(newInputPath); } } catch (IOException e) { - String msg = "Fail to get filesystem for directory name : " + dirName; + String msg = "Fail to get filesystem for directory name : " + dirPath.toUri(); throw new RuntimeException(msg, e); } Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/BlockMergeTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/BlockMergeTask.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/BlockMergeTask.java (working copy) @@ -254,8 +254,8 @@ } private void addInputPaths(JobConf job, MergeWork work) { - for (String path : work.getInputPaths()) { - FileInputFormat.addInputPath(job, new Path(path)); + for (Path path : work.getInputPaths()) { + FileInputFormat.addInputPath(job, path); } } @@ -291,7 +291,7 @@ printUsage(); } - List inputPaths = new ArrayList(); + List inputPaths = new ArrayList(); String[] paths = inputPathStr.split(INPUT_SEPERATOR); if (paths == null || paths.length == 0) { printUsage(); @@ -309,10 +309,10 @@ if (fstatus.isDir()) { FileStatus[] fileStatus = fs.listStatus(pathObj); for (FileStatus st : fileStatus) { - inputPaths.add(st.getPath().toString()); + inputPaths.add(st.getPath()); } } else { - inputPaths.add(fstatus.getPath().toString()); + inputPaths.add(fstatus.getPath()); } } catch (IOException e) { e.printStackTrace(System.err); Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java (working copy) @@ -130,8 +130,8 @@ throw new RuntimeException(e.getMessage()); } - String outputPath = this.work.getOutputDir(); - Path tempOutPath = Utilities.toTempPath(new Path(outputPath)); + Path outputPath = this.work.getOutputDir(); + Path tempOutPath = Utilities.toTempPath(outputPath); try { FileSystem fs = tempOutPath.getFileSystem(job); if (!fs.exists(tempOutPath)) { @@ -230,7 +230,7 @@ } private void addInputPaths(JobConf job, ColumnTruncateWork work) { - FileInputFormat.addInputPath(job, new Path(work.getInputDir())); + FileInputFormat.addInputPath(job, work.getInputDir()); } @Override Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java (working copy) @@ -22,6 +22,7 @@ import java.util.LinkedHashMap; import java.util.List; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; @@ -36,8 +37,8 @@ private static final long serialVersionUID = 1L; - private String inputDir; - private String outputDir; + private transient Path inputDir; + private Path outputDir; private boolean hasDynamicPartitions; private DynamicPartitionCtx dynPartCtx; private boolean isListBucketingAlterTableConcatenate; @@ -47,11 +48,11 @@ public ColumnTruncateWork() { } - public ColumnTruncateWork(List droppedColumns, String inputDir, 
String outputDir) { + public ColumnTruncateWork(List droppedColumns, Path inputDir, Path outputDir) { this(droppedColumns, inputDir, outputDir, false, null); } - public ColumnTruncateWork(List droppedColumns, String inputDir, String outputDir, + public ColumnTruncateWork(List droppedColumns, Path inputDir, Path outputDir, boolean hasDynamicPartitions, DynamicPartitionCtx dynPartCtx) { super(); this.droppedColumns = droppedColumns; @@ -64,22 +65,22 @@ if(this.getPathToPartitionInfo() == null) { this.setPathToPartitionInfo(new LinkedHashMap()); } - this.getPathToPartitionInfo().put(inputDir, partDesc); + this.getPathToPartitionInfo().put(inputDir.toUri().toString(), partDesc); } - public String getInputDir() { + public Path getInputDir() { return inputDir; } - public void setInputPaths(String inputDir) { + public void setInputPaths(Path inputDir) { this.inputDir = inputDir; } - public String getOutputDir() { + public Path getOutputDir() { return outputDir; } - public void setOutputDir(String outputDir) { + public void setOutputDir(Path outputDir) { this.outputDir = outputDir; } Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java (working copy) @@ -78,12 +78,12 @@ jc = job; work = (ColumnTruncateWork) Utilities.getMapWork(job); - String specPath = work.getOutputDir(); + Path specPath = work.getOutputDir(); Path tmpPath = Utilities.toTempPath(specPath); Path taskTmpPath = Utilities.toTaskTempPath(specPath); updatePaths(tmpPath, taskTmpPath); try { - fs = (new Path(specPath)).getFileSystem(job); + fs = specPath.getFileSystem(job); autoDelete = fs.deleteOnExit(outPath); } catch (IOException e) { this.exception = true; @@ -229,13 +229,12 @@ } } - public static void jobClose(String outputPath, boolean success, JobConf job, + public static void jobClose(Path outputPath, boolean success, JobConf job, LogHelper console, DynamicPartitionCtx dynPartCtx, Reporter reporter ) throws HiveException, IOException { - Path outpath = new Path(outputPath); - FileSystem fs = outpath.getFileSystem(job); - Path backupPath = backupOutputPath(fs, outpath, job); - Utilities.mvFileToFinalPath(outputPath, job, success, LOG, dynPartCtx, null, + FileSystem fs = outputPath.getFileSystem(job); + Path backupPath = backupOutputPath(fs, outputPath, job); + Utilities.mvFileToFinalPath(outputPath.toUri().toString(), job, success, LOG, dynPartCtx, null, reporter); fs.delete(backupPath, true); } Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java (revision 1553986) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java (working copy) @@ -259,8 +259,8 @@ } private void addInputPaths(JobConf job, PartialScanWork work) { - for (String path : work.getInputPaths()) { - FileInputFormat.addInputPath(job, new Path(path)); + for (Path path : work.getInputPaths()) { + FileInputFormat.addInputPath(job, path); } } @@ -296,7 +296,7 @@ printUsage(); } - List inputPaths = new ArrayList(); + List inputPaths = new ArrayList(); String[] paths = inputPathStr.split(INPUT_SEPERATOR); if (paths == null || paths.length == 0) { printUsage(); @@ -314,10 +314,10 @@ 
      if (fstatus.isDir()) {
        FileStatus[] fileStatus = fs.listStatus(pathObj);
        for (FileStatus st : fileStatus) {
-          inputPaths.add(st.getPath().toString());
+          inputPaths.add(st.getPath());
        }
      } else {
-        inputPaths.add(fstatus.getPath().toString());
+        inputPaths.add(fstatus.getPath());
      }
    } catch (IOException e) {
      e.printStackTrace(System.err);
Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java (revision 1553986)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java (working copy)
@@ -22,6 +22,7 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
 import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat;
 import org.apache.hadoop.hive.ql.plan.Explain;
@@ -38,13 +39,13 @@
 
   private static final long serialVersionUID = 1L;
 
-  private List<String> inputPaths;
+  private transient List<Path> inputPaths;
   private String aggKey;
 
   public PartialScanWork() {
   }
 
-  public PartialScanWork(List<String> inputPaths) {
+  public PartialScanWork(List<Path> inputPaths) {
     super();
     this.inputPaths = inputPaths;
     PartitionDesc partDesc = new PartitionDesc();
@@ -52,16 +53,16 @@
     if(this.getPathToPartitionInfo() == null) {
       this.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
     }
-    for(String path: this.inputPaths) {
-      this.getPathToPartitionInfo().put(path, partDesc);
+    for(Path path: this.inputPaths) {
+      this.getPathToPartitionInfo().put(path.toUri().toString(), partDesc);
     }
   }
 
-  public List<String> getInputPaths() {
+  public List<Path> getInputPaths() {
     return inputPaths;
   }
 
-  public void setInputPaths(List<String> inputPaths) {
+  public void setInputPaths(List<Path> inputPaths) {
    this.inputPaths = inputPaths;
  }
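
The common thread in the hunks above is that plan and work objects now carry org.apache.hadoop.fs.Path values end to end instead of path strings that are converted back and forth. For reference, a caller of the reworked FetchWork constructors looks roughly like the sketch below; it is a minimal illustration only, not part of the patch, and the helper class and method names are invented.

    import java.util.List;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.plan.FetchWork;
    import org.apache.hadoop.hive.ql.plan.PartitionDesc;
    import org.apache.hadoop.hive.ql.plan.TableDesc;

    public class FetchWorkPathExample {

      // Non-partitioned table: the table directory is handed over as a Path.
      static FetchWork forTable(Path tableDir, TableDesc tableDesc) {
        return new FetchWork(tableDir, tableDesc);
      }

      // Partitioned table: partition directories are collected as Paths, so the
      // old FetchWork.convertPathToStringArray(...) round trip is no longer needed.
      static FetchWork forPartitions(List<Path> partDirs, List<PartitionDesc> partDescs,
          TableDesc tableDesc) {
        return new FetchWork(partDirs, partDescs, tableDesc);
      }
    }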
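
The Utilities.java hunk registers a custom Kryo serializer because Path cannot be serialized by Kryo out of the box: the plan serializer writes each Path as its URI string and rebuilds it on read. The self-contained round trip below shows the same idea; it is only a sketch, with an invented class name and a made-up sample HDFS URI.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.net.URI;

    import org.apache.hadoop.fs.Path;

    import com.esotericsoftware.kryo.Kryo;
    import com.esotericsoftware.kryo.Serializer;
    import com.esotericsoftware.kryo.io.Input;
    import com.esotericsoftware.kryo.io.Output;

    public class PathKryoRoundTrip {

      // Same shape as the PathSerializer added to Utilities: store the URI form,
      // rebuild the Path from it on read.
      static class PathSerializer extends Serializer<Path> {
        @Override
        public void write(Kryo kryo, Output output, Path path) {
          output.writeString(path.toUri().toString());
        }

        @Override
        public Path read(Kryo kryo, Input input, Class<Path> type) {
          return new Path(URI.create(input.readString()));
        }
      }

      public static void main(String[] args) {
        Kryo kryo = new Kryo();
        kryo.register(Path.class, new PathSerializer());

        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        Output out = new Output(bytes);
        kryo.writeObject(out, new Path("hdfs://nn:8020/warehouse/t1"));
        out.close();

        Path back = kryo.readObject(
            new Input(new ByteArrayInputStream(bytes.toByteArray())), Path.class);
        System.out.println(back); // hdfs://nn:8020/warehouse/t1
      }
    }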