diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index be105c1..2a6b944 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -460,12 +460,12 @@ public int compile(String command, boolean resetTaskIds) { // serialize the queryPlan FileOutputStream fos = new FileOutputStream(queryPlanFileName); - Utilities.serializeQueryPlan(plan, fos); + Utilities.serializeObject(plan, fos); fos.close(); // deserialize the queryPlan FileInputStream fis = new FileInputStream(queryPlanFileName); - QueryPlan newPlan = Utilities.deserializeQueryPlan(fis, conf); + QueryPlan newPlan = Utilities.deserializeObject(fis); fis.close(); // Use the deserialized plan @@ -878,14 +878,17 @@ private void releaseLocks(List hiveLocks) { public CommandProcessorResponse run(String command) throws CommandNeedRetryException { CommandProcessorResponse cpr = runInternal(command); - if(cpr.getResponseCode() == 0) + if(cpr.getResponseCode() == 0) { return cpr; + } SessionState ss = SessionState.get(); - if(ss == null) + if(ss == null) { return cpr; + } MetaDataFormatter mdf = MetaDataFormatUtils.getFormatter(ss.getConf()); - if(!(mdf instanceof JsonMetaDataFormatter)) + if(!(mdf instanceof JsonMetaDataFormatter)) { return cpr; + } /*Here we want to encode the error in machine readable way (e.g. JSON) * Ideally, errorCode would always be set to a canonical error defined in ErrorMsg. * In practice that is rarely the case, so the messy logic below tries to tease diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java index 1b3a226..2f69c8d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java @@ -233,7 +233,7 @@ private void populateQueryPlan() throws IOException { mapTask.setTaskId(stage.getStageId() + "_MAP"); mapTask.setTaskType(TaskType.MAP); stage.addToTaskList(mapTask); - populateOperatorGraph(mapTask, mrTask.getWork().getAliasToWork() + populateOperatorGraph(mapTask, mrTask.getWork().getMapWork().getAliasToWork() .values()); // populate reduce task @@ -245,7 +245,7 @@ private void populateQueryPlan() throws IOException { stage.addToTaskList(reduceTask); Collection> reducerTopOps = new ArrayList>(); - reducerTopOps.add(mrTask.getWork().getReducer()); + reducerTopOps.add(mrTask.getWork().getReduceWork().getReducer()); populateOperatorGraph(reduceTask, reducerTopOps); } } else { @@ -382,7 +382,7 @@ private void extractCounters() throws IOException { } if (task instanceof ExecDriver) { ExecDriver mrTask = (ExecDriver) task; - extractOperatorCounters(mrTask.getWork().getAliasToWork().values(), + extractOperatorCounters(mrTask.getWork().getMapWork().getAliasToWork().values(), task.getId() + "_MAP"); if (mrTask.mapStarted()) { started.add(task.getId() + "_MAP"); @@ -393,7 +393,7 @@ private void extractCounters() throws IOException { if (mrTask.hasReduce()) { Collection> reducerTopOps = new ArrayList>(); - reducerTopOps.add(mrTask.getWork().getReducer()); + reducerTopOps.add(mrTask.getWork().getReduceWork().getReducer()); extractOperatorCounters(reducerTopOps, task.getId() + "_REDUCE"); if (mrTask.reduceStarted()) { started.add(task.getId() + "_REDUCE"); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index 5d8b9b9..7e1f6ef 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -121,8 +121,9 @@ public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws E } if (work.getParseContext() != null) { - JSONObject jsonPlan = outputMap(work.getParseContext().getTopOps(), - "LOGICAL PLAN", out, jsonOutput, work.getExtended(), 0); + out.print("LOGICAL PLAN"); + JSONObject jsonPlan = outputMap(work.getParseContext().getTopOps(), true, + out, jsonOutput, work.getExtended(), 0); if (out != null) { out.println(); } @@ -228,19 +229,16 @@ private String indentString(int indent) { return sb.toString(); } - private JSONObject outputMap(Map mp, String header, PrintStream out, + private JSONObject outputMap(Map mp, boolean hasHeader, PrintStream out, boolean extended, boolean jsonOutput, int indent) throws Exception { - boolean first_el = true; TreeMap tree = new TreeMap(); tree.putAll(mp); JSONObject json = jsonOutput ? new JSONObject() : null; + if (out != null && hasHeader && !mp.isEmpty()) { + out.println(); + } for (Entry ent : tree.entrySet()) { - if (first_el && (out != null)) { - out.println(header); - } - first_el = false; - // Print the key if (out != null) { out.print(indentString(indent)); @@ -286,7 +284,7 @@ else if (ent.getValue() instanceof Serializable) { return jsonOutput ? json : null; } - private JSONArray outputList(List l, String header, PrintStream out, + private JSONArray outputList(List l, PrintStream out, boolean hasHeader, boolean extended, boolean jsonOutput, int indent) throws Exception { boolean first_el = true; @@ -294,10 +292,6 @@ private JSONArray outputList(List l, String header, PrintStream out, JSONArray outputArray = new JSONArray(); for (Object o : l) { - if (first_el && (out != null)) { - out.print(header); - } - if (isPrintable(o)) { String delim = first_el ? " " : ", "; if (out != null) { @@ -311,11 +305,11 @@ private JSONArray outputList(List l, String header, PrintStream out, nl = true; } else if (o instanceof Serializable) { - if (first_el && (out != null)) { + if (first_el && (out != null) && hasHeader) { out.println(); } JSONObject jsonOut = outputPlan((Serializable) o, out, extended, - jsonOutput, jsonOutput ? 0 : indent + 2); + jsonOutput, jsonOutput ? 0 : (hasHeader ? indent + 2 : indent)); if (jsonOutput) { outputArray.put(jsonOut); } @@ -439,10 +433,14 @@ private JSONObject outputPlan(Serializable work, PrintStream out, } String header = null; + boolean skipHeader = xpl_note.skipHeader(); + boolean emptyHeader = false; + if (!xpl_note.displayName().equals("")) { header = indentString(prop_indents) + xpl_note.displayName() + ":"; } else { + emptyHeader = true; prop_indents = indent; header = indentString(prop_indents); } @@ -450,7 +448,9 @@ private JSONObject outputPlan(Serializable work, PrintStream out, // Try the output as a primitive object if (isPrintable(val)) { if (out != null && shouldPrint(xpl_note, val)) { - out.printf("%s ", header); + if (!skipHeader) { + out.printf("%s ", header); + } out.println(val); } if (jsonOutput) { @@ -458,12 +458,26 @@ private JSONObject outputPlan(Serializable work, PrintStream out, } continue; } + + int ind = 0; + if (!jsonOutput) { + if (!skipHeader) { + ind = prop_indents + 2; + } else { + ind = indent; + } + } + // Try this as a map try { // Go through the map and print out the stuff Map mp = (Map) val; - JSONObject jsonOut = outputMap(mp, header, out, extended, jsonOutput, - jsonOutput ? 
0 : prop_indents + 2); + + if (out != null && !skipHeader && mp != null && !mp.isEmpty()) { + out.print(header); + } + + JSONObject jsonOut = outputMap(mp, !skipHeader && !emptyHeader, out, extended, jsonOutput, ind); if (jsonOutput) { json.put(header, jsonOut); } @@ -476,8 +490,12 @@ private JSONObject outputPlan(Serializable work, PrintStream out, // Try this as a list try { List l = (List) val; - JSONArray jsonOut = outputList(l, header, out, extended, jsonOutput, - jsonOutput ? 0 : prop_indents + 2); + + if (out != null && !skipHeader && l != null && !l.isEmpty()) { + out.print(header); + } + + JSONArray jsonOut = outputList(l, out, !skipHeader && !emptyHeader, extended, jsonOutput, ind); if (jsonOutput) { json.put(header, jsonOut); @@ -492,11 +510,11 @@ private JSONObject outputPlan(Serializable work, PrintStream out, // Finally check if it is serializable try { Serializable s = (Serializable) val; - if (out != null) { + + if (!skipHeader && out != null) { out.println(header); } - JSONObject jsonOut = outputPlan(s, out, extended, jsonOutput, - jsonOutput ? 0 : prop_indents + 2); + JSONObject jsonOut = outputPlan(s, out, extended, jsonOutput, ind); if (jsonOutput) { json.put(header, jsonOut); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index cf8bd9d..9bc8dac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -36,7 +36,7 @@ import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; -import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; @@ -63,7 +63,7 @@ * different from regular operators in that it starts off by processing a * Writable data structure from a Table (instead of a Hive Object). 
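A note on the ExplainTask hunk above: the new skipHeader/emptyHeader handling is driven by a skipHeader attribute that this patch adds to the @Explain annotation (the annotation change itself is outside this excerpt). A hypothetical getter using it — the display name, method and field below are made up purely to illustrate what outputPlan is reacting to:

```java
// Hypothetical plan getter: with skipHeader = true, ExplainTask prints the nested
// value without the "Example Tree:" header line and without the extra two-space
// indent it would otherwise add under that header.
@Explain(displayName = "Example Tree", skipHeader = true)
public Map<String, Operator<? extends OperatorDesc>> getExampleTree() {
  return exampleTree;
}
```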
**/ -public class MapOperator extends Operator implements Serializable, Cloneable { +public class MapOperator extends Operator implements Serializable, Cloneable { private static final long serialVersionUID = 1L; @@ -229,14 +229,14 @@ public Converter getPartTblObjectInspectorConverter() { * @param mrwork * @throws HiveException */ - public void initializeAsRoot(Configuration hconf, MapredWork mrwork) + public void initializeAsRoot(Configuration hconf, MapWork mapWork) throws HiveException { - setConf(mrwork); + setConf(mapWork); setChildren(hconf); initialize(hconf, null); } - private MapOpCtx initObjectInspector(MapredWork conf, + private MapOpCtx initObjectInspector(MapWork conf, Configuration hconf, String onefile, Map convertedOI) throws HiveException, ClassNotFoundException, InstantiationException, IllegalAccessException, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index a271279..cee95fd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -59,6 +59,7 @@ import org.apache.hadoop.hive.ql.plan.LoadFileDesc; import org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.api.StageType; @@ -306,9 +307,13 @@ public int execute(DriverContext driverContext) { // the directory this move task is moving if (task instanceof MapRedTask) { MapredWork work = (MapredWork)task.getWork(); - bucketCols = work.getBucketedColsByDirectory().get(path); - sortCols = work.getSortedColsByDirectory().get(path); - numBuckets = work.getNumReduceTasks(); + MapWork mapWork = work.getMapWork(); + bucketCols = mapWork.getBucketedColsByDirectory().get(path); + sortCols = mapWork.getSortedColsByDirectory().get(path); + if (work.getReduceWork() != null) { + numBuckets = work.getReduceWork().getNumReduceTasks(); + } + if (bucketCols != null || sortCols != null) { // This must be a final map reduce task (the task containing the file sink // operator that writes the final output) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index b789d78..27e8d19 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -111,6 +111,7 @@ import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; @@ -118,14 +119,15 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; -import org.apache.hadoop.hive.ql.plan.MapredLocalWork; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.PlanUtils.ExpressionTypes; +import org.apache.hadoop.hive.ql.plan.ReduceWork; +import 
org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.api.Adjacency; import org.apache.hadoop.hive.ql.plan.api.Graph; -import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.stats.StatsFactory; import org.apache.hadoop.hive.ql.stats.StatsPublisher; @@ -166,6 +168,8 @@ */ public static String HADOOP_LOCAL_FS = "file:///"; + public static String MAP_PLAN_NAME = "map.xml"; + public static String REDUCE_PLAN_NAME = "reduce.xml"; /** * ReduceField. @@ -179,54 +183,80 @@ private Utilities() { // prevent instantiation } - private static Map gWorkMap = Collections - .synchronizedMap(new HashMap()); + private static Map gWorkMap = Collections + .synchronizedMap(new HashMap()); private static final Log LOG = LogFactory.getLog(Utilities.class.getName()); - public static void clearMapRedWork(Configuration job) { + public static void clearWork(Configuration job) { + Path mapPath = getPlanPath(job, MAP_PLAN_NAME); + Path reducePath = getPlanPath(job, REDUCE_PLAN_NAME); + + // if the plan path hasn't been initialized just return, nothing to clean. + if (mapPath == null || reducePath == null) { + return; + } + try { - Path planPath = new Path(HiveConf.getVar(job, HiveConf.ConfVars.PLAN)); - FileSystem fs = planPath.getFileSystem(job); - if (fs.exists(planPath)) { - try { - fs.delete(planPath, true); - } catch (IOException e) { - e.printStackTrace(); - } + FileSystem fs = mapPath.getFileSystem(job); + if (fs.exists(mapPath)) { + fs.delete(mapPath, true); } + if (fs.exists(mapPath)) { + fs.delete(mapPath, true); + } + } catch (Exception e) { + e.printStackTrace(); } finally { // where a single process works with multiple plans - we must clear // the cache before working with the next plan. - String jobID = getHiveJobID(job); - if (jobID != null) { - gWorkMap.remove(jobID); + if (mapPath != null) { + gWorkMap.remove(mapPath); + } + if (reducePath != null) { + gWorkMap.remove(reducePath); } } } public static MapredWork getMapRedWork(Configuration job) { - MapredWork gWork = null; + MapredWork w = new MapredWork(); + w.setMapWork(getMapWork(job)); + w.setReduceWork(getReduceWork(job)); + return w; + } + + public static MapWork getMapWork(Configuration job) { + return (MapWork) getBaseWork(job, MAP_PLAN_NAME); + } + + public static ReduceWork getReduceWork(Configuration job) { + return (ReduceWork) getBaseWork(job, REDUCE_PLAN_NAME); + } + + public static BaseWork getBaseWork(Configuration job, String name) { + BaseWork gWork = null; try { - String jobID = getHiveJobID(job); - assert jobID != null; - gWork = gWorkMap.get(jobID); + Path path = getPlanPath(job, name); + assert path != null; + gWork = gWorkMap.get(path); if (gWork == null) { String jtConf = ShimLoader.getHadoopShims().getJobLauncherRpcAddress(job); - String path; + Path localPath; if (jtConf.equals("local")) { - String planPath = HiveConf.getVar(job, HiveConf.ConfVars.PLAN); - path = new Path(planPath).toUri().getPath(); + localPath = path; } else { - path = "HIVE_PLAN" + jobID; + localPath = new Path(name); } - InputStream in = new FileInputStream(path); - MapredWork ret = deserializeMapRedWork(in, job); + InputStream in = new FileInputStream(localPath.toUri().getPath()); + BaseWork ret = deserializeObject(in); gWork = ret; - gWork.initialize(); - gWorkMap.put(jobID, gWork); + gWorkMap.put(path, gWork); } - return (gWork); + return gWork; + } catch (FileNotFoundException fnf) { + // happens. e.g.: no reduce work. 
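Two things worth flagging in the Utilities hunk above. First, a missing reduce.xml is the normal case for a map-only job, which is why getBaseWork turns the FileNotFoundException into a null return here instead of treating it as an error. Second, in clearWork the second exists/delete pair re-checks mapPath even though reducePath is computed just above it and is otherwise never deleted; the symmetric cleanup was presumably intended, roughly:

```java
// Sketch of the cleanup clearWork appears to intend: delete both serialized plan
// halves, then (in the finally block) drop both entries from the plan cache.
FileSystem fs = mapPath.getFileSystem(job);
if (fs.exists(mapPath)) {
  fs.delete(mapPath, true);
}
if (fs.exists(reducePath)) {
  fs.delete(reducePath, true);
}
```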
+ return null; } catch (Exception e) { e.printStackTrace(); throw new RuntimeException(e); @@ -236,15 +266,18 @@ public static MapredWork getMapRedWork(Configuration job) { public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan) { try { Graph stageGraph = plan.getQueryPlan().getStageGraph(); - if (stageGraph == null) + if (stageGraph == null) { return; + } List adjList = stageGraph.getAdjacencyList(); - if (adjList == null) + if (adjList == null) { return; + } for (Adjacency adj : adjList) { List children = adj.getChildren(); - if (children == null || children.isEmpty()) + if (children == null || children.isEmpty()) { return; + } conf.setStrings("mapreduce.workflow.adjacency."+adj.getNode(), children.toArray(new String[children.size()])); } @@ -365,25 +398,40 @@ protected void initialize(Class type, Object oldInstance, Object newInstance, } public static void setMapRedWork(Configuration job, MapredWork w, String hiveScratchDir) { + setMapWork(job, w.getMapWork(), hiveScratchDir); + if (w.getReduceWork() != null) { + setReduceWork(job, w.getReduceWork(), hiveScratchDir); + } + } + + public static void setMapWork(Configuration job, MapWork w, String hiveScratchDir) { + setBaseWork(job, w, hiveScratchDir, MAP_PLAN_NAME); + } + + public static void setReduceWork(Configuration job, ReduceWork w, String hiveScratchDir) { + setBaseWork(job, w, hiveScratchDir, REDUCE_PLAN_NAME); + } + + public static void setBaseWork(Configuration job, BaseWork w, String hiveScratchDir, String name) { try { + setPlanPath(job, hiveScratchDir); - // this is the unique job ID, which is kept in JobConf as part of the plan file name - String jobID = UUID.randomUUID().toString(); - Path planPath = new Path(hiveScratchDir, jobID); - HiveConf.setVar(job, HiveConf.ConfVars.PLAN, planPath.toUri().toString()); + Path planPath = getPlanPath(job, name); // use the default file system of the job FileSystem fs = planPath.getFileSystem(job); FSDataOutputStream out = fs.create(planPath); - serializeMapRedWork(w, out); + serializeObject(w, out); // Serialize the plan to the default hdfs instance // Except for hadoop local mode execution where we should be // able to get the plan directly from the cache if (!ShimLoader.getHadoopShims().isLocalMode(job)) { // Set up distributed cache - DistributedCache.createSymlink(job); - String uriWithLink = planPath.toUri().toString() + "#HIVE_PLAN" + jobID; + if (!DistributedCache.getSymlink(job)) { + DistributedCache.createSymlink(job); + } + String uriWithLink = planPath.toUri().toString() + "#" + name; DistributedCache.addCacheFile(new URI(uriWithLink), job); // set replication of the plan file to a high number. 
we use the same @@ -393,18 +441,36 @@ public static void setMapRedWork(Configuration job, MapredWork w, String hiveScr } // Cache the plan in this process - w.initialize(); - gWorkMap.put(jobID, w); + gWorkMap.put(planPath, w); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException(e); } } - public static String getHiveJobID(Configuration job) { - String planPath = HiveConf.getVar(job, HiveConf.ConfVars.PLAN); - if (planPath != null && !planPath.isEmpty()) { - return (new Path(planPath)).getName(); + private static Path getPlanPath(Configuration job, String name) { + Path planPath = getPlanPath(job); + if (planPath == null) { + return null; + } + return new Path(planPath, name); + } + + private static void setPlanPath(Configuration job, String hiveScratchDir) throws IOException { + if (getPlanPath(job) == null) { + // this is the unique job ID, which is kept in JobConf as part of the plan file name + String jobID = UUID.randomUUID().toString(); + Path planPath = new Path(hiveScratchDir, jobID); + FileSystem fs = planPath.getFileSystem(job); + fs.mkdirs(planPath); + HiveConf.setVar(job, HiveConf.ConfVars.PLAN, planPath.toUri().toString()); + } + } + + private static Path getPlanPath(Configuration job) { + String plan = HiveConf.getVar(job, HiveConf.ConfVars.PLAN); + if (plan != null && !plan.isEmpty()) { + return new Path(plan); } return null; } @@ -443,26 +509,6 @@ public static ExprNodeDesc deserializeExpression(String s, Configuration conf) { } } - /** - * Serialize a single Task. - */ - public static void serializeTasks(Task t, OutputStream out) { - XMLEncoder e = null; - try { - e = new XMLEncoder(out); - // workaround for java 1.5 - e.setPersistenceDelegate(ExpressionTypes.class, new EnumDelegate()); - e.setPersistenceDelegate(GroupByDesc.Mode.class, new EnumDelegate()); - e.setPersistenceDelegate(Operator.ProgressCounter.class, new EnumDelegate()); - - e.writeObject(t); - } finally { - if (null != e) { - e.close(); - } - } - } - public static class CollectionPersistenceDelegate extends DefaultPersistenceDelegate { @Override protected Expression instantiate(Object oldInstance, Encoder out) { @@ -479,14 +525,15 @@ protected void initialize(Class type, Object oldInstance, Object newInstance, En } /** - * Serialize the whole query plan. + * Serialize the object. This helper function mainly makes sure that enums, + * counters, etc are handled properly. */ - public static void serializeQueryPlan(QueryPlan plan, OutputStream out) { + public static void serializeObject(Object plan, OutputStream out) { XMLEncoder e = new XMLEncoder(out); e.setExceptionListener(new ExceptionListener() { public void exceptionThrown(Exception e) { LOG.warn(org.apache.hadoop.util.StringUtils.stringifyException(e)); - throw new RuntimeException("Cannot serialize the query plan", e); + throw new RuntimeException("Cannot serialize object", e); } }); // workaround for java 1.5 @@ -502,79 +549,14 @@ public void exceptionThrown(Exception e) { } /** - * Deserialize the whole query plan. - */ - public static QueryPlan deserializeQueryPlan(InputStream in, Configuration conf) { - XMLDecoder d = null; - try { - d = new XMLDecoder(in, null, null); - QueryPlan ret = (QueryPlan) d.readObject(); - return (ret); - } finally { - if (null != d) { - d.close(); - } - } - } - - /** - * Serialize the mapredWork object to an output stream. DO NOT use this to write to standard - * output since it closes the output stream. DO USE mapredWork.toXML() instead. 
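The typed helpers being removed here (serializeQueryPlan, serializeMapRedWork, serializeMapRedLocalWork and their deserialize counterparts) all collapse into the single serializeObject/deserializeObject pair introduced above. A minimal round trip under the new API might look like the fragment below; planDir stands in for the per-query plan directory that setPlanPath() creates under the scratch dir (HiveConf.ConfVars.PLAN), and resource handling is elided:

```java
// Illustrative round trip for the map-side plan; reduce.xml is handled the same way.
File planFile = new File(planDir, Utilities.MAP_PLAN_NAME);   // "map.xml"

OutputStream out = new FileOutputStream(planFile);
Utilities.serializeObject(mapWork, out);        // XMLEncoder plus the enum delegates

InputStream in = new FileInputStream(planFile);
MapWork roundTripped = Utilities.deserializeObject(in);  // generic <T> T helper,
                                                         // so no explicit cast needed
```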
- */ - public static void serializeMapRedWork(MapredWork w, OutputStream out) { - XMLEncoder e = null; - try { - e = new XMLEncoder(out); - // workaround for java 1.5 - e.setPersistenceDelegate(ExpressionTypes.class, new EnumDelegate()); - e.setPersistenceDelegate(GroupByDesc.Mode.class, new EnumDelegate()); - e.writeObject(w); - } finally { - if (null != e) { - e.close(); - } - } - - } - - public static MapredWork deserializeMapRedWork(InputStream in, Configuration conf) { - XMLDecoder d = null; - try { - d = new XMLDecoder(in, null, null); - MapredWork ret = (MapredWork) d.readObject(); - return (ret); - } finally { - if (null != d) { - d.close(); - } - } - } - - /** - * Serialize the mapredLocalWork object to an output stream. DO NOT use this to write to standard - * output since it closes the output stream. DO USE mapredWork.toXML() instead. + * De-serialize an object. This helper function mainly makes sure that enums, + * counters, etc are handled properly. */ - public static void serializeMapRedLocalWork(MapredLocalWork w, OutputStream out) { - XMLEncoder e = null; - try { - e = new XMLEncoder(out); - // workaround for java 1.5 - e.setPersistenceDelegate(ExpressionTypes.class, new EnumDelegate()); - e.setPersistenceDelegate(GroupByDesc.Mode.class, new EnumDelegate()); - e.writeObject(w); - } finally { - if (null != e) { - e.close(); - } - } - } - - public static MapredLocalWork deserializeMapRedLocalWork(InputStream in, Configuration conf) { + public static T deserializeObject(InputStream in) { XMLDecoder d = null; try { d = new XMLDecoder(in, null, null); - MapredLocalWork ret = (MapredLocalWork) d.readObject(); - return (ret); + return (T) d.readObject(); } finally { if (null != d) { d.close(); @@ -1753,7 +1735,7 @@ public static void copyTableJobPropertiesToConf(TableDesc tbl, JobConf job) { * @return the summary of all the input paths. 
* @throws IOException */ - public static ContentSummary getInputSummary(Context ctx, MapredWork work, PathFilter filter) + public static ContentSummary getInputSummary(Context ctx, MapWork work, PathFilter filter) throws IOException { long[] summary = {0, 0, 0}; @@ -2214,7 +2196,7 @@ public static void reworkMapRedWork(Task task, try { MapredWork mapredWork = ((MapRedTask) task).getWork(); Set> reworkInputFormats = new HashSet>(); - for (PartitionDesc part : mapredWork.getPathToPartitionInfo().values()) { + for (PartitionDesc part : mapredWork.getMapWork().getPathToPartitionInfo().values()) { Class inputFormatCls = part .getInputFileFormatClass(); if (ReworkMapredInputFormat.class.isAssignableFrom(inputFormatCls)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java index 9fbabae..a84b10e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java @@ -56,6 +56,7 @@ import org.apache.hadoop.hive.ql.QueryPlan; import org.apache.hadoop.hive.ql.exec.FetchOperator; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; import org.apache.hadoop.hive.ql.exec.HiveTotalOrderPartitioner; import org.apache.hadoop.hive.ql.exec.JobCloseFeedBack; import org.apache.hadoop.hive.ql.exec.Operator; @@ -63,7 +64,6 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; @@ -73,10 +73,12 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.session.SessionState; @@ -102,7 +104,7 @@ import org.apache.log4j.varia.NullAppender; /** - * ExecDriver is the central class in co-ordinating execution of any map-reduce task. + * ExecDriver is the central class in co-ordinating execution of any map-reduce task. * It's main responsabilities are: * * - Converting the plan (MapredWork) into a MR Job (JobConf) @@ -196,13 +198,13 @@ public ExecDriver(MapredWork plan, JobConf job, boolean isSilent) throws HiveExc * @return true if fatal errors happened during job execution, false otherwise. 
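From this point on, ExecDriver reads the two halves of the plan separately: the map-side operator trees hang off MapWork.getAliasToWork(), and the reduce side, when present, contributes a single root via ReduceWork.getReducer(). checkFatalErrors below, and later updateCounters and the jobClose loop, all follow the same traversal, roughly (visit() is just a placeholder for the per-operator action):

```java
// Common traversal over both plan halves after the split.
for (Operator<? extends OperatorDesc> op : work.getMapWork().getAliasToWork().values()) {
  visit(op);                      // map-side operator trees
}
ReduceWork rWork = work.getReduceWork();
if (rWork != null) {
  visit(rWork.getReducer());      // reduce-side root operator, absent for map-only jobs
}
```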
*/ public boolean checkFatalErrors(Counters ctrs, StringBuilder errMsg) { - for (Operator op : work.getAliasToWork().values()) { + for (Operator op : work.getMapWork().getAliasToWork().values()) { if (op.checkFatalErrors(ctrs, errMsg)) { return true; } } - if (work.getReducer() != null) { - if (work.getReducer().checkFatalErrors(ctrs, errMsg)) { + if (work.getReduceWork() != null) { + if (work.getReduceWork().getReducer().checkFatalErrors(ctrs, errMsg)) { return true; } } @@ -211,18 +213,18 @@ public boolean checkFatalErrors(Counters ctrs, StringBuilder errMsg) { protected void createTmpDirs() throws IOException { // fix up outputs - Map> pa = work.getPathToAliases(); + Map> pa = work.getMapWork().getPathToAliases(); if (pa != null) { List> opList = new ArrayList>(); - if (work.getReducer() != null) { - opList.add(work.getReducer()); + if (work.getReduceWork() != null) { + opList.add(work.getReduceWork().getReducer()); } for (List ls : pa.values()) { for (String a : ls) { - opList.add(work.getAliasToWork().get(a)); + opList.add(work.getMapWork().getAliasToWork().get(a)); while (!opList.isEmpty()) { Operator op = opList.remove(0); @@ -251,6 +253,7 @@ protected void createTmpDirs() throws IOException { /** * Execute a query plan using Hadoop. */ + @SuppressWarnings("deprecation") @Override public int execute(DriverContext driverContext) { @@ -259,16 +262,14 @@ public int execute(DriverContext driverContext) { boolean success = true; - String invalidReason = work.isInvalid(); - if (invalidReason != null) { - throw new RuntimeException("Plan invalid, Reason: " + invalidReason); - } - Context ctx = driverContext.getCtx(); boolean ctxCreated = false; String emptyScratchDirStr; Path emptyScratchDir; + MapWork mWork = work.getMapWork(); + ReduceWork rWork = work.getReduceWork(); + try { if (ctx == null) { ctx = new Context(job); @@ -301,27 +302,27 @@ public int execute(DriverContext driverContext) { throw new RuntimeException(e.getMessage()); } - if (work.getNumMapTasks() != null) { - job.setNumMapTasks(work.getNumMapTasks().intValue()); + if (mWork.getNumMapTasks() != null) { + job.setNumMapTasks(mWork.getNumMapTasks().intValue()); } - if (work.getMaxSplitSize() != null) { - HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, work.getMaxSplitSize().longValue()); + if (mWork.getMaxSplitSize() != null) { + HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mWork.getMaxSplitSize().longValue()); } - if (work.getMinSplitSize() != null) { - HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work.getMinSplitSize().longValue()); + if (mWork.getMinSplitSize() != null) { + HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mWork.getMinSplitSize().longValue()); } - if (work.getMinSplitSizePerNode() != null) { - HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE, work.getMinSplitSizePerNode().longValue()); + if (mWork.getMinSplitSizePerNode() != null) { + HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE, mWork.getMinSplitSizePerNode().longValue()); } - if (work.getMinSplitSizePerRack() != null) { - HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK, work.getMinSplitSizePerRack().longValue()); + if (mWork.getMinSplitSizePerRack() != null) { + HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK, mWork.getMinSplitSizePerRack().longValue()); } - job.setNumReduceTasks(work.getNumReduceTasks().intValue()); + job.setNumReduceTasks(rWork != null ? 
rWork.getNumReduceTasks().intValue() : 0); job.setReducerClass(ExecReducer.class); // set input format information if necessary @@ -338,7 +339,7 @@ public int execute(DriverContext driverContext) { inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); } - if (getWork().isUseBucketizedHiveInputFormat()) { + if (mWork.isUseBucketizedHiveInputFormat()) { inpFormat = BucketizedHiveInputFormat.class.getName(); } @@ -387,11 +388,11 @@ public int execute(DriverContext driverContext) { } try{ - MapredLocalWork localwork = work.getMapLocalWork(); + MapredLocalWork localwork = mWork.getMapLocalWork(); if (localwork != null) { if (!ShimLoader.getHadoopShims().isLocalMode(job)) { Path localPath = new Path(localwork.getTmpFileURI()); - Path hdfsPath = new Path(work.getTmpHDFSFileURI()); + Path hdfsPath = new Path(mWork.getTmpHDFSFileURI()); FileSystem hdfs = hdfsPath.getFileSystem(job); FileSystem localFS = localPath.getFileSystem(job); @@ -429,17 +430,17 @@ public int execute(DriverContext driverContext) { } } work.configureJobConf(job); - addInputPaths(job, work, emptyScratchDirStr, ctx); + addInputPaths(job, mWork, emptyScratchDirStr, ctx); Utilities.setMapRedWork(job, work, ctx.getMRTmpFileURI()); - if (work.getSamplingType() > 0 && work.getNumReduceTasks() > 1) { + if (mWork.getSamplingType() > 0 && rWork != null && rWork.getNumReduceTasks() > 1) { try { - handleSampling(driverContext, work, job, new HiveConf(conf)); + handleSampling(driverContext, mWork, job, new HiveConf(conf)); job.setPartitionerClass(HiveTotalOrderPartitioner.class); } catch (Exception e) { console.printInfo("Not enough sampling data.. Rolling back to single reducer task"); - work.setNumReduceTasks(1); + rWork.setNumReduceTasks(1); job.setNumReduceTasks(1); } } @@ -454,7 +455,7 @@ public int execute(DriverContext driverContext) { // make this client wait if job trcker is not behaving well. 
Throttle.checkJobTracker(job, LOG); - if (work.isGatheringStats()) { + if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) { // initialize stats publishing table StatsPublisher statsPublisher; String statsImplementationClass = HiveConf.getVar(job, HiveConf.ConfVars.HIVESTATSDBCLASS); @@ -496,7 +497,7 @@ public int execute(DriverContext driverContext) { success = false; returnVal = 1; } finally { - Utilities.clearMapRedWork(job); + Utilities.clearWork(job); try { if (ctxCreated) { ctx.clear(); @@ -517,13 +518,13 @@ public int execute(DriverContext driverContext) { try { if (rj != null) { JobCloseFeedBack feedBack = new JobCloseFeedBack(); - if (work.getAliasToWork() != null) { - for (Operator op : work.getAliasToWork().values()) { + if (mWork.getAliasToWork() != null) { + for (Operator op : mWork.getAliasToWork().values()) { op.jobClose(job, success, feedBack); } } - if (work.getReducer() != null) { - work.getReducer().jobClose(job, success, feedBack); + if (rWork != null) { + rWork.getReducer().jobClose(job, success, feedBack); } } } catch (Exception e) { @@ -539,16 +540,16 @@ public int execute(DriverContext driverContext) { return (returnVal); } - private void handleSampling(DriverContext context, MapredWork work, JobConf job, HiveConf conf) + private void handleSampling(DriverContext context, MapWork mWork, JobConf job, HiveConf conf) throws Exception { - assert work.getAliasToWork().keySet().size() == 1; + assert mWork.getAliasToWork().keySet().size() == 1; - String alias = work.getAliases().get(0); - Operator topOp = work.getAliasToWork().get(alias); - PartitionDesc partDesc = work.getAliasToPartnInfo().get(alias); + String alias = mWork.getAliases().get(0); + Operator topOp = mWork.getAliasToWork().get(alias); + PartitionDesc partDesc = mWork.getAliasToPartnInfo().get(alias); - ArrayList paths = work.getPaths(); - ArrayList parts = work.getPartitionDescs(); + ArrayList paths = mWork.getPaths(); + ArrayList parts = mWork.getPartitionDescs(); Path onePath = new Path(paths.get(0)); String tmpPath = context.getCtx().getExternalTmpFileURI(onePath.toUri()); @@ -558,7 +559,7 @@ private void handleSampling(DriverContext context, MapredWork work, JobConf job, PartitionKeySampler sampler = new PartitionKeySampler(); - if (work.getSamplingType() == MapredWork.SAMPLING_ON_PREV_MR) { + if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) { console.printInfo("Use sampling data created in previous MR"); // merges sampling data from previous MR and make paritition keys for total sort for (String path : paths) { @@ -568,7 +569,7 @@ private void handleSampling(DriverContext context, MapredWork work, JobConf job, sampler.addSampleFile(status.getPath(), job); } } - } else if (work.getSamplingType() == MapredWork.SAMPLING_ON_START) { + } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) { console.printInfo("Creating sampling data.."); assert topOp instanceof TableScanOperator; TableScanOperator ts = (TableScanOperator) topOp; @@ -592,7 +593,7 @@ private void handleSampling(DriverContext context, MapredWork work, JobConf job, fetcher.clearFetchContext(); } } else { - throw new IllegalArgumentException("Invalid sampling type " + work.getSamplingType()); + throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType()); } sampler.writePartitionKeys(partitionFile, job); } @@ -601,16 +602,17 @@ private void handleSampling(DriverContext context, MapredWork work, JobConf job, * Set hive input format, and input format file if necessary. 
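One more consequence of the split is visible in the sampling path above: the sampling type now lives on MapWork while the reducer count lives on ReduceWork, so total-order sampling is only attempted when a reduce side with more than one reducer actually exists, and the single-reducer fallback is applied to the ReduceWork rather than to the whole plan. In condensed form:

```java
// Condensed form of the guard around handleSampling() in execute().
if (mWork.getSamplingType() > 0 && rWork != null && rWork.getNumReduceTasks() > 1) {
  try {
    handleSampling(driverContext, mWork, job, new HiveConf(conf));
    job.setPartitionerClass(HiveTotalOrderPartitioner.class);
  } catch (Exception e) {
    // not enough sample data: fall back to a single reducer
    rWork.setNumReduceTasks(1);
    job.setNumReduceTasks(1);
  }
}
```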
*/ protected void setInputAttributes(Configuration conf) { - if (work.getInputformat() != null) { - HiveConf.setVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat()); + MapWork mWork = work.getMapWork(); + if (mWork.getInputformat() != null) { + HiveConf.setVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT, mWork.getInputformat()); } - if (work.getIndexIntermediateFile() != null) { - conf.set("hive.index.compact.file", work.getIndexIntermediateFile()); - conf.set("hive.index.blockfilter.file", work.getIndexIntermediateFile()); + if (mWork.getIndexIntermediateFile() != null) { + conf.set("hive.index.compact.file", mWork.getIndexIntermediateFile()); + conf.set("hive.index.blockfilter.file", mWork.getIndexIntermediateFile()); } // Intentionally overwrites anything the user may have put here - conf.setBoolean("hive.input.format.sorted", work.isInputFormatSorted()); + conf.setBoolean("hive.input.format.sorted", mWork.isInputFormatSorted()); } public boolean mapStarted() { @@ -757,12 +759,12 @@ public static void main(String[] args) throws IOException, HiveException { int ret; if (localtask) { memoryMXBean = ManagementFactory.getMemoryMXBean(); - MapredLocalWork plan = Utilities.deserializeMapRedLocalWork(pathData, conf); + MapredLocalWork plan = (MapredLocalWork) Utilities.deserializeObject(pathData); MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent); ret = ed.executeFromChildJVM(new DriverContext()); } else { - MapredWork plan = Utilities.deserializeMapRedWork(pathData, conf); + MapredWork plan = (MapredWork) Utilities.deserializeObject(pathData); ExecDriver ed = new ExecDriver(plan, conf, isSilent); ret = ed.execute(new DriverContext()); } @@ -823,19 +825,19 @@ public boolean isMapRedTask() { @Override public Collection> getTopOperators() { - return getWork().getAliasToWork().values(); + return getWork().getMapWork().getAliasToWork().values(); } @Override public boolean hasReduce() { MapredWork w = getWork(); - return w.getReducer() != null; + return w.getReduceWork() != null; } /** * Handle a empty/null path for a given alias. 
*/ - private static int addInputPath(String path, JobConf job, MapredWork work, String hiveScratchDir, + private static int addInputPath(String path, JobConf job, MapWork work, String hiveScratchDir, int numEmptyPaths, boolean isEmptyPath, String alias) throws Exception { // either the directory does not exist or it is empty assert path == null || isEmptyPath; @@ -919,7 +921,7 @@ private static int addInputPath(String path, JobConf job, MapredWork work, Strin return numEmptyPaths; } - public static void addInputPaths(JobConf job, MapredWork work, String hiveScratchDir, Context ctx) + public static void addInputPaths(JobConf job, MapWork work, String hiveScratchDir, Context ctx) throws Exception { int numEmptyPaths = 0; @@ -1002,11 +1004,11 @@ public String getName() { @Override public void updateCounters(Counters ctrs, RunningJob rj) throws IOException { - for (Operator op : work.getAliasToWork().values()) { + for (Operator op : work.getMapWork().getAliasToWork().values()) { op.updateCounters(ctrs); } - if (work.getReducer() != null) { - work.getReducer().updateCounters(ctrs); + if (work.getReduceWork() != null) { + work.getReduceWork().getReducer().updateCounters(ctrs); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java index 444e29b..d1e82a2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java @@ -30,11 +30,11 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.FetchOperator; import org.apache.hadoop.hive.ql.exec.MapOperator; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.MapredContext; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; -import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; @@ -45,10 +45,10 @@ import org.apache.hadoop.util.StringUtils; /** - * ExecMapper is the generic Map class for Hive. Together with ExecReducer it is + * ExecMapper is the generic Map class for Hive. Together with ExecReducer it is * the bridge between the map-reduce framework and the Hive operator pipeline at * execution time. 
It's main responsabilities are: - * + * * - Load and setup the operator pipeline from XML * - Run the pipeline by transforming key value pairs to records and forwarding them to the operators * - Stop execution when the "limit" is reached @@ -96,7 +96,7 @@ public void configure(JobConf job) { jc = job; execContext.setJc(jc); // create map and fetch operators - MapredWork mrwork = Utilities.getMapRedWork(job); + MapWork mrwork = Utilities.getMapWork(job); mo = new MapOperator(); mo.setConf(mrwork); // initialize map operator diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java index 82fd3d5..3a33712 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java @@ -28,12 +28,12 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.MapredContext; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.reportStats; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDe; @@ -53,10 +53,10 @@ import org.apache.hadoop.util.StringUtils; /** - * ExecReducer is the generic Reducer class for Hive. Together with ExecMapper it is + * ExecReducer is the generic Reducer class for Hive. Together with ExecMapper it is * the bridge between the map-reduce framework and the Hive operator pipeline at * execution time. 
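With the plan split in two files, the task-side entry points each deserialize only their half: ExecMapper.configure (above) asks for the MapWork, and ExecReducer.configure (below) asks for the ReduceWork. Stripped of the surrounding setup, the two configure paths reduce to roughly:

```java
// Map task side: wire the MapWork into the MapOperator.
MapWork mapWork = Utilities.getMapWork(job);
MapOperator mo = new MapOperator();
mo.setConf(mapWork);

// Reduce task side: the ReduceWork's reducer becomes the root of the pipeline.
ReduceWork reduceWork = Utilities.getReduceWork(job);
Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
reducer.setParentOperators(null);   // clear parents: the reducer is the root here
```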
It's main responsabilities are: - * + * * - Load and setup the operator pipeline from XML * - Run the pipeline by transforming key, value pairs to records and forwarding them to the operators * - Sending start and end group messages to separate records with same key from one another @@ -121,7 +121,7 @@ public void configure(JobConf job) { l4j.info("cannot get classpath: " + e.getMessage()); } jc = job; - MapredWork gWork = Utilities.getMapRedWork(job); + ReduceWork gWork = Utilities.getReduceWork(job); reducer = gWork.getReducer(); reducer.setParentOperators(null); // clear out any parents as reducer is the // root diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java index 9676e7e..d2210b3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java @@ -40,8 +40,10 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.Utilities.StreamPrinter; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.mapred.JobConf; @@ -101,7 +103,7 @@ public int execute(DriverContext driverContext) { conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) { if (inputSummary == null) { - inputSummary = Utilities.getInputSummary(driverContext.getCtx(), work, null); + inputSummary = Utilities.getInputSummary(driverContext.getCtx(), work.getMapWork(), null); } // set the values of totalInputFileSize and totalInputNumFiles, estimating them @@ -109,7 +111,7 @@ public int execute(DriverContext driverContext) { estimateInputSize(); // at this point the number of reducers is precisely defined in the plan - int numReducers = work.getNumReduceTasks(); + int numReducers = work.getReduceWork() == null ? 0 : work.getReduceWork().getNumReduceTasks(); if (LOG.isDebugEnabled()) { LOG.debug("Task: " + getId() + ", Summary: " + @@ -177,7 +179,7 @@ public int execute(DriverContext driverContext) { OutputStream out = FileSystem.getLocal(conf).create(planPath); MapredWork plan = getWork(); LOG.info("Generating plan file " + planPath.toString()); - Utilities.serializeMapRedWork(plan, out); + Utilities.serializeObject(plan, out); String isSilent = "true".equalsIgnoreCase(System .getProperty("test.silent")) ? "-nolog" : ""; @@ -383,26 +385,26 @@ public boolean reduceDone() { * Set the number of reducers for the mapred work. */ private void setNumberOfReducers() throws IOException { + ReduceWork rWork = work.getReduceWork(); // this is a temporary hack to fix things that are not fixed in the compiler - Integer numReducersFromWork = work.getNumReduceTasks(); + Integer numReducersFromWork = rWork == null ? 
0 : rWork.getNumReduceTasks(); - if (work.getReducer() == null) { + if (rWork == null) { console .printInfo("Number of reduce tasks is set to 0 since there's no reduce operator"); - work.setNumReduceTasks(Integer.valueOf(0)); } else { if (numReducersFromWork >= 0) { console.printInfo("Number of reduce tasks determined at compile time: " - + work.getNumReduceTasks()); + + rWork.getNumReduceTasks()); } else if (job.getNumReduceTasks() > 0) { int reducers = job.getNumReduceTasks(); - work.setNumReduceTasks(reducers); + rWork.setNumReduceTasks(reducers); console .printInfo("Number of reduce tasks not specified. Defaulting to jobconf value of: " + reducers); } else { int reducers = estimateNumberOfReducers(); - work.setNumReduceTasks(reducers); + rWork.setNumReduceTasks(reducers); console .printInfo("Number of reduce tasks not specified. Estimated from input data size: " + reducers); @@ -437,7 +439,7 @@ private int estimateNumberOfReducers() throws IOException { if(inputSummary == null) { // compute the summary and stash it away - inputSummary = Utilities.getInputSummary(driverContext.getCtx(), work, null); + inputSummary = Utilities.getInputSummary(driverContext.getCtx(), work.getMapWork(), null); } // if all inputs are sampled, we should shrink the size of reducers accordingly. @@ -459,7 +461,7 @@ private int estimateNumberOfReducers() throws IOException { // and the user has configured Hive to do this, make sure the number of reducers is a // power of two if (conf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO) && - work.isFinalMapRed() && !work.getBucketedColsByDirectory().isEmpty()) { + work.isFinalMapRed() && !work.getMapWork().getBucketedColsByDirectory().isEmpty()) { int reducersLog = (int)(Math.log(reducers) / Math.log(2)) + 1; int reducersPowerTwo = (int)Math.pow(2, reducersLog); @@ -497,11 +499,13 @@ private void estimateInputSize() { return; } + MapWork mWork = work.getMapWork(); + // Initialize the values to be those taken from the input summary totalInputFileSize = inputSummary.getLength(); totalInputNumFiles = inputSummary.getFileCount(); - if (work.getNameToSplitSample() == null || work.getNameToSplitSample().isEmpty()) { + if (mWork.getNameToSplitSample() == null || mWork.getNameToSplitSample().isEmpty()) { // If percentage block sampling wasn't used, we don't need to do any estimation inputSizeEstimated = true; return; @@ -510,10 +514,10 @@ private void estimateInputSize() { // if all inputs are sampled, we should shrink the size of the input accordingly double highestSamplePercentage = 0; boolean allSample = false; - for (String alias : work.getAliasToWork().keySet()) { - if (work.getNameToSplitSample().containsKey(alias)) { + for (String alias : mWork.getAliasToWork().keySet()) { + if (mWork.getNameToSplitSample().containsKey(alias)) { allSample = true; - Double rate = work.getNameToSplitSample().get(alias).getPercent(); + Double rate = mWork.getNameToSplitSample().get(alias).getPercent(); if (rate != null && rate > highestSamplePercentage) { highestSamplePercentage = rate; } @@ -580,7 +584,7 @@ public static String isEligibleForLocalMode(HiveConf conf, @Override public Operator getReducer() { - return getWork().getReducer(); + return getWork().getReduceWork() == null ? 
null : getWork().getReduceWork().getReducer(); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java index f72ecfb..f38ba94 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java @@ -141,7 +141,7 @@ public int execute(DriverContext driverContext) { OutputStream out = FileSystem.getLocal(conf).create(planPath); MapredLocalWork plan = getWork(); LOG.info("Generating plan file " + planPath.toString()); - Utilities.serializeMapRedLocalWork(plan, out); + Utilities.serializeObject(plan, out); String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : ""; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java index 81f7a99..af4e208 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java @@ -55,6 +55,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; @@ -188,7 +189,7 @@ public void generateIndexQuery(List indexes, ExprNodeDesc predicate, if (pctx.getConf().getBoolVar(ConfVars.HIVE_INDEX_COMPACT_BINARY_SEARCH) && useSorted) { // For now, only works if the predicate is a single condition - MapredWork work = null; + MapWork work = null; String originalInputFormat = null; for (Task task : driver.getPlan().getRootTasks()) { // The index query should have one and only one map reduce task in the root tasks @@ -202,7 +203,9 @@ public void generateIndexQuery(List indexes, ExprNodeDesc predicate, work.setInputFormatSorted(false); break; } - work = (MapredWork)task.getWork(); + if (task.getWork() != null) { + work = ((MapredWork)task.getWork()).getMapWork(); + } String inputFormat = work.getInputformat(); originalInputFormat = inputFormat; if (inputFormat == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java index 9ab4b24..b9914dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java @@ -95,7 +95,7 @@ public CombineHiveInputSplit(JobConf job, InputSplitShim inputSplitShim) this.inputSplitShim = inputSplitShim; if (job != null) { Map pathToPartitionInfo = Utilities - .getMapRedWork(job).getPathToPartitionInfo(); + .getMapWork(job).getPathToPartitionInfo(); // extract all the inputFormatClass names for each chunk in the // CombinedSplit. @@ -200,7 +200,7 @@ public void write(DataOutput out) throws IOException { if (inputFormatClassName == null) { Map pathToPartitionInfo = Utilities - .getMapRedWork(getJob()).getPathToPartitionInfo(); + .getMapWork(getJob()).getPathToPartitionInfo(); // extract all the inputFormatClass names for each chunk in the // CombinedSplit. 
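CombineHiveInputFormat above, and HiveInputFormat and the Avro record reader below, all follow the same recipe after this change: fetch only the MapWork for the job (rather than the whole MapredWork) and resolve a split's metadata through its pathToPartitionInfo map. A simplified sketch of that lookup — the real code tolerates parent-directory matches and per-format quirks, which are omitted here, and `split` stands for the FileSplit being read:

```java
// Simplified split-to-partition lookup against the deserialized MapWork.
MapWork mapWork = Utilities.getMapWork(job);
Map<String, PartitionDesc> pathToPartitionInfo = mapWork.getPathToPartitionInfo();
PartitionDesc part = pathToPartitionInfo.get(split.getPath().toString());
// part carries the input format class and table/partition properties needed
// to construct the underlying record reader for this split.
```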
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index adf4923..b04b34e 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -37,7 +37,7 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; @@ -249,10 +249,10 @@ public RecordReader getRecordReader(InputSplit split, JobConf job, } protected Map pathToPartitionInfo; - MapredWork mrwork = null; + MapWork mrwork = null; protected void init(JobConf job) { - mrwork = Utilities.getMapRedWork(job); + mrwork = Utilities.getMapWork(job); pathToPartitionInfo = mrwork.getPathToPartitionInfo(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java index 9ae58f4..37e3879 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java @@ -37,10 +37,10 @@ public class SymbolicInputFormat implements ReworkMapredInputFormat { public void rework(HiveConf job, MapredWork work) throws IOException { - Map pathToParts = work.getPathToPartitionInfo(); + Map pathToParts = work.getMapWork().getPathToPartitionInfo(); List toRemovePaths = new ArrayList(); Map toAddPathToPart = new HashMap(); - Map> pathToAliases = work.getPathToAliases(); + Map> pathToAliases = work.getMapWork().getPathToAliases(); for (Map.Entry pathPartEntry : pathToParts .entrySet()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java index dbc999f..ed2a9af 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java @@ -18,6 +18,10 @@ package org.apache.hadoop.hive.ql.io.avro; +import java.io.IOException; +import java.util.Map; +import java.util.Properties; + import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.generic.GenericData; @@ -29,18 +33,17 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.serde2.avro.AvroGenericRecordWritable; import org.apache.hadoop.hive.serde2.avro.AvroSerdeException; import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapred.*; - - -import java.io.IOException; -import java.util.Map; -import java.util.Properties; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobConfigurable; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; /** * RecordReader optimized against Avro GenericRecords that returns to record @@ -67,7 +70,9 @@ public 
AvroGenericRecordReader(JobConf job, FileSplit split, Reporter reporter) GenericDatumReader gdr = new GenericDatumReader(); - if(latest != null) gdr.setExpected(latest); + if(latest != null) { + gdr.setExpected(latest); + } this.reader = new DataFileReader(new FsInput(split.getPath(), job), gdr); this.reader.sync(split.getStart()); @@ -86,11 +91,11 @@ private Schema getSchema(JobConf job, FileSplit split) throws AvroSerdeException FileSystem fs = split.getPath().getFileSystem(job); // Inside of a MR job, we can pull out the actual properties if(AvroSerdeUtils.insideMRJob(job)) { - MapredWork mapRedWork = Utilities.getMapRedWork(job); + MapWork mapWork = Utilities.getMapWork(job); // Iterate over the Path -> Partition descriptions to find the partition // that matches our input split. - for (Map.Entry pathsAndParts: mapRedWork.getPathToPartitionInfo().entrySet()){ + for (Map.Entry pathsAndParts: mapWork.getPathToPartitionInfo().entrySet()){ String partitionPath = pathsAndParts.getKey(); if(pathIsInPartition(split.getPath(), partitionPath)) { if(LOG.isInfoEnabled()) { @@ -101,11 +106,15 @@ private Schema getSchema(JobConf job, FileSplit split) throws AvroSerdeException Properties props = pathsAndParts.getValue().getProperties(); if(props.containsKey(AvroSerdeUtils.SCHEMA_LITERAL) || props.containsKey(AvroSerdeUtils.SCHEMA_URL)) { return AvroSerdeUtils.determineSchemaOrThrowException(props); - } else + } + else { return null; // If it's not in this property, it won't be in any others + } } } - if(LOG.isInfoEnabled()) LOG.info("Unable to match filesplit " + split + " with a partition."); + if(LOG.isInfoEnabled()) { + LOG.info("Unable to match filesplit " + split + " with a partition."); + } } // In "select * from table" situations (non-MR), we can add things to the job diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/BlockMergeTask.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/BlockMergeTask.java index ad14966..ec45ae4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/BlockMergeTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/BlockMergeTask.java @@ -192,7 +192,7 @@ public int execute(DriverContext driverContext) { try { addInputPaths(job, work); - Utilities.setMapRedWork(job, work, ctx.getMRTmpFileURI()); + Utilities.setMapWork(job, work, ctx.getMRTmpFileURI()); // remove the pwd from conf file so that job tracker doesn't show this // logs diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java index 16f45ec..bc306c1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/MergeWork.java @@ -33,14 +33,14 @@ import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; -import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.Mapper; @Explain(displayName = "Block level merge") -public class MergeWork extends MapredWork implements Serializable { +public class MergeWork extends MapWork implements Serializable { private static final long serialVersionUID = 1L; @@ -70,9 +70,6 @@ public MergeWork(List inputPaths, String outputDir, 
if(this.getPathToPartitionInfo() == null) { this.setPathToPartitionInfo(new LinkedHashMap()); } - if(this.getNumReduceTasks() == null) { - this.setNumReduceTasks(0); - } for(String path: this.inputPaths) { this.getPathToPartitionInfo().put(path, partDesc); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java index f66b82e..403ee10 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; @@ -181,7 +182,9 @@ public int execute(DriverContext driverContext) { try { addInputPaths(job, work); - Utilities.setMapRedWork(job, work, ctx.getMRTmpFileURI()); + MapredWork mrWork = new MapredWork(); + mrWork.setMapWork(work); + Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpFileURI()); // remove the pwd from conf file so that job tracker doesn't show this // logs diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java index 4a1a67e..4eb86ba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java @@ -25,7 +25,7 @@ import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat; import org.apache.hadoop.hive.ql.plan.Explain; -import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.mapred.Mapper; @@ -34,7 +34,7 @@ * */ @Explain(displayName = "Partial Scan Statistics") -public class PartialScanWork extends MapredWork implements Serializable { +public class PartialScanWork extends MapWork implements Serializable { private static final long serialVersionUID = 1L; @@ -52,9 +52,6 @@ public PartialScanWork(List inputPaths) { if(this.getPathToPartitionInfo() == null) { this.setPathToPartitionInfo(new LinkedHashMap()); } - if(this.getNumReduceTasks() == null) { - this.setNumReduceTasks(0); - } for(String path: this.inputPaths) { this.getPathToPartitionInfo().put(path, partDesc); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java index 47b18ba..f0678ef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java @@ -76,7 +76,7 @@ public ColumnTruncateMapper() { @Override public void configure(JobConf job) { jc = job; - work = (ColumnTruncateWork) Utilities.getMapRedWork(job); + work = (ColumnTruncateWork) Utilities.getMapWork(job); String specPath = work.getOutputDir(); Path tmpPath = Utilities.toTempPath(specPath); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java index 6beb54d..e192d7b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.mr.Throttle; import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; +import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; @@ -165,7 +166,9 @@ public int execute(DriverContext driverContext) { try { addInputPaths(job, work); - Utilities.setMapRedWork(job, work, ctx.getMRTmpFileURI()); + MapredWork mrWork = new MapredWork(); + mrWork.setMapWork(work); + Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpFileURI()); // remove the pwd from conf file so that job tracker doesn't show this // logs diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java index edbb098..1a0b2ab 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java @@ -27,12 +27,12 @@ import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; -import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.mapred.Mapper; @Explain(displayName = "Column Truncate") -public class ColumnTruncateWork extends MapredWork implements Serializable { +public class ColumnTruncateWork extends MapWork implements Serializable { private static final long serialVersionUID = 1L; @@ -64,9 +64,6 @@ public ColumnTruncateWork(List droppedColumns, String inputDir, String if(this.getPathToPartitionInfo() == null) { this.setPathToPartitionInfo(new LinkedHashMap()); } - if(this.getNumReduceTasks() == null) { - this.setNumReduceTasks(0); - } this.getPathToPartitionInfo().put(inputDir, partDesc); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java index 4163fd0..21607b4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java @@ -57,6 +57,7 @@ import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -142,10 +143,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // or for a map-reduce job MapredWork currWork = (MapredWork) currTask.getWork(); boolean mergeMapOnly = - hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && currWork.getReducer() == null; + hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && currWork.getReduceWork() == null; boolean mergeMapRed = hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && - currWork.getReducer() != null; + 
currWork.getReduceWork() != null; if (mergeMapOnly || mergeMapRed) { chDir = true; } @@ -201,7 +202,7 @@ private void processLinkedFileDesc(GenMRProcContext ctx, if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, - (MapredWork) currTask.getWork(), false, ctx); + ((MapredWork) currTask.getWork()).getMapWork(), false, ctx); } if (!rootTasks.contains(currTask) @@ -251,7 +252,10 @@ private void addStatsTask(FileSinkOperator nd, MoveTask mvTask, // mark the MapredWork and FileSinkOperator for gathering stats nd.getConf().setGatherStats(true); - mrWork.setGatheringStats(true); + mrWork.getMapWork().setGatheringStats(true); + if (mrWork.getReduceWork() != null) { + mrWork.getReduceWork().setGatheringStats(true); + } nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); nd.getConf().setMaxStatsKeyPrefixLength( hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH)); @@ -357,7 +361,8 @@ private void createMRWorkForMergingFiles (FileSinkOperator fsInput, GenMRProcCon // MoveWork dummyMv = new MoveWork(null, null, null, new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), false); - MapredWork cplan; + MapWork cplan; + Serializable work; if (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) { @@ -370,6 +375,7 @@ private void createMRWorkForMergingFiles (FileSinkOperator fsInput, GenMRProcCon LOG.info("RCFile format- Using block level merge"); cplan = createRCFileMergeTask(fsInputDesc, finalName, dpCtx != null && dpCtx.getNumDPCols() > 0); + work = cplan; } catch (ClassNotFoundException e) { String msg = "Illegal input format class: " + inputFormatClass; throw new SemanticException(msg); @@ -377,12 +383,14 @@ private void createMRWorkForMergingFiles (FileSinkOperator fsInput, GenMRProcCon } else { cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc); + work = new MapredWork(); + ((MapredWork)work).setMapWork(cplan); // use CombineHiveInputFormat for map-only merging } cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"); // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't // know if merge MR2 will be triggered at execution time - ConditionalTask cndTsk = createCondTask(conf, ctx.getCurrTask(), dummyMv, cplan, + ConditionalTask cndTsk = createCondTask(conf, ctx.getCurrTask(), dummyMv, work, fsInputDesc.getFinalDirName()); // keep the dynamic partition context in conditional task resolver context @@ -483,7 +491,7 @@ private void addDependentMoveTasks(GenMRProcContext ctx, Task mvTask, * the last FileSinkOperator in the parent MapReduce work * @return the MapredWork */ - private MapredWork createMRWorkForMergingFiles (HiveConf conf, + private MapWork createMRWorkForMergingFiles (HiveConf conf, Operator topOp, FileSinkDesc fsDesc) { ArrayList aliases = new ArrayList(); @@ -492,10 +500,10 @@ private MapredWork createMRWorkForMergingFiles (HiveConf conf, aliases.add(inputDir); // dummy alias: just use the input path // constructing the default MapredWork - MapredWork cplan = GenMapRedUtils.getMapRedWorkFromConf(conf); + MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf); + MapWork cplan = cMrPlan.getMapWork(); cplan.getPathToAliases().put(inputDir, aliases); cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null)); - cplan.setNumReduceTasks(0); cplan.getAliasToWork().put(inputDir, topOp); 
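Reviewer note on the hunks above: after this change a map-only merge plan is a bare MapWork, and the MapredWork wrapper is added only when the merge has to run as an ordinary map-only MR job; the RCFile block merge keeps using MergeWork, which now extends MapWork. A minimal sketch of that decision point, with a helper name and parameters that are illustrative rather than part of the patch:

    // Hypothetical helper condensing the branch in createMRWorkForMergingFiles above.
    private static Serializable wrapMergePlan(MapWork cplan, boolean blockLevelMerge) {
      // Merging still forces CombineHiveInputFormat on the map-only plan.
      cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
      if (blockLevelMerge) {
        return cplan;               // MergeWork extends MapWork and is submitted as-is
      }
      MapredWork mrWork = new MapredWork();
      mrWork.setMapWork(cplan);     // a regular map-only merge needs the MapredWork wrapper
      return mrWork;                // which is why createCondTask now accepts a Serializable
    }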
cplan.setMapperCannotSpanPartns(true); @@ -510,7 +518,7 @@ private MapredWork createMRWorkForMergingFiles (HiveConf conf, * @return MergeWork if table is stored as RCFile, * null otherwise */ - private MapredWork createRCFileMergeTask(FileSinkDesc fsInputDesc, + private MapWork createRCFileMergeTask(FileSinkDesc fsInputDesc, String finalName, boolean hasDynamicPartitions) throws SemanticException { String inputDir = fsInputDesc.getFinalDirName(); @@ -573,7 +581,7 @@ private boolean isSkewedStoredAsDirs(FileSinkDesc fsInputDesc) { */ private ConditionalTask createCondTask(HiveConf conf, Task currTask, MoveWork mvWork, - MapredWork mergeWork, String inputPath) { + Serializable mergeWork, String inputPath) { // There are 3 options for this ConditionalTask: // 1) Merge the partitions @@ -720,7 +728,7 @@ private String processFS(FileSinkOperator fsOp, Stack stack, if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, - (MapredWork) currTask.getWork(), false, ctx); + ((MapredWork) currTask.getWork()).getMapWork(), false, ctx); } opTaskMap.put(null, currTask); if (!rootTasks.contains(currTask) @@ -732,7 +740,7 @@ private String processFS(FileSinkOperator fsOp, Stack stack, if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, - (MapredWork) mapTask.getWork(), false, ctx); + ((MapredWork) mapTask.getWork()).getMapWork(), false, ctx); } else { UnionOperator currUnionOp = ctx.getCurrUnionOp(); if (currUnionOp != null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java index 50d645f..52237e5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java @@ -77,7 +77,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // If the plan for this reducer does not exist, initialize the plan if (opMapTask == null) { - if (currPlan.getReducer() == null) { + if (currPlan.getReduceWork() == null) { GenMapRedUtils.initPlan(op, ctx); } else { GenMapRedUtils.splitPlan(op, ctx); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java index e299a56..cccf953 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink3.java @@ -85,13 +85,13 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // If the plan for this reducer does not exist, initialize the plan if (reducerTask == null) { // When the reducer is encountered for the first time - if (plan.getReducer() == null) { + if (plan.getReduceWork() == null) { GenMapRedUtils.initUnionPlan(op, union, ctx, unionTask); // When union is followed by a multi-table insert } else { GenMapRedUtils.splitPlan(op, ctx); } - } else if (plan.getReducer() == reducer) { + } else if (plan.getReduceWork() != null && plan.getReduceWork().getReducer() == reducer) { // The union is already initialized. 
However, the union is walked from // another input // initUnionPlan is idempotent diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java index 7178b55..1965f53 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java @@ -117,7 +117,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, handlePartialScanCommand(op, ctx, parseCtx, currTask, parseInfo, statsWork, statsTask); } - currWork.setGatheringStats(true); + currWork.getMapWork().setGatheringStats(true); + if (currWork.getReduceWork() != null) { + currWork.getReduceWork().setGatheringStats(true); + } // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list, // and pass it to setTaskPlan as the last parameter Set confirmedPartns = new HashSet(); @@ -139,9 +142,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias); PrunedPartitionList partList = new PrunedPartitionList(source, confirmedPartns, new HashSet(), null); - GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currWork, false, ctx, partList); + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currWork.getMapWork(), false, ctx, partList); } else { // non-partitioned table - GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currWork, false, ctx); + GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currWork.getMapWork(), false, ctx); } } return null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java index a719a83..9e36938 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java @@ -199,7 +199,7 @@ private void processSubQueryUnionMerge(GenMRProcContext ctx, if (!seenOps.contains(topOp) && topOp != null) { seenOps.add(topOp); GenMapRedUtils.setTaskPlan(ctx.getCurrAliasId(), ctx - .getCurrTopOp(), plan, false, ctx); + .getCurrTopOp(), plan.getMapWork(), false, ctx); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 01fbca5..728426b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -60,12 +60,14 @@ import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; @@ -101,10 +103,11 @@ public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) Operator currTopOp = opProcCtx.getCurrTopOp(); opTaskMap.put(reducer, currTask); - plan.setReducer(reducer); + plan.setReduceWork(new ReduceWork()); + 
plan.getReduceWork().setReducer(reducer); ReduceSinkDesc desc = op.getConf(); - plan.setNumReduceTasks(desc.getNumReducers()); + plan.getReduceWork().setNumReduceTasks(desc.getNumReducers()); List> rootTasks = opProcCtx.getRootTasks(); @@ -114,7 +117,7 @@ public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) rootTasks.add(currTask); } if (reducer.getClass() == JoinOperator.class) { - plan.setNeedsTagging(true); + plan.getReduceWork().setNeedsTagging(true); } assert currTopOp != null; @@ -123,7 +126,7 @@ public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); - setTaskPlan(currAliasId, currTopOp, plan, false, opProcCtx); + setTaskPlan(currAliasId, currTopOp, plan.getMapWork(), false, opProcCtx); } currTopOp = null; @@ -153,13 +156,15 @@ public static void initUnionPlan(ReduceSinkOperator op, UnionOperator currUnionO opProcCtx.getOpTaskMap(); opTaskMap.put(reducer, unionTask); - plan.setReducer(reducer); + + plan.setReduceWork(new ReduceWork()); + plan.getReduceWork().setReducer(reducer); ReduceSinkDesc desc = op.getConf(); - plan.setNumReduceTasks(desc.getNumReducers()); + plan.getReduceWork().setNumReduceTasks(desc.getNumReducers()); if (reducer.getClass() == JoinOperator.class) { - plan.setNeedsTagging(true); + plan.getReduceWork().setNeedsTagging(true); } initUnionPlan(opProcCtx, currUnionOp, unionTask, false); @@ -175,7 +181,7 @@ private static void setUnionPlan(GenMRProcContext opProcCtx, String currAliasId = opProcCtx.getCurrAliasId(); if (!seenOps.contains(currTopOp) || mergeTask) { seenOps.add(currTopOp); - setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); + setTaskPlan(currAliasId, currTopOp, plan.getMapWork(), local, opProcCtx); } currTopOp = null; opProcCtx.setCurrTopOp(currTopOp); @@ -194,13 +200,14 @@ private static void setUnionPlan(GenMRProcContext opProcCtx, for (int pos = 0; pos < size; pos++) { String taskTmpDir = taskTmpDirLst.get(pos); TableDesc tt_desc = tt_descLst.get(pos); - if (plan.getPathToAliases().get(taskTmpDir) == null) { - plan.getPathToAliases().put(taskTmpDir, + MapWork mWork = plan.getMapWork(); + if (mWork.getPathToAliases().get(taskTmpDir) == null) { + mWork.getPathToAliases().put(taskTmpDir, new ArrayList()); - plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); - plan.getPathToPartitionInfo().put(taskTmpDir, + mWork.getPathToAliases().get(taskTmpDir).add(taskTmpDir); + mWork.getPathToPartitionInfo().put(taskTmpDir, new PartitionDesc(tt_desc, null)); - plan.getAliasToWork().put(taskTmpDir, topOperators.get(pos)); + mWork.getAliasToWork().put(taskTmpDir, topOperators.get(pos)); } } } @@ -323,7 +330,7 @@ public static void joinPlan(Operator op, local = (pos == ((MapJoinDesc) op.getConf()).getPosBigTable()) ?
false : true; } - setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); + setTaskPlan(currAliasId, currTopOp, plan.getMapWork(), local, opProcCtx); } currTopOp = null; opProcCtx.setCurrTopOp(currTopOp); @@ -359,10 +366,12 @@ public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) Operator reducer = op.getChildOperators().get(0); // Add the reducer - cplan.setReducer(reducer); + ReduceWork rWork = new ReduceWork(); + cplan.setReduceWork(rWork); + rWork.setReducer(reducer); ReduceSinkDesc desc = op.getConf(); - cplan.setNumReduceTasks(new Integer(desc.getNumReducers())); + cplan.getReduceWork().setNumReduceTasks(new Integer(desc.getNumReducers())); HashMap, Task> opTaskMap = opProcCtx.getOpTaskMap(); @@ -388,7 +397,7 @@ public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) * processing context */ public static void setTaskPlan(String alias_id, - Operator topOp, MapredWork plan, boolean local, + Operator topOp, MapWork plan, boolean local, GenMRProcContext opProcCtx) throws SemanticException { setTaskPlan(alias_id, topOp, plan, local, opProcCtx, null); } @@ -432,7 +441,7 @@ private static ReadEntity getParentViewInfo(String alias_id, * pruned partition list. If it is null it will be computed on-the-fly. */ public static void setTaskPlan(String alias_id, - Operator topOp, MapredWork plan, boolean local, + Operator topOp, MapWork plan, boolean local, GenMRProcContext opProcCtx, PrunedPartitionList pList) throws SemanticException { ParseContext parseCtx = opProcCtx.getParseCtx(); Set inputs = opProcCtx.getInputs(); @@ -698,7 +707,7 @@ public static void setTaskPlan(String alias_id, * table descriptor */ public static void setTaskPlan(String path, String alias, - Operator topOp, MapredWork plan, boolean local, + Operator topOp, MapWork plan, boolean local, TableDesc tt_desc) throws SemanticException { if (path == null || alias == null) { @@ -737,7 +746,7 @@ public static void setTaskPlan(String path, String alias, * @param topOp * current top operator in the path */ - public static void setKeyAndValueDesc(MapredWork plan, + public static void setKeyAndValueDesc(ReduceWork plan, Operator topOp) { if (topOp == null) { return; @@ -778,12 +787,12 @@ public static void setKeyAndValueDescForTaskTree(Task ta } } else if (task instanceof ExecDriver) { MapredWork work = (MapredWork) task.getWork(); - work.deriveExplainAttributes(); + work.getMapWork().deriveExplainAttributes(); HashMap> opMap = work - .getAliasToWork(); + .getMapWork().getAliasToWork(); if (opMap != null && !opMap.isEmpty()) { for (Operator op : opMap.values()) { - setKeyAndValueDesc(work, op); + setKeyAndValueDesc(work.getReduceWork(), op); } } } @@ -804,7 +813,7 @@ public static void setKeyAndValueDescForTaskTree(Task ta */ public static MapredWork getMapRedWork(ParseContext parseCtx) { MapredWork work = getMapRedWorkFromConf(parseCtx.getConf()); - work.setNameToSplitSample(parseCtx.getNameToSplitSample()); + work.getMapWork().setNameToSplitSample(parseCtx.getNameToSplitSample()); return work; } @@ -815,7 +824,8 @@ public static MapredWork getMapRedWork(ParseContext parseCtx) { * @return the new plan */ public static MapredWork getMapRedWorkFromConf(HiveConf conf) { - MapredWork work = new MapredWork(); + MapredWork mrWork = new MapredWork(); + MapWork work = mrWork.getMapWork(); boolean mapperCannotSpanPartns = conf.getBoolVar( @@ -824,11 +834,9 @@ public static MapredWork getMapRedWorkFromConf(HiveConf conf) { work.setPathToAliases(new LinkedHashMap>()); 
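Condensing the GenMapRedUtils hunks above: the reduce side of a plan is now created lazily, only when a ReduceSinkOperator is reached, and every reducer-related setting moves onto the new ReduceWork, while map-side wiring goes through getMapWork(). A rough sketch of the resulting shape, where the method and local names are illustrative and not taken from the patch:

    // Hypothetical condensation of initPlan/splitPlan after the MapWork/ReduceWork split.
    private static void attachReduceSide(MapredWork plan, ReduceSinkOperator op) {
      Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
      ReduceWork rWork = new ReduceWork();              // the reduce side now lives in its own object
      plan.setReduceWork(rWork);
      rWork.setReducer(reducer);
      rWork.setNumReduceTasks(op.getConf().getNumReducers());
      if (reducer.getClass() == JoinOperator.class) {
        rWork.setNeedsTagging(true);                    // was plan.setNeedsTagging(true)
      }
      // Map-side wiring keeps going through the map half of the plan, e.g.
      // setTaskPlan(currAliasId, currTopOp, plan.getMapWork(), false, opProcCtx);
    }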
work.setPathToPartitionInfo(new LinkedHashMap()); work.setAliasToWork(new LinkedHashMap>()); - work.setTagToValueDesc(new ArrayList()); - work.setReducer(null); work.setHadoopSupportsSplittable( conf.getBoolVar(HiveConf.ConfVars.HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE)); - return work; + return mrWork; } /** @@ -939,7 +947,7 @@ public static void splitTasks(Operator op, streamDesc = "$INTNAME"; origStreamDesc = streamDesc; int pos = 0; - while (cplan.getAliasToWork().get(streamDesc) != null) { + while (cplan.getMapWork().getAliasToWork().get(streamDesc) != null) { streamDesc = origStreamDesc.concat(String.valueOf(++pos)); } } @@ -947,12 +955,12 @@ public static void splitTasks(Operator op, // TODO: Allocate work to remove the temporary files and make that // dependent on the redTask if (reducer.getClass() == JoinOperator.class) { - cplan.setNeedsTagging(true); + cplan.getReduceWork().setNeedsTagging(true); } } // Add the path to alias mapping - setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan, local, tt_desc); + setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan.getMapWork(), local, tt_desc); opProcCtx.setCurrTopOp(null); opProcCtx.setCurrAliasId(null); opProcCtx.setCurrTask(childTask); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java index c51babb..a1f0bf6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -77,7 +78,7 @@ public static int getPositionParent(AbstractMapJoinOperator currMapJoinOp) { if (currMapJoinOp != null) { Map>> aliasBucketFileNameMapping = @@ -174,8 +175,8 @@ private static void initMapJoinPlan(AbstractMapJoinOperator oldTask, GenMRProcContext opProcCtx, int pos) throws SemanticException { - MapredWork plan = (MapredWork) oldTask.getWork(); + MapWork plan = ((MapredWork) oldTask.getWork()).getMapWork(); Operator currTopOp = opProcCtx.getCurrTopOp(); List> seenOps = opProcCtx.getSeenOps(); @@ -255,7 +256,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // branches for this mapjoin have been encounered. Join the plan with the plan created // the first time. 
if (opMapTask == null) { - assert currPlan.getReducer() == null; + assert currPlan.getReduceWork() == null; initMapJoinPlan(mapJoin, ctx, pos); } else { // The current plan can be thrown away after being merged with the diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index d83fb66..b5614e2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -134,7 +134,7 @@ private static String genMapJoinLocalWork(MapredWork newWork, MapJoinOperator ma new LinkedHashMap()); for (Map.Entry> entry : - newWork.getAliasToWork().entrySet()) { + newWork.getMapWork().getAliasToWork().entrySet()) { String alias = entry.getKey(); Operator op = entry.getValue(); @@ -162,7 +162,7 @@ private static String genMapJoinLocalWork(MapredWork newWork, MapJoinOperator ma smallTableAliasList.add(alias); // get input path and remove this alias from pathToAlias // because this file will be fetched by fetch operator - LinkedHashMap> pathToAliases = newWork.getPathToAliases(); + LinkedHashMap> pathToAliases = newWork.getMapWork().getPathToAliases(); // keep record all the input path for this alias HashSet pathSet = new HashSet(); @@ -193,7 +193,7 @@ private static String genMapJoinLocalWork(MapredWork newWork, MapJoinOperator ma List partDesc = new ArrayList(); for (String tablePath : pathSet) { - PartitionDesc partitionDesc = newWork.getPathToPartitionInfo().get(tablePath); + PartitionDesc partitionDesc = newWork.getMapWork().getPathToPartitionInfo().get(tablePath); // create fetchwork for non partitioned table if (partitionDesc.getPartSpec() == null || partitionDesc.getPartSpec().size() == 0) { fetchWork = new FetchWork(tablePath, partitionDesc.getTableDesc()); @@ -205,7 +205,7 @@ private static String genMapJoinLocalWork(MapredWork newWork, MapJoinOperator ma } // create fetchwork for partitioned table if (fetchWork == null) { - TableDesc table = newWork.getAliasToPartnInfo().get(alias).getTableDesc(); + TableDesc table = newWork.getMapWork().getAliasToPartnInfo().get(alias).getTableDesc(); fetchWork = new FetchWork(partDir, partDesc, table); } // set alias to fetch work @@ -213,13 +213,13 @@ private static String genMapJoinLocalWork(MapredWork newWork, MapJoinOperator ma } // remove small table ailias from aliasToWork;Avoid concurrent modification for (String alias : smallTableAliasList) { - newWork.getAliasToWork().remove(alias); + newWork.getMapWork().getAliasToWork().remove(alias); } // set up local work - newWork.setMapLocalWork(newLocalWork); + newWork.getMapWork().setMapLocalWork(newLocalWork); // remove reducer - newWork.setReducer(null); + newWork.setReduceWork(null); // return the big table alias if (bigTableAlias == null) { throw new SemanticException("Big Table Alias is null"); @@ -240,8 +240,8 @@ private static String genMapJoinLocalWork(MapredWork newWork, MapJoinOperator ma public static String genMapJoinOpAndLocalWork(MapredWork newWork, JoinOperator op, int mapJoinPos) throws SemanticException { LinkedHashMap, OpParseContext> opParseCtxMap = - newWork.getOpParseCtxMap(); - QBJoinTree newJoinTree = newWork.getJoinTree(); + newWork.getMapWork().getOpParseCtxMap(); + QBJoinTree newJoinTree = newWork.getMapWork().getJoinTree(); // generate the map join operator; already checked the map join MapJoinOperator newMapJoinOp = MapJoinProcessor.convertMapJoin(opParseCtxMap, op, newJoinTree, mapJoinPos, true, 
false); @@ -256,8 +256,8 @@ public static String genLocalWorkForMapJoin(MapredWork newWork, MapJoinOperator String bigTableAlias = MapJoinProcessor .genMapJoinLocalWork(newWork, newMapJoinOp, mapJoinPos); // clean up the mapred work - newWork.setOpParseCtxMap(null); - newWork.setJoinTree(null); + newWork.getMapWork().setOpParseCtxMap(null); + newWork.getMapWork().setJoinTree(null); return bigTableAlias; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/AbstractJoinTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/AbstractJoinTaskDispatcher.java index c876ab7..33ef581 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/AbstractJoinTaskDispatcher.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/AbstractJoinTaskDispatcher.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.lib.TaskGraphWalker.TaskGraphWalkerContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MapWork; /** * Common iteration methods for converting joins and sort-merge joins. @@ -119,7 +120,7 @@ protected void replaceTask( } } - public long getTotalKnownInputSize(Context context, MapredWork currWork, + public long getTotalKnownInputSize(Context context, MapWork currWork, Map> pathToAliases, HashMap aliasToSize) throws SemanticException { try { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java index 35dfdc5..87fba2d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java @@ -86,13 +86,13 @@ private void inferBucketingSorting(List mapRedTasks) throws Semantic continue; } - Operator reducer = mapRedTask.getWork().getReducer(); - if (reducer == null) { + if (mapRedTask.getWork().getReduceWork() == null) { continue; } + Operator reducer = mapRedTask.getWork().getReduceWork().getReducer(); // uses sampling, which means it's not bucketed - boolean disableBucketing = mapRedTask.getWork().getSamplingType() > 0; + boolean disableBucketing = mapRedTask.getWork().getMapWork().getSamplingType() > 0; BucketingSortingCtx bCtx = new BucketingSortingCtx(disableBucketing); // RuleRegExp rules are used to match operators anywhere in the tree @@ -145,8 +145,8 @@ private void inferBucketingSorting(List mapRedTasks) throws Semantic topNodes.add(reducer); ogw.startWalking(topNodes, null); - mapRedTask.getWork().getBucketedColsByDirectory().putAll(bCtx.getBucketedColsByDirectory()); - mapRedTask.getWork().getSortedColsByDirectory().putAll(bCtx.getSortedColsByDirectory()); + mapRedTask.getWork().getMapWork().getBucketedColsByDirectory().putAll(bCtx.getBucketedColsByDirectory()); + mapRedTask.getWork().getMapWork().getSortedColsByDirectory().putAll(bCtx.getSortedColsByDirectory()); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java index 6d78e33..4d7bbf5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java @@ -50,10 +50,12 @@ import 
org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx; import org.apache.hadoop.hive.ql.plan.ConditionalWork; import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.ReduceWork; /* * Convert tasks involving JOIN into MAPJOIN. @@ -108,7 +110,7 @@ public CommonJoinTaskDispatcher(PhysicalContext context) { } // Get the position of the big table for this join operator and the given alias - private int getPosition(MapredWork work, Operator joinOp, + private int getPosition(MapWork work, Operator joinOp, String alias) { Operator parentOp = work.getAliasToWork().get(alias); @@ -127,9 +129,9 @@ private int getPosition(MapredWork work, Operator joinOp */ private void mergeMapJoinTaskWithChildMapJoinTask(MapRedTask task, Configuration conf) { MapRedTask childTask = (MapRedTask) task.getChildTasks().get(0); - MapredWork work = task.getWork(); + MapWork work = task.getWork().getMapWork(); MapredLocalWork localWork = work.getMapLocalWork(); - MapredWork childWork = childTask.getWork(); + MapWork childWork = childTask.getWork().getMapWork(); MapredLocalWork childLocalWork = childWork.getMapLocalWork(); // Can this be merged @@ -256,19 +258,26 @@ private void mergeMapJoinTaskWithChildMapJoinTask(MapRedTask task, Configuration * @param childTask */ private void copyReducerConf(MapRedTask task, MapRedTask childTask) { - MapredWork childWork = childTask.getWork(); + MapredWork mrChildWork = childTask.getWork(); + ReduceWork childWork = childTask.getWork().getReduceWork(); + if (childWork == null) { + return; + } + Operator childReducer = childWork.getReducer(); MapredWork work = task.getWork(); if (childReducer == null) { return; } - work.setReducer(childReducer); - work.setNumReduceTasks(childWork.getNumReduceTasks()); - work.setJoinTree(childWork.getJoinTree()); - work.setNeedsTagging(childWork.getNeedsTagging()); + ReduceWork rWork = new ReduceWork(); + work.setReduceWork(rWork); + rWork.setReducer(childReducer); + rWork.setNumReduceTasks(childWork.getNumReduceTasks()); + work.getMapWork().setJoinTree(mrChildWork.getMapWork().getJoinTree()); + rWork.setNeedsTagging(childWork.getNeedsTagging()); // Make sure the key configuration is correct, clear and regenerate. - work.getTagToValueDesc().clear(); + rWork.getTagToValueDesc().clear(); GenMapRedUtils.setKeyAndValueDescForTaskTree(task); } @@ -303,10 +312,9 @@ private void mergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configura return; } MapRedTask childTask = (MapRedTask) firstChildTask; - MapredWork mapJoinWork = mapJoinTask.getWork(); + MapWork mapJoinWork = mapJoinTask.getWork().getMapWork(); MapredWork childWork = childTask.getWork(); - Operator childReducer = childWork.getReducer(); - if (childReducer == null) { + if (childWork.getReduceWork() == null) { // Not a MR job, nothing to merge. 
return; } @@ -316,7 +324,7 @@ private void mergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configura if (aliasToWork.size() > 1) { return; } - Map> childPathToAliases = childWork.getPathToAliases(); + Map> childPathToAliases = childWork.getMapWork().getPathToAliases(); if (childPathToAliases.size() > 1) { return; } @@ -347,7 +355,7 @@ private void mergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configura } MapredLocalWork mapJoinLocalWork = mapJoinWork.getMapLocalWork(); - MapredLocalWork childLocalWork = childWork.getMapLocalWork(); + MapredLocalWork childLocalWork = childWork.getMapWork().getMapLocalWork(); // Either of them should not be bucketed if ((mapJoinLocalWork != null && mapJoinLocalWork.getBucketMapjoinContext() != null) || @@ -355,12 +363,12 @@ private void mergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configura return; } - if (childWork.getAliasToWork().size() > 1) { + if (childWork.getMapWork().getAliasToWork().size() > 1) { return; } Operator childAliasOp = - childWork.getAliasToWork().values().iterator().next(); + childWork.getMapWork().getAliasToWork().values().iterator().next(); if (mapJoinTaskFileSinkOperator.getParentOperators().size() > 1) { return; } @@ -387,10 +395,10 @@ private void mergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configura parentOps.add(parentFOp); childAliasOp.setParentOperators(parentOps); - mapJoinWork.getAliasToPartnInfo().putAll(childWork.getAliasToPartnInfo()); - for (Map.Entry childWorkEntry : childWork.getPathToPartitionInfo() + mapJoinWork.getAliasToPartnInfo().putAll(childWork.getMapWork().getAliasToPartnInfo()); + for (Map.Entry childWorkEntry : childWork.getMapWork().getPathToPartitionInfo() .entrySet()) { - if (childWork.getAliasToPartnInfo().containsValue(childWorkEntry.getKey())) { + if (childWork.getMapWork().getAliasToPartnInfo().containsValue(childWorkEntry.getKey())) { mapJoinWork.getPathToPartitionInfo() .put(childWorkEntry.getKey(), childWorkEntry.getValue()); } @@ -428,7 +436,7 @@ private void mergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configura } currTask.setTaskTag(Task.COMMON_JOIN); - MapredWork currWork = currTask.getWork(); + MapWork currWork = currTask.getWork().getMapWork(); // create conditional work list and task list List listWorks = new ArrayList(); @@ -519,7 +527,7 @@ private void mergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configura if (convertJoinMapJoin) { // create map join task and set big table as bigTablePosition - MapRedTask newTask = convertTaskToMapJoinTask(currWork, bigTablePosition).getFirst(); + MapRedTask newTask = convertTaskToMapJoinTask(currTask.getWork(), bigTablePosition).getFirst(); newTask.setTaskTag(Task.MAPJOIN_ONLY_NOBACKUP); replaceTask(currTask, newTask, physicalContext); @@ -555,9 +563,9 @@ private void mergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configura } // deep copy a new mapred work from xml // Once HIVE-4396 is in, it would be faster to use a cheaper method to clone the plan - String xml = currWork.toXML(); + String xml = currTask.getWork().toXML(); InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8")); - MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf()); + MapredWork newWork = Utilities.deserializeObject(in); // create map join task and set big table as i ObjectPair newTaskAlias = convertTaskToMapJoinTask(newWork, i); @@ -642,14 +650,15 @@ private boolean checkOperatorOKMapJoinConversion(Operator reducerOp = work.getReducer(); + Operator reducerOp = 
rWork.getReducer(); if (reducerOp instanceof JoinOperator) { /* Is any operator present, which prevents the conversion */ - Map> aliasToWork = work.getAliasToWork(); + Map> aliasToWork = mWork.getAliasToWork(); for (Operator op : aliasToWork.values()) { if (!checkOperatorOKMapJoinConversion(op)) { return null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java index 2bbb278..ee4d4d1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.optimizer.physical; import java.io.ByteArrayInputStream; -import java.io.File; import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.util.ArrayList; @@ -50,6 +49,7 @@ import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -104,6 +104,7 @@ private GenMRSkewJoinProcessor() { * https://issues.apache.org/jira/browse/HIVE-964. * */ + @SuppressWarnings("unchecked") public static void processSkewJoin(JoinOperator joinOp, Task currTask, ParseContext parseCtx) throws SemanticException { @@ -151,7 +152,7 @@ public static void processSkewJoin(JoinOperator joinOp, List> listTasks = new ArrayList>(); MapredWork currPlan = (MapredWork) currTask.getWork(); - TableDesc keyTblDesc = (TableDesc) currPlan.getKeyDesc().clone(); + TableDesc keyTblDesc = (TableDesc) currPlan.getReduceWork().getKeyDesc().clone(); List joinKeys = Utilities .getColumnNames(keyTblDesc.getProperties()); List joinKeyTypes = Utilities.getColumnTypes(keyTblDesc @@ -232,7 +233,7 @@ public static void processSkewJoin(JoinOperator joinOp, for (int i = 0; i < numAliases - 1; i++) { Byte src = tags[i]; - MapredWork newPlan = PlanUtils.getMapRedWork(); + MapWork newPlan = PlanUtils.getMapRedWork().getMapWork(); // This code has been only added for testing boolean mapperCannotSpanPartns = @@ -246,7 +247,7 @@ public static void processSkewJoin(JoinOperator joinOp, StringBuilder sb = new StringBuilder(xmlPlan); ByteArrayInputStream bis; bis = new ByteArrayInputStream(sb.toString().getBytes("UTF-8")); - clonePlan = Utilities.deserializeMapRedWork(bis, parseCtx.getConf()); + clonePlan = Utilities.deserializeObject(bis); } catch (UnsupportedEncodingException e) { throw new SemanticException(e); } @@ -276,7 +277,7 @@ public static void processSkewJoin(JoinOperator joinOp, newPlan.getPathToPartitionInfo().put(bigKeyDirPath, part); newPlan.getAliasToPartnInfo().put(alias, part); - Operator reducer = clonePlan.getReducer(); + Operator reducer = clonePlan.getReduceWork().getReducer(); assert reducer instanceof JoinOperator; JoinOperator cloneJoinOp = (JoinOperator) reducer; @@ -328,16 +329,18 @@ public static void processSkewJoin(JoinOperator joinOp, newPlan .setMinSplitSize(HiveConf.getLongVar(jc, HiveConf.ConfVars.HIVESKEWJOINMAPJOINMINSPLIT)); newPlan.setInputformat(HiveInputFormat.class.getName()); - Task skewJoinMapJoinTask = TaskFactory.get( - newPlan, jc); + + MapredWork w = new MapredWork(); + w.setMapWork(newPlan); + + Task skewJoinMapJoinTask = TaskFactory.get(w, jc); 
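The skew-join hunk above repeats a pattern that also appears in PartialScanTask and ColumnTruncateTask: a standalone MapWork (or a subclass of it) is boxed into a MapredWork right before being handed to TaskFactory or Utilities.setMapRedWork. A sketch of that wrapping step, with an assumed helper name and signature:

    // Hypothetical wrapper for submitting a map-only plan after the MapWork/ReduceWork split.
    private static Task<? extends Serializable> submitMapOnly(MapWork mapPlan, HiveConf conf) {
      MapredWork w = new MapredWork();
      w.setMapWork(mapPlan);          // no ReduceWork is attached at all
      return TaskFactory.get(w, conf); // previously the MapredWork itself was the map-only plan
    }

The same two lines (new MapredWork plus setMapWork) precede Utilities.setMapRedWork in the PartialScanTask and ColumnTruncateTask hunks earlier in this patch.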
bigKeysDirToTaskMap.put(bigKeyDirPath, skewJoinMapJoinTask); listWorks.add(skewJoinMapJoinTask.getWork()); listTasks.add(skewJoinMapJoinTask); } ConditionalWork cndWork = new ConditionalWork(listWorks); - ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, - parseCtx.getConf()); + ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, parseCtx.getConf()); cndTsk.setListTasks(listTasks); cndTsk.setResolver(new ConditionalResolverSkewJoin()); cndTsk diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java index efaeccf..010ac54 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java @@ -48,8 +48,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ConditionalResolver; import org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin; -import - org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx; +import org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx; import org.apache.hadoop.hive.ql.plan.ConditionalResolverSkewJoin; import org.apache.hadoop.hive.ql.plan.ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx; import org.apache.hadoop.hive.ql.plan.ConditionalWork; @@ -98,14 +97,14 @@ private void processCurrentTask(Task currTask, ConditionalTask conditionalTask) throws SemanticException { // get current mapred work and its local work MapredWork mapredWork = (MapredWork) currTask.getWork(); - MapredLocalWork localwork = mapredWork.getMapLocalWork(); + MapredLocalWork localwork = mapredWork.getMapWork().getMapLocalWork(); if (localwork != null) { // get the context info and set up the shared tmp URI Context ctx = physicalContext.getContext(); String tmpFileURI = Utilities.generateTmpURI(ctx.getLocalTmpFileURI(), currTask.getId()); localwork.setTmpFileURI(tmpFileURI); String hdfsTmpURI = Utilities.generateTmpURI(ctx.getMRTmpFileURI(), currTask.getId()); - mapredWork.setTmpHDFSFileURI(hdfsTmpURI); + mapredWork.getMapWork().setTmpHDFSFileURI(hdfsTmpURI); // create a task for this local work; right now, this local work is shared // by the original MapredTask and this new generated MapredLocalTask. 
MapredLocalTask localTask = (MapredLocalTask) TaskFactory.get(localwork, physicalContext @@ -134,7 +133,7 @@ private void processCurrentTask(Task currTask, newLocalWork.setTmpFileURI(tmpFileURI); newLocalWork.setInputFileChangeSensitive(localwork.getInputFileChangeSensitive()); newLocalWork.setBucketMapjoinContext(localwork.copyPartSpecMappingOnly()); - mapredWork.setMapLocalWork(newLocalWork); + mapredWork.getMapWork().setMapLocalWork(newLocalWork); // get all parent tasks List> parentTasks = currTask.getParentTasks(); currTask.setParentTasks(null); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java index 15653bf..b208d64 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; @@ -188,7 +189,7 @@ public MetadataOnlyTaskDispatcher(PhysicalContext context) { physicalContext = context; } - private String getAliasForTableScanOperator(MapredWork work, + private String getAliasForTableScanOperator(MapWork work, TableScanOperator tso) { for (Map.Entry> entry : @@ -211,7 +212,7 @@ private PartitionDesc changePartitionToMetadataOnly(PartitionDesc desc) { return desc; } - private List getPathsForAlias(MapredWork work, String alias) { + private List getPathsForAlias(MapWork work, String alias) { List paths = new ArrayList(); for (Map.Entry> entry : work.getPathToAliases().entrySet()) { @@ -223,7 +224,7 @@ private PartitionDesc changePartitionToMetadataOnly(PartitionDesc desc) { return paths; } - private void processAlias(MapredWork work, String alias) { + private void processAlias(MapWork work, String alias) { // Change the alias partition desc PartitionDesc aliasPartn = work.getAliasToPartnInfo().get(alias); changePartitionToMetadataOnly(aliasPartn); @@ -247,7 +248,7 @@ private String encode(Map partSpec) { return partSpec.toString().replaceAll("[:/#\\?]", "_"); } - private void convertToMetadataOnlyQuery(MapredWork work, + private void convertToMetadataOnlyQuery(MapWork work, TableScanOperator tso) { String alias = getAliasForTableScanOperator(work, tso); processAlias(work, alias); @@ -306,7 +307,7 @@ public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) while (iterator.hasNext()) { TableScanOperator tso = iterator.next(); LOG.info("Metadata only table scan for " + tso.getConf().getAlias()); - convertToMetadataOnlyQuery((MapredWork) task.getWork(), tso); + convertToMetadataOnlyQuery(((MapredWork) task.getWork()).getMapWork(), tso); } return null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java index c0c232e..2e1d15c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java @@ -27,7 +27,9 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.ReduceWork; /** * Mark final MapredWork for ORDER BY to use sampling and set number of reduce task as -1 @@ -39,12 +41,16 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { if (!(task instanceof MapRedTask) || !((MapRedTask)task).getWork().isFinalMapRed()) { continue; // this could be replaced by bucketing on RS + bucketed fetcher for next MR } - MapredWork mapreWork = ((MapRedTask) task).getWork(); - if (mapreWork.getNumReduceTasks() != 1 || mapreWork.getAliasToWork().size() != 1 || - mapreWork.getSamplingType() > 0 || mapreWork.getReducer() == null) { + MapredWork mrWork = ((MapRedTask) task).getWork(); + MapWork mapWork = mrWork.getMapWork(); + ReduceWork reduceWork = mrWork.getReduceWork(); + + if (reduceWork == null || reduceWork.getNumReduceTasks() != 1 + || mapWork.getAliasToWork().size() != 1 || mapWork.getSamplingType() > 0 + || reduceWork.getReducer() == null) { continue; } - Operator operator = mapreWork.getAliasToWork().values().iterator().next(); + Operator operator = mapWork.getAliasToWork().values().iterator().next(); if (!(operator instanceof TableScanOperator)) { continue; } @@ -55,8 +61,8 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { continue; } child.getConf().setNumReducers(-1); - mapreWork.setNumReduceTasks(-1); - mapreWork.setSamplingType(MapredWork.SAMPLING_ON_START); + reduceWork.setNumReduceTasks(-1); + mapWork.setSamplingType(MapWork.SAMPLING_ON_START); } return pctx; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java index 88786ff..f48d118 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java @@ -74,7 +74,7 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) Task task = (Task) nd; if (!task.isMapRedTask() || task instanceof ConditionalTask - || ((MapredWork) task.getWork()).getReducer() == null) { + || ((MapredWork) task.getWork()).getReduceWork() == null) { return null; } @@ -94,7 +94,9 @@ public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) // iterator the reducer operator tree ArrayList topNodes = new ArrayList(); - topNodes.add(((MapredWork) task.getWork()).getReducer()); + if (((MapredWork)task.getWork()).getReduceWork() != null) { + topNodes.add(((MapredWork) task.getWork()).getReduceWork().getReducer()); + } ogw.startWalking(topNodes, null); return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java index af56857..da5115b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java @@ -52,10 +52,12 @@ import org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx; import org.apache.hadoop.hive.ql.plan.ConditionalWork; import org.apache.hadoop.hive.ql.plan.FetchWork; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; /** @@ -72,7 +74,7 @@ public SortMergeJoinTaskDispatcher(PhysicalContext context) { // Convert the work in the SMB plan to a regular join // Note that the operator tree is not fixed, only the path/alias mappings in the // plan are fixed. The operator tree will still contain the SMBJoinOperator - private void genSMBJoinWork(MapredWork currWork, SMBMapJoinOperator smbJoinOp) { + private void genSMBJoinWork(MapWork currWork, SMBMapJoinOperator smbJoinOp) { // Remove the paths which are not part of aliasToPartitionInfo Map aliasToPartitionInfo = currWork.getAliasToPartnInfo(); List removePaths = new ArrayList(); @@ -150,7 +152,7 @@ private MapredWork convertSMBWorkToJoinWork(MapredWork currWork, SMBMapJoinOpera // deep copy a new mapred work InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8")); - MapredWork currJoinWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf()); + MapredWork currJoinWork = Utilities.deserializeObject(in); SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(currJoinWork); // Add the row resolver for the new operator @@ -158,7 +160,7 @@ private MapredWork convertSMBWorkToJoinWork(MapredWork currWork, SMBMapJoinOpera physicalContext.getParseContext().getOpParseCtx(); opParseContextMap.put(newSMBJoinOp, opParseContextMap.get(oldSMBJoinOp)); // change the newly created map-red plan as if it was a join operator - genSMBJoinWork(currJoinWork, newSMBJoinOp); + genSMBJoinWork(currJoinWork.getMapWork(), newSMBJoinOp); return currJoinWork; } catch (Exception e) { e.printStackTrace(); @@ -174,24 +176,25 @@ private MapredWork convertSMBWorkToJoinWork(MapredWork currWork, SMBMapJoinOpera throws UnsupportedEncodingException, SemanticException { // deep copy a new mapred work from xml InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8")); - MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf()); + MapredWork newWork = Utilities.deserializeObject(in); // create a mapred task for this work MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext .getParseContext().getConf()); // generate the map join operator; already checked the map join MapJoinOperator newMapJoinOp = 
getMapJoinOperator(newTask, newWork, smbJoinOp, joinTree, bigTablePosition); + // The reducer needs to be restored - Consider a query like: // select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key; // The reducer contains a groupby, which needs to be restored. - Operator reducer = newWork.getReducer(); + ReduceWork rWork = newWork.getReduceWork(); // create the local work for this plan String bigTableAlias = MapJoinProcessor.genLocalWorkForMapJoin(newWork, newMapJoinOp, bigTablePosition); // restore the reducer - newWork.setReducer(reducer); + newWork.setReduceWork(rWork); return new ObjectPair(newTask, bigTableAlias); } @@ -259,10 +262,10 @@ private boolean isEligibleForOptimization(SMBMapJoinOperator originalSMBJoinOp) MapredWork currJoinWork = convertSMBWorkToJoinWork(currWork, originalSMBJoinOp); SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(currJoinWork); - currWork.setOpParseCtxMap(parseCtx.getOpParseCtx()); - currWork.setJoinTree(joinTree); - currJoinWork.setOpParseCtxMap(parseCtx.getOpParseCtx()); - currJoinWork.setJoinTree(joinTree); + currWork.getMapWork().setOpParseCtxMap(parseCtx.getOpParseCtx()); + currWork.getMapWork().setJoinTree(joinTree); + currJoinWork.getMapWork().setOpParseCtxMap(parseCtx.getOpParseCtx()); + currJoinWork.getMapWork().setJoinTree(joinTree); // create conditional work list and task list List listWorks = new ArrayList(); @@ -272,7 +275,7 @@ private boolean isEligibleForOptimization(SMBMapJoinOperator originalSMBJoinOp) HashMap> aliasToTask = new HashMap>(); // Note that pathToAlias will behave as if the original plan was a join plan - HashMap> pathToAliases = currJoinWork.getPathToAliases(); + HashMap> pathToAliases = currJoinWork.getMapWork().getPathToAliases(); // generate a map join task for the big table SMBJoinDesc originalSMBJoinDesc = originalSMBJoinOp.getConf(); @@ -289,7 +292,7 @@ private boolean isEligibleForOptimization(SMBMapJoinOperator originalSMBJoinOp) HashMap aliasToSize = new HashMap(); Configuration conf = context.getConf(); try { - long aliasTotalKnownInputSize = getTotalKnownInputSize(context, currJoinWork, + long aliasTotalKnownInputSize = getTotalKnownInputSize(context, currJoinWork.getMapWork(), pathToAliases, aliasToSize); String xml = currJoinWork.toXML(); @@ -339,8 +342,8 @@ private boolean isEligibleForOptimization(SMBMapJoinOperator originalSMBJoinOp) listWorks.add(currTask.getWork()); listTasks.add(currTask); // clear JoinTree and OP Parse Context - currWork.setOpParseCtxMap(null); - currWork.setJoinTree(null); + currWork.getMapWork().setOpParseCtxMap(null); + currWork.getMapWork().setJoinTree(null); // create conditional task and insert conditional task into task tree ConditionalWork cndWork = new ConditionalWork(listWorks); @@ -417,9 +420,9 @@ private SMBMapJoinOperator getSMBMapJoinOp(Operator curr } private SMBMapJoinOperator getSMBMapJoinOp(MapredWork work) throws SemanticException { - if (work != null) { - Operator reducer = work.getReducer(); - for (Operator op : work.getAliasToWork().values()) { + if (work != null && work.getReduceWork() != null) { + Operator reducer = work.getReduceWork().getReducer(); + for (Operator op : work.getMapWork().getAliasToWork().values()) { SMBMapJoinOperator smbMapJoinOp = getSMBMapJoinOp(op, reducer); if (smbMapJoinOp != null) { return smbMapJoinOp; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java index 8bce7c3..5c6751c 100644 
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; @@ -162,7 +163,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex); // prepare the map reduce job to use indexing - MapredWork work = currentTask.getWork(); + MapWork work = currentTask.getWork().getMapWork(); work.setInputformat(queryContext.getIndexInputFormat()); work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile()); // modify inputs based on index query @@ -204,7 +205,7 @@ private void rewriteForIndexes(ExprNodeDesc predicate, List indexes, // check the size try { - ContentSummary inputSummary = Utilities.getInputSummary(pctx.getContext(), task.getWork(), null); + ContentSummary inputSummary = Utilities.getInputSummary(pctx.getContext(), task.getWork().getMapWork(), null); long inputSize = inputSummary.getLength(); if (!indexHandler.checkQuerySize(inputSize, pctx.getConf())) { queryContext.setQueryTasks(null); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java index 61c1be7..ef86266 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java @@ -93,7 +93,7 @@ public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) GraphWalker ogw = new DefaultGraphWalker(dispatcher); ArrayList topNodes = new ArrayList(); if (task.getWork() instanceof MapredWork) { - topNodes.addAll(((MapredWork)task.getWork()).getAliasToWork().values()); + topNodes.addAll(((MapredWork)task.getWork()).getMapWork().getAliasToWork().values()); } else { return null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java index fb2b537..374b138 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java @@ -84,6 +84,7 @@ import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -350,7 +351,7 @@ public void compile(final ParseContext pCtx, final List op) { + private void setInputFormat(MapWork work, Operator op) { if (op.isUseBucketizedHiveInputFormat()) { work.setUseBucketizedHiveInputFormat(true); return; @@ -366,7 +367,7 @@ private void setInputFormat(MapredWork work, Operator op // loop over all the tasks recursively private void setInputFormat(Task task) { if (task instanceof ExecDriver) { - MapredWork work = (MapredWork) task.getWork(); + MapWork work = ((MapredWork) task.getWork()).getMapWork(); HashMap> opMap = work.getAliasToWork(); if (!opMap.isEmpty()) { for (Operator op : opMap.values()) { @@ -392,16 +393,16 @@ private void setInputFormat(Task task) { private void generateCountersTask(Task task) { if (task instanceof ExecDriver) { HashMap> opMap = ((MapredWork) task - .getWork()).getAliasToWork(); + .getWork()).getMapWork().getAliasToWork(); if (!opMap.isEmpty()) { for (Operator op : opMap.values()) { generateCountersOperator(op); } } - Operator reducer = ((MapredWork) task.getWork()) - .getReducer(); - if (reducer != null) { + if (((MapredWork)task.getWork()).getReduceWork() != null) { + Operator reducer = ((MapredWork) task.getWork()).getReduceWork() + .getReducer(); LOG.info("Generating counters for operator " + reducer); generateCountersOperator(reducer); } @@ -457,7 +458,7 @@ private void breakTaskTree(Task task) { if (task instanceof ExecDriver) { HashMap> opMap = ((MapredWork) task - .getWork()).getAliasToWork(); + .getWork()).getMapWork().getAliasToWork(); if (!opMap.isEmpty()) { for (Operator op : opMap.values()) { breakOperatorTree(op); @@ -560,12 +561,12 @@ private void getLeafTasks(Task task, * Make a best guess at trying to find the number of reducers */ private static int getNumberOfReducers(MapredWork mrwork, HiveConf conf) { - if (mrwork.getReducer() == null) { + if (mrwork.getReduceWork() == null) { return 0; } - if (mrwork.getNumReduceTasks() >= 0) { - return mrwork.getNumReduceTasks(); + if (mrwork.getReduceWork().getNumReduceTasks() >= 0) { + return mrwork.getReduceWork().getNumReduceTasks(); } return conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS); @@ -600,7 +601,7 @@ public boolean accept(Path file) { for (ExecDriver mrtask : mrtasks) { try { ContentSummary inputSummary = Utilities.getInputSummary - (ctx, (MapredWork) mrtask.getWork(), p); + (ctx, ((MapredWork) mrtask.getWork()).getMapWork(), p); int numReducers = getNumberOfReducers(mrtask.getWork(), conf); long estimatedInput; diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java new file mode 100644 index 0000000..20d4809 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; +import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol; + +@SuppressWarnings({"serial", "deprecation"}) +public abstract class BaseWork extends AbstractOperatorDesc { + + private boolean gatheringStats; + + // If this map reduce task has a FileSinkOperator, and bucketing/sorting metadata can be + // inferred about the data being written by that operator, these are mappings from the directory + // that operator writes into to the bucket/sort columns for that data. + private final Map> bucketedColsByDirectory = + new HashMap>(); + private final Map> sortedColsByDirectory = + new HashMap>(); + + @Explain(displayName = "Path -> Bucketed Columns", normalExplain = false) + public Map> getBucketedColsByDirectory() { + return bucketedColsByDirectory; + } + + @Explain(displayName = "Path -> Sorted Columns", normalExplain = false) + public Map> getSortedColsByDirectory() { + return sortedColsByDirectory; + } + + public void setGatheringStats(boolean gatherStats) { + this.gatheringStats = gatherStats; + } + + public boolean isGatheringStats() { + return this.gatheringStats; + } + + protected abstract List> getAllRootOperators(); + + public List> getAllOperators() { + + List> returnList = new ArrayList>(); + List> opList = getAllRootOperators(); + + //recursively add all children + while (!opList.isEmpty()) { + Operator op = opList.remove(0); + if (op.getChildOperators() != null) { + opList.addAll(op.getChildOperators()); + } + returnList.add(op); + } + + return returnList; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java index 2ad8f78..dfb0f48 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java @@ -144,7 +144,12 @@ public void setLbCtx(ListBucketingCtx lbCtx) { if (inpFs.exists(dirPath)) { // For each dynamic partition, check if it needs to be merged. 
- MapredWork work = (MapredWork) mrTask.getWork(); + MapWork work; + if (mrTask.getWork() instanceof MapredWork) { + work = ((MapredWork) mrTask.getWork()).getMapWork(); + } else { + work = (MapWork) mrTask.getWork(); + } int lbLevel = (ctx.getLbCtx() == null) ? 0 : ctx.getLbCtx().calculateListBucketingLevel(); @@ -222,7 +227,7 @@ public void setLbCtx(ListBucketingCtx lbCtx) { private void generateActualTasks(HiveConf conf, List> resTsks, long trgtSize, long avgConditionSize, Task mvTask, Task mrTask, Task mrAndMvTask, Path dirPath, - FileSystem inpFs, ConditionalResolverMergeFilesCtx ctx, MapredWork work, int dpLbLevel) + FileSystem inpFs, ConditionalResolverMergeFilesCtx ctx, MapWork work, int dpLbLevel) throws IOException { DynamicPartitionCtx dpCtx = ctx.getDPCtx(); // get list of dynamic partitions @@ -319,18 +324,11 @@ private PartitionDesc generateDPFullPartSpec(DynamicPartitionCtx dpCtx, FileStat return pDesc; } - private void setupMapRedWork(HiveConf conf, MapredWork work, long targetSize, long totalSize) { - if (work.getNumReduceTasks() > 0) { - int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS); - int reducers = (int) ((totalSize + targetSize - 1) / targetSize); - reducers = Math.max(1, reducers); - reducers = Math.min(maxReducers, reducers); - work.setNumReduceTasks(reducers); - } - work.setMaxSplitSize(targetSize); - work.setMinSplitSize(targetSize); - work.setMinSplitSizePerNode(targetSize); - work.setMinSplitSizePerRack(targetSize); + private void setupMapRedWork(HiveConf conf, MapWork mWork, long targetSize, long totalSize) { + mWork.setMaxSplitSize(targetSize); + mWork.setMinSplitSize(targetSize); + mWork.setMinSplitSizePerNode(targetSize); + mWork.setMinSplitSizePerRack(targetSize); } private static class AverageSize { @@ -352,7 +350,6 @@ public int getNumFiles() { } private AverageSize getAverageSize(FileSystem inpFs, Path dirPath) { - AverageSize dummy = new AverageSize(0, 0); AverageSize error = new AverageSize(-1, -1); try { FileStatus[] fStats = inpFs.listStatus(dirPath); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java index d2a52c9..a3408a0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java @@ -32,4 +32,6 @@ boolean normalExplain() default true; boolean displayOnlyOnTrue() default false; + + boolean skipHeader() default false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java new file mode 100644 index 0000000..aaa705e --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -0,0 +1,449 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.parse.OpParseContext; +import org.apache.hadoop.hive.ql.parse.QBJoinTree; +import org.apache.hadoop.hive.ql.parse.SplitSample; +import org.apache.hadoop.mapred.JobConf; +import org.apache.log4j.Logger; + + +@SuppressWarnings({"serial", "deprecation"}) +public class MapWork extends BaseWork { + Logger LOG = Logger.getLogger(ReduceWork.class); + + private boolean hadoopSupportsSplittable; + + // map side work + // use LinkedHashMap to make sure the iteration order is + // deterministic, to ease testing + private LinkedHashMap> pathToAliases = new LinkedHashMap>(); + + private LinkedHashMap pathToPartitionInfo = new LinkedHashMap(); + + private LinkedHashMap> aliasToWork = new LinkedHashMap>(); + + private LinkedHashMap aliasToPartnInfo = new LinkedHashMap(); + + private HashMap nameToSplitSample = new LinkedHashMap(); + + private MapredLocalWork mapLocalWork; + private String tmpHDFSFileURI; + + private String inputformat; + + private String indexIntermediateFile; + + private Integer numMapTasks; + private Long maxSplitSize; + private Long minSplitSize; + private Long minSplitSizePerNode; + private Long minSplitSizePerRack; + + //use sampled partitioning + private int samplingType; + + public static final int SAMPLING_ON_PREV_MR = 1; // todo HIVE-3841 + public static final int SAMPLING_ON_START = 2; // sampling on task running + + // the following two are used for join processing + private QBJoinTree joinTree; + private LinkedHashMap, OpParseContext> opParseCtxMap; + + private boolean mapperCannotSpanPartns; + + // used to indicate the input is sorted, and so a BinarySearchRecordReader shoudl be used + private boolean inputFormatSorted = false; + + private transient boolean useBucketizedHiveInputFormat; + + public MapWork() { + } + + @Explain(displayName = "Path -> Alias", normalExplain = false) + public LinkedHashMap> getPathToAliases() { + return pathToAliases; + } + + public void setPathToAliases( + final LinkedHashMap> pathToAliases) { + this.pathToAliases = pathToAliases; + } + + /** + * This is used to display and verify output of "Path -> Alias" in test framework. + * + * QTestUtil masks "Path -> Alias" and makes verification impossible. + * By keeping "Path -> Alias" intact and adding a new display name which is not + * masked by QTestUtil by removing prefix. + * + * Notes: we would still be masking for intermediate directories. 
+ * + * @return + */ + @Explain(displayName = "Truncated Path -> Alias", normalExplain = false) + public Map> getTruncatedPathToAliases() { + Map> trunPathToAliases = new LinkedHashMap>(); + Iterator>> itr = this.pathToAliases.entrySet().iterator(); + while (itr.hasNext()) { + final Entry> entry = itr.next(); + String origiKey = entry.getKey(); + String newKey = PlanUtils.removePrefixFromWarehouseConfig(origiKey); + ArrayList value = entry.getValue(); + trunPathToAliases.put(newKey, value); + } + return trunPathToAliases; + } + + @Explain(displayName = "Path -> Partition", normalExplain = false) + public LinkedHashMap getPathToPartitionInfo() { + return pathToPartitionInfo; + } + + public void setPathToPartitionInfo( + final LinkedHashMap pathToPartitionInfo) { + this.pathToPartitionInfo = pathToPartitionInfo; + } + + /** + * Derive additional attributes to be rendered by EXPLAIN. + */ + public void deriveExplainAttributes() { + if (pathToPartitionInfo != null) { + for (Map.Entry entry : pathToPartitionInfo + .entrySet()) { + entry.getValue().deriveBaseFileName(entry.getKey()); + } + } + if (mapLocalWork != null) { + mapLocalWork.deriveExplainAttributes(); + } + } + + /** + * @return the aliasToPartnInfo + */ + public LinkedHashMap getAliasToPartnInfo() { + return aliasToPartnInfo; + } + + /** + * @param aliasToPartnInfo + * the aliasToPartnInfo to set + */ + public void setAliasToPartnInfo( + LinkedHashMap aliasToPartnInfo) { + this.aliasToPartnInfo = aliasToPartnInfo; + } + + @Explain(displayName = "Alias -> Map Operator Tree") + public LinkedHashMap> getAliasToWork() { + return aliasToWork; + } + + public void setAliasToWork( + final LinkedHashMap> aliasToWork) { + this.aliasToWork = aliasToWork; + } + + /** + * @return the mapredLocalWork + */ + @Explain(displayName = "Local Work") + public MapredLocalWork getMapLocalWork() { + return mapLocalWork; + } + + /** + * @param mapLocalWork + * the mapredLocalWork to set + */ + public void setMapLocalWork(final MapredLocalWork mapLocalWork) { + this.mapLocalWork = mapLocalWork; + } + + + @Explain(displayName = "Split Sample") + public HashMap getNameToSplitSample() { + return nameToSplitSample; + } + + public void setNameToSplitSample(HashMap nameToSplitSample) { + this.nameToSplitSample = nameToSplitSample; + } + + public Integer getNumMapTasks() { + return numMapTasks; + } + + public void setNumMapTasks(Integer numMapTasks) { + this.numMapTasks = numMapTasks; + } + + @SuppressWarnings("nls") + public void addMapWork(String path, String alias, Operator work, + PartitionDesc pd) { + ArrayList curAliases = pathToAliases.get(path); + if (curAliases == null) { + assert (pathToPartitionInfo.get(path) == null); + curAliases = new ArrayList(); + pathToAliases.put(path, curAliases); + pathToPartitionInfo.put(path, pd); + } else { + assert (pathToPartitionInfo.get(path) != null); + } + + for (String oneAlias : curAliases) { + if (oneAlias.equals(alias)) { + throw new RuntimeException("Multiple aliases named: " + alias + + " for path: " + path); + } + } + curAliases.add(alias); + + if (aliasToWork.get(alias) != null) { + throw new RuntimeException("Existing work for alias: " + alias); + } + aliasToWork.put(alias, work); + } + + public boolean isInputFormatSorted() { + return inputFormatSorted; + } + + public void setInputFormatSorted(boolean inputFormatSorted) { + this.inputFormatSorted = inputFormatSorted; + } + + public void resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf, Path path, + TableDesc tblDesc, ArrayList aliases, PartitionDesc 
partDesc) { + pathToAliases.put(path.toString(), aliases); + pathToPartitionInfo.put(path.toString(), partDesc); + } + + /** + * For each map side operator - stores the alias the operator is working on + * behalf of in the operator runtime state. This is used by reduce sink + * operator - but could be useful for debugging as well. + */ + private void setAliases() { + if(aliasToWork == null) { + return; + } + for (String oneAlias : aliasToWork.keySet()) { + aliasToWork.get(oneAlias).setAlias(oneAlias); + } + } + + @Override + protected List> getAllRootOperators() { + ArrayList> opList = new ArrayList>(); + + Map> pa = getPathToAliases(); + if (pa != null) { + for (List ls : pa.values()) { + for (String a : ls) { + Operator op = getAliasToWork().get(a); + if (op != null ) { + opList.add(op); + } + } + } + } + return opList; + } + + public void mergeAliasedInput(String alias, String pathDir, PartitionDesc partitionInfo) { + ArrayList aliases = pathToAliases.get(pathDir); + if (aliases == null) { + aliases = new ArrayList(Arrays.asList(alias)); + pathToAliases.put(pathDir, aliases); + pathToPartitionInfo.put(pathDir, partitionInfo); + } else { + aliases.add(alias); + } + } + + public void initialize() { + setAliases(); + } + + public Long getMaxSplitSize() { + return maxSplitSize; + } + + public void setMaxSplitSize(Long maxSplitSize) { + this.maxSplitSize = maxSplitSize; + } + + public Long getMinSplitSize() { + return minSplitSize; + } + + public void setMinSplitSize(Long minSplitSize) { + this.minSplitSize = minSplitSize; + } + + public Long getMinSplitSizePerNode() { + return minSplitSizePerNode; + } + + public void setMinSplitSizePerNode(Long minSplitSizePerNode) { + this.minSplitSizePerNode = minSplitSizePerNode; + } + + public Long getMinSplitSizePerRack() { + return minSplitSizePerRack; + } + + public void setMinSplitSizePerRack(Long minSplitSizePerRack) { + this.minSplitSizePerRack = minSplitSizePerRack; + } + + public String getInputformat() { + return inputformat; + } + + public void setInputformat(String inputformat) { + this.inputformat = inputformat; + } + public boolean isUseBucketizedHiveInputFormat() { + return useBucketizedHiveInputFormat; + } + + public void setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat) { + this.useBucketizedHiveInputFormat = useBucketizedHiveInputFormat; + } + + public QBJoinTree getJoinTree() { + return joinTree; + } + + public void setJoinTree(QBJoinTree joinTree) { + this.joinTree = joinTree; + } + + public void setMapperCannotSpanPartns(boolean mapperCannotSpanPartns) { + this.mapperCannotSpanPartns = mapperCannotSpanPartns; + } + + public boolean isMapperCannotSpanPartns() { + return this.mapperCannotSpanPartns; + } + + public boolean getHadoopSupportsSplittable() { + return hadoopSupportsSplittable; + } + + public void setHadoopSupportsSplittable(boolean hadoopSupportsSplittable) { + this.hadoopSupportsSplittable = hadoopSupportsSplittable; + } + + public String getIndexIntermediateFile() { + return indexIntermediateFile; + } + + public ArrayList getAliases() { + return new ArrayList(aliasToWork.keySet()); + } + + public ArrayList> getWorks() { + return new ArrayList>(aliasToWork.values()); + } + + public ArrayList getPaths() { + return new ArrayList(pathToAliases.keySet()); + } + + public ArrayList getPartitionDescs() { + return new ArrayList(aliasToPartnInfo.values()); + } + + public + LinkedHashMap, OpParseContext> getOpParseCtxMap() { + return opParseCtxMap; + } + + public void setOpParseCtxMap( + LinkedHashMap, 
OpParseContext> opParseCtxMap) { + this.opParseCtxMap = opParseCtxMap; + } + + public String getTmpHDFSFileURI() { + return tmpHDFSFileURI; + } + + public void setTmpHDFSFileURI(String tmpHDFSFileURI) { + this.tmpHDFSFileURI = tmpHDFSFileURI; + } + + public void mergingInto(MapWork mapWork) { + // currently, this is sole field affecting mergee task + mapWork.useBucketizedHiveInputFormat |= useBucketizedHiveInputFormat; + } + + public void addIndexIntermediateFile(String fileName) { + if (this.indexIntermediateFile == null) { + this.indexIntermediateFile = fileName; + } else { + this.indexIntermediateFile += "," + fileName; + } + } + + public int getSamplingType() { + return samplingType; + } + + public void setSamplingType(int samplingType) { + this.samplingType = samplingType; + } + + @Explain(displayName = "Sampling") + public String getSamplingTypeString() { + return samplingType == 1 ? "SAMPLING_ON_PREV_MR" : + samplingType == 2 ? "SAMPLING_ON_START" : null; + } + + public void configureJobConf(JobConf job) { + for (PartitionDesc partition : aliasToPartnInfo.values()) { + PlanUtils.configureJobConf(partition.getTableDesc(), job); + } + Collection> mappers = aliasToWork.values(); + for (FileSinkOperator fs : OperatorUtils.findOperators(mappers, FileSinkOperator.class)) { + PlanUtils.configureJobConf(fs.getConf().getTableInfo(), job); + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java index 7cbb1ff..cf6795b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java @@ -20,28 +20,13 @@ import java.io.ByteArrayOutputStream; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol; -import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol; -import org.apache.hadoop.hive.ql.parse.OpParseContext; -import org.apache.hadoop.hive.ql.parse.QBJoinTree; -import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.mapred.JobConf; + /** * MapredWork. 
* @@ -49,585 +34,59 @@ @Explain(displayName = "Map Reduce") public class MapredWork extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; - private String command; - // map side work - // use LinkedHashMap to make sure the iteration order is - // deterministic, to ease testing - private LinkedHashMap> pathToAliases; - - private LinkedHashMap pathToPartitionInfo; - - private LinkedHashMap> aliasToWork; - - private LinkedHashMap aliasToPartnInfo; - - private HashMap nameToSplitSample; - - // map<->reduce interface - // schema of the map-reduce 'key' object - this is homogeneous - private TableDesc keyDesc; - - // schema of the map-reduce 'val' object - this is heterogeneous - private List tagToValueDesc; - - private Operator reducer; - - private Integer numReduceTasks; - private Integer numMapTasks; - private Long maxSplitSize; - private Long minSplitSize; - private Long minSplitSizePerNode; - private Long minSplitSizePerRack; - - private boolean needsTagging; - private boolean hadoopSupportsSplittable; - - private MapredLocalWork mapLocalWork; - private String inputformat; - private String indexIntermediateFile; - private boolean gatheringStats; - - private String tmpHDFSFileURI; - - private LinkedHashMap, OpParseContext> opParseCtxMap; - private QBJoinTree joinTree; + private MapWork mapWork = new MapWork(); + private ReduceWork reduceWork = null; - private boolean mapperCannotSpanPartns; - - // used to indicate the input is sorted, and so a BinarySearchRecordReader shoudl be used - private boolean inputFormatSorted = false; - - private transient boolean useBucketizedHiveInputFormat; - - // if this is true, this means that this is the map reduce task which writes the final data, - // ignoring the optional merge task - private boolean finalMapRed = false; - - // If this map reduce task has a FileSinkOperator, and bucketing/sorting metadata can be - // inferred about the data being written by that operator, these are mappings from the directory - // that operator writes into to the bucket/sort columns for that data. 
- private final Map> bucketedColsByDirectory = - new HashMap>(); - private final Map> sortedColsByDirectory = - new HashMap>(); - - // use sampled partitioning - private int samplingType; - - public static final int SAMPLING_ON_PREV_MR = 1; // todo HIVE-3841 - public static final int SAMPLING_ON_START = 2; // sampling on task running - - public MapredWork() { - aliasToPartnInfo = new LinkedHashMap(); - } - - public MapredWork( - final String command, - final LinkedHashMap> pathToAliases, - final LinkedHashMap pathToPartitionInfo, - final LinkedHashMap> aliasToWork, - final TableDesc keyDesc, List tagToValueDesc, - final Operator reducer, final Integer numReduceTasks, - final MapredLocalWork mapLocalWork, - final boolean hadoopSupportsSplittable) { - this.command = command; - this.pathToAliases = pathToAliases; - this.pathToPartitionInfo = pathToPartitionInfo; - this.aliasToWork = aliasToWork; - this.keyDesc = keyDesc; - this.tagToValueDesc = tagToValueDesc; - this.reducer = reducer; - this.numReduceTasks = numReduceTasks; - this.mapLocalWork = mapLocalWork; - aliasToPartnInfo = new LinkedHashMap(); - this.hadoopSupportsSplittable = hadoopSupportsSplittable; - maxSplitSize = null; - minSplitSize = null; - minSplitSizePerNode = null; - minSplitSizePerRack = null; - } - - public String getCommand() { - return command; - } + private boolean finalMapRed; - public void setCommand(final String command) { - this.command = command; - } - - @Explain(displayName = "Path -> Alias", normalExplain = false) - public LinkedHashMap> getPathToAliases() { - return pathToAliases; - } - - public void setPathToAliases( - final LinkedHashMap> pathToAliases) { - this.pathToAliases = pathToAliases; - } - - @Explain(displayName = "Truncated Path -> Alias", normalExplain = false) - /** - * This is used to display and verify output of "Path -> Alias" in test framework. - * - * {@link QTestUtil} masks "Path -> Alias" and makes verification impossible. - * By keeping "Path -> Alias" intact and adding a new display name which is not - * masked by {@link QTestUtil} by removing prefix. - * - * Notes: we would still be masking for intermediate directories. 
- * - * @return - */ - public Map> getTruncatedPathToAliases() { - Map> trunPathToAliases = new LinkedHashMap>(); - Iterator>> itr = this.pathToAliases.entrySet().iterator(); - while (itr.hasNext()) { - final Entry> entry = itr.next(); - String origiKey = entry.getKey(); - String newKey = PlanUtils.removePrefixFromWarehouseConfig(origiKey); - ArrayList value = entry.getValue(); - trunPathToAliases.put(newKey, value); - } - return trunPathToAliases; - } - - - - @Explain(displayName = "Path -> Partition", normalExplain = false) - public LinkedHashMap getPathToPartitionInfo() { - return pathToPartitionInfo; - } - - public void setPathToPartitionInfo( - final LinkedHashMap pathToPartitionInfo) { - this.pathToPartitionInfo = pathToPartitionInfo; - } - - /** - * @return the aliasToPartnInfo - */ - public LinkedHashMap getAliasToPartnInfo() { - return aliasToPartnInfo; - } - - /** - * @param aliasToPartnInfo - * the aliasToPartnInfo to set - */ - public void setAliasToPartnInfo( - LinkedHashMap aliasToPartnInfo) { - this.aliasToPartnInfo = aliasToPartnInfo; - } - - @Explain(displayName = "Alias -> Map Operator Tree") - public LinkedHashMap> getAliasToWork() { - return aliasToWork; - } - - public void setAliasToWork( - final LinkedHashMap> aliasToWork) { - this.aliasToWork = aliasToWork; - } - - public void mergeAliasedInput(String alias, String pathDir, PartitionDesc partitionInfo) { - ArrayList aliases = pathToAliases.get(pathDir); - if (aliases == null) { - aliases = new ArrayList(Arrays.asList(alias)); - pathToAliases.put(pathDir, aliases); - pathToPartitionInfo.put(pathDir, partitionInfo); - } else { - aliases.add(alias); - } + @Explain(skipHeader = true, displayName = "Map") + public MapWork getMapWork() { + return mapWork; } - public ArrayList getAliases() { - return new ArrayList(aliasToWork.keySet()); + public void setMapWork(MapWork mapWork) { + this.mapWork = mapWork; } - public ArrayList> getWorks() { - return new ArrayList>(aliasToWork.values()); + @Explain(skipHeader = true, displayName = "Reduce") + public ReduceWork getReduceWork() { + return reduceWork; } - public ArrayList getPaths() { - return new ArrayList(pathToAliases.keySet()); + public void setReduceWork(ReduceWork reduceWork) { + this.reduceWork = reduceWork; } - public ArrayList getPartitionDescs() { - return new ArrayList(aliasToPartnInfo.values()); - } - - /** - * @return the mapredLocalWork - */ - @Explain(displayName = "Local Work") - public MapredLocalWork getMapLocalWork() { - return mapLocalWork; - } - - /** - * @param mapLocalWork - * the mapredLocalWork to set - */ - public void setMapLocalWork(final MapredLocalWork mapLocalWork) { - this.mapLocalWork = mapLocalWork; - } - - public TableDesc getKeyDesc() { - return keyDesc; - } - - /** - * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing - * to keySerializeInfo of the ReduceSink - * - * @param keyDesc - */ - public void setKeyDesc(final TableDesc keyDesc) { - this.keyDesc = keyDesc; - } - - public List getTagToValueDesc() { - return tagToValueDesc; - } - - public void setTagToValueDesc(final List tagToValueDesc) { - this.tagToValueDesc = tagToValueDesc; - } - - @Explain(displayName = "Reduce Operator Tree") - public Operator getReducer() { - return reducer; - } - - @Explain(displayName = "Split Sample") - public HashMap getNameToSplitSample() { - return nameToSplitSample; - } - - public void setNameToSplitSample(HashMap nameToSplitSample) { - this.nameToSplitSample = nameToSplitSample; - } - - public void 
setReducer(final Operator reducer) { - this.reducer = reducer; - } - - public Integer getNumMapTasks() { - return numMapTasks; - } - - public void setNumMapTasks(Integer numMapTasks) { - this.numMapTasks = numMapTasks; - } - - /** - * If the number of reducers is -1, the runtime will automatically figure it - * out by input data size. - * - * The number of reducers will be a positive number only in case the target - * table is bucketed into N buckets (through CREATE TABLE). This feature is - * not supported yet, so the number of reducers will always be -1 for now. - */ - public Integer getNumReduceTasks() { - return numReduceTasks; - } - - public void setNumReduceTasks(final Integer numReduceTasks) { - this.numReduceTasks = numReduceTasks; - } - - @Explain(displayName = "Path -> Bucketed Columns", normalExplain = false) - public Map> getBucketedColsByDirectory() { - return bucketedColsByDirectory; + public boolean isFinalMapRed() { + return finalMapRed; } - @Explain(displayName = "Path -> Sorted Columns", normalExplain = false) - public Map> getSortedColsByDirectory() { - return sortedColsByDirectory; + public void setFinalMapRed(boolean finalMapRed) { + this.finalMapRed = finalMapRed; } - @SuppressWarnings("nls") - public void addMapWork(String path, String alias, Operator work, - PartitionDesc pd) { - ArrayList curAliases = pathToAliases.get(path); - if (curAliases == null) { - assert (pathToPartitionInfo.get(path) == null); - curAliases = new ArrayList(); - pathToAliases.put(path, curAliases); - pathToPartitionInfo.put(path, pd); - } else { - assert (pathToPartitionInfo.get(path) != null); - } - - for (String oneAlias : curAliases) { - if (oneAlias.equals(alias)) { - throw new RuntimeException("Multiple aliases named: " + alias - + " for path: " + path); - } + public void configureJobConf(JobConf job) { + mapWork.configureJobConf(job); + if (reduceWork != null) { + reduceWork.configureJobConf(job); } - curAliases.add(alias); - - if (aliasToWork.get(alias) != null) { - throw new RuntimeException("Existing work for alias: " + alias); - } - aliasToWork.put(alias, work); } - @SuppressWarnings("nls") - public String isInvalid() { - if ((getNumReduceTasks() >= 1) && (getReducer() == null)) { - return "Reducers > 0 but no reduce operator"; - } - - if ((getNumReduceTasks() == 0) && (getReducer() != null)) { - return "Reducers == 0 but reduce operator specified"; + public List> getAllOperators() { + List> ops = new ArrayList>(); + ops.addAll(mapWork.getAllOperators()); + if (reduceWork != null) { + ops.addAll(reduceWork.getAllOperators()); } - return null; + return ops; } public String toXML() { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Utilities.serializeMapRedWork(this, baos); + Utilities.serializeObject(this, baos); return (baos.toString()); } - // non bean - - /** - * For each map side operator - stores the alias the operator is working on - * behalf of in the operator runtime state. This is used by reducesink - * operator - but could be useful for debugging as well. - */ - private void setAliases() { - if(aliasToWork == null) { - return; - } - for (String oneAlias : aliasToWork.keySet()) { - aliasToWork.get(oneAlias).setAlias(oneAlias); - } - } - - /** - * Derive additional attributes to be rendered by EXPLAIN. 
- */ - public void deriveExplainAttributes() { - if (pathToPartitionInfo != null) { - for (Map.Entry entry : pathToPartitionInfo - .entrySet()) { - entry.getValue().deriveBaseFileName(entry.getKey()); - } - } - if (mapLocalWork != null) { - mapLocalWork.deriveExplainAttributes(); - } - } - - public void initialize() { - setAliases(); - } - - @Explain(displayName = "Needs Tagging", normalExplain = false) - public boolean getNeedsTagging() { - return needsTagging; - } - - public void setNeedsTagging(boolean needsTagging) { - this.needsTagging = needsTagging; - } - - public boolean getHadoopSupportsSplittable() { - return hadoopSupportsSplittable; - } - - public void setHadoopSupportsSplittable(boolean hadoopSupportsSplittable) { - this.hadoopSupportsSplittable = hadoopSupportsSplittable; - } - - public Long getMaxSplitSize() { - return maxSplitSize; - } - - public void setMaxSplitSize(Long maxSplitSize) { - this.maxSplitSize = maxSplitSize; - } - - public Long getMinSplitSize() { - return minSplitSize; - } - - public void setMinSplitSize(Long minSplitSize) { - this.minSplitSize = minSplitSize; - } - - public Long getMinSplitSizePerNode() { - return minSplitSizePerNode; - } - - public void setMinSplitSizePerNode(Long minSplitSizePerNode) { - this.minSplitSizePerNode = minSplitSizePerNode; - } - - public Long getMinSplitSizePerRack() { - return minSplitSizePerRack; - } - - public void setMinSplitSizePerRack(Long minSplitSizePerRack) { - this.minSplitSizePerRack = minSplitSizePerRack; - } - - public String getInputformat() { - return inputformat; - } - - public void setInputformat(String inputformat) { - this.inputformat = inputformat; - } - - public String getIndexIntermediateFile() { - return indexIntermediateFile; - } - - public void addIndexIntermediateFile(String fileName) { - if (this.indexIntermediateFile == null) { - this.indexIntermediateFile = fileName; - } else { - this.indexIntermediateFile += "," + fileName; - } - } - - public void setGatheringStats(boolean gatherStats) { - this.gatheringStats = gatherStats; - } - - public boolean isGatheringStats() { - return this.gatheringStats; - } - - public void setMapperCannotSpanPartns(boolean mapperCannotSpanPartns) { - this.mapperCannotSpanPartns = mapperCannotSpanPartns; - } - - public boolean isMapperCannotSpanPartns() { - return this.mapperCannotSpanPartns; - } - - public String getTmpHDFSFileURI() { - return tmpHDFSFileURI; - } - - public void setTmpHDFSFileURI(String tmpHDFSFileURI) { - this.tmpHDFSFileURI = tmpHDFSFileURI; - } - - - public QBJoinTree getJoinTree() { - return joinTree; - } - - public void setJoinTree(QBJoinTree joinTree) { - this.joinTree = joinTree; - } - - public - LinkedHashMap, OpParseContext> getOpParseCtxMap() { - return opParseCtxMap; - } - - public void setOpParseCtxMap( - LinkedHashMap, OpParseContext> opParseCtxMap) { - this.opParseCtxMap = opParseCtxMap; - } - - public boolean isInputFormatSorted() { - return inputFormatSorted; - } - - public void setInputFormatSorted(boolean inputFormatSorted) { - this.inputFormatSorted = inputFormatSorted; - } - - public void resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf, Path path, - TableDesc tblDesc, ArrayList aliases, PartitionDesc partDesc) { - pathToAliases.put(path.toString(), aliases); - pathToPartitionInfo.put(path.toString(), partDesc); - } - - public List> getAllOperators() { - ArrayList> opList = new ArrayList>(); - ArrayList> returnList = new ArrayList>(); - - if (getReducer() != null) { - opList.add(getReducer()); - } - - Map> pa = 
getPathToAliases(); - if (pa != null) { - for (List ls : pa.values()) { - for (String a : ls) { - Operator op = getAliasToWork().get(a); - if (op != null ) { - opList.add(op); - } - } - } - } - - //recursively add all children - while (!opList.isEmpty()) { - Operator op = opList.remove(0); - if (op.getChildOperators() != null) { - opList.addAll(op.getChildOperators()); - } - returnList.add(op); - } - - return returnList; - } - - public boolean isUseBucketizedHiveInputFormat() { - return useBucketizedHiveInputFormat; - } - - public void setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat) { - this.useBucketizedHiveInputFormat = useBucketizedHiveInputFormat; - } - - public boolean isFinalMapRed() { - return finalMapRed; - } - - public void setFinalMapRed(boolean finalMapRed) { - this.finalMapRed = finalMapRed; - } - - public void configureJobConf(JobConf jobConf) { - for (PartitionDesc partition : aliasToPartnInfo.values()) { - PlanUtils.configureJobConf(partition.getTableDesc(), jobConf); - } - Collection> mappers = aliasToWork.values(); - for (FileSinkOperator fs : OperatorUtils.findOperators(mappers, FileSinkOperator.class)) { - PlanUtils.configureJobConf(fs.getConf().getTableInfo(), jobConf); - } - if (reducer != null) { - for (FileSinkOperator fs : OperatorUtils.findOperators(reducer, FileSinkOperator.class)) { - PlanUtils.configureJobConf(fs.getConf().getTableInfo(), jobConf); - } - } - } - - public int getSamplingType() { - return samplingType; - } - - public void setSamplingType(int samplingType) { - this.samplingType = samplingType; - } - - @Explain(displayName = "Sampling") - public String getSamplingTypeString() { - return samplingType == 1 ? "SAMPLING_ON_PREV_MR" : - samplingType == 2 ? "SAMPLING_ON_START" : null; - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index edd62c8..5f983db 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -279,7 +279,7 @@ public PartitionDesc clone() { * @param path * URI to the partition file */ - void deriveBaseFileName(String path) { + public void deriveBaseFileName(String path) { PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc); if (path == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index 89964f0..5fd8d828 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -33,7 +33,6 @@ import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; @@ -89,12 +88,10 @@ public static long getCountForMapJoinDumpFilePrefix() { @SuppressWarnings("nls") public static MapredWork getMapRedWork() { try { - return new MapredWork("", new LinkedHashMap>(), - new LinkedHashMap(), - new LinkedHashMap>(), - new TableDesc(), new ArrayList(), null, Integer.valueOf(1), - null, Hive.get().getConf().getBoolVar( + MapredWork work = new MapredWork(); + work.getMapWork().setHadoopSupportsSplittable(Hive.get().getConf().getBoolVar( 
HiveConf.ConfVars.HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE)); + return work; } catch (HiveException ex) { throw new RuntimeException(ex); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java new file mode 100644 index 0000000..5da9416 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java @@ -0,0 +1,114 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.mapred.JobConf; +import org.apache.log4j.Logger; + +@SuppressWarnings({"serial", "deprecation"}) +public class ReduceWork extends BaseWork { + Logger LOG = Logger.getLogger(ReduceWork.class); + + // schema of the map-reduce 'key' object - this is homogeneous + private TableDesc keyDesc; + + // schema of the map-reduce 'value' object - this is heterogeneous + private List tagToValueDesc = new ArrayList(); + private Operator reducer; + private Integer numReduceTasks; + + private boolean needsTagging; + + /** + * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing + * to keySerializeInfo of the ReduceSink + * + * @param keyDesc + */ + public void setKeyDesc(final TableDesc keyDesc) { + this.keyDesc = keyDesc; + } + + public TableDesc getKeyDesc() { + return keyDesc; + } + + public List getTagToValueDesc() { + return tagToValueDesc; + } + + public void setTagToValueDesc(final List tagToValueDesc) { + this.tagToValueDesc = tagToValueDesc; + } + + @Explain(displayName = "Reduce Operator Tree") + public Operator getReducer() { + return reducer; + } + + public void setReducer(final Operator reducer) { + this.reducer = reducer; + } + + @Explain(displayName = "Needs Tagging", normalExplain = false) + public boolean getNeedsTagging() { + return needsTagging; + } + + public void setNeedsTagging(boolean needsTagging) { + this.needsTagging = needsTagging; + } + + @Override + protected List> getAllRootOperators() { + ArrayList> opList = new ArrayList>(); + opList.add(getReducer()); + return opList; + } + + /** + * If the number of reducers is -1, the runtime will automatically figure it + * out by input data size. + * + * The number of reducers will be a positive number only in case the target + * table is bucketed into N buckets (through CREATE TABLE). This feature is + * not supported yet, so the number of reducers will always be -1 for now. 
+ */ + public Integer getNumReduceTasks() { + return numReduceTasks; + } + + public void setNumReduceTasks(final Integer numReduceTasks) { + this.numReduceTasks = numReduceTasks; + } + + public void configureJobConf(JobConf job) { + if (reducer != null) { + for (FileSinkOperator fs : OperatorUtils.findOperators(reducer, FileSinkOperator.class)) { + PlanUtils.configureJobConf(fs.getConf().getTableInfo(), job); + } + } + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java b/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java index 400abf3..360b2a6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java @@ -880,7 +880,7 @@ public int checkPlan(String tname, List> tasks) thr FileOutputStream ofs = new FileOutputStream(outf); for (Task plan : tasks) { - Utilities.serializeTasks(plan, ofs); + Utilities.serializeObject(plan, ofs); } String[] patterns = new String[] { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java index 6a74ae4..4371589 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.ScriptDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.serde.serdeConstants; @@ -136,7 +137,7 @@ protected void setUp() { } public static void addMapWork(MapredWork mr, Table tbl, String alias, Operator work) { - mr.addMapWork(tbl.getDataLocation().toString(), alias, work, new PartitionDesc( + mr.getMapWork().addMapWork(tbl.getDataLocation().toString(), alias, work, new PartitionDesc( Utilities.getTableDesc(tbl), null)); } @@ -191,7 +192,6 @@ private FilterDesc getTestFilterDesc(String column) { @SuppressWarnings("unchecked") private void populateMapPlan1(Table src) { - mr.setNumReduceTasks(Integer.valueOf(0)); Operator op2 = OperatorFactory.get(new FileSinkDesc(tmpdir + "mapplan1.out", Utilities.defaultTd, true)); @@ -203,7 +203,6 @@ private void populateMapPlan1(Table src) { @SuppressWarnings("unchecked") private void populateMapPlan2(Table src) { - mr.setNumReduceTasks(Integer.valueOf(0)); Operator op3 = OperatorFactory.get(new FileSinkDesc(tmpdir + "mapplan2.out", Utilities.defaultTd, false)); @@ -222,7 +221,6 @@ private void populateMapPlan2(Table src) { @SuppressWarnings("unchecked") private void populateMapRedPlan1(Table src) throws SemanticException { - mr.setNumReduceTasks(Integer.valueOf(1)); ArrayList outputColumns = new ArrayList(); for (int i = 0; i < 2; i++) { @@ -235,8 +233,11 @@ private void populateMapRedPlan1(Table src) throws SemanticException { -1, 1, -1)); addMapWork(mr, src, "a", op1); - mr.setKeyDesc(op1.getConf().getKeySerializeInfo()); - mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); + ReduceWork rWork = new ReduceWork(); + rWork.setNumReduceTasks(Integer.valueOf(1)); + rWork.setKeyDesc(op1.getConf().getKeySerializeInfo()); + rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); + mr.setReduceWork(rWork); // reduce side work Operator op3 = OperatorFactory.get(new FileSinkDesc(tmpdir @@ -245,12 +246,11 @@ private void populateMapRedPlan1(Table src) throws SemanticException { Operator op2 = 
OperatorFactory.get(new ExtractDesc( getStringColumn(Utilities.ReduceField.VALUE.toString())), op3); - mr.setReducer(op2); + rWork.setReducer(op2); } @SuppressWarnings("unchecked") private void populateMapRedPlan2(Table src) throws SemanticException { - mr.setNumReduceTasks(Integer.valueOf(1)); ArrayList outputColumns = new ArrayList(); for (int i = 0; i < 2; i++) { outputColumns.add("_col" + i); @@ -263,8 +263,11 @@ private void populateMapRedPlan2(Table src) throws SemanticException { outputColumns, false, -1, 1, -1)); addMapWork(mr, src, "a", op1); - mr.setKeyDesc(op1.getConf().getKeySerializeInfo()); - mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); + ReduceWork rWork = new ReduceWork(); + rWork.setNumReduceTasks(Integer.valueOf(1)); + rWork.setKeyDesc(op1.getConf().getKeySerializeInfo()); + rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); + mr.setReduceWork(rWork); // reduce side work Operator op4 = OperatorFactory.get(new FileSinkDesc(tmpdir @@ -275,7 +278,7 @@ private void populateMapRedPlan2(Table src) throws SemanticException { Operator op2 = OperatorFactory.get(new ExtractDesc( getStringColumn(Utilities.ReduceField.VALUE.toString())), op3); - mr.setReducer(op2); + rWork.setReducer(op2); } /** @@ -283,8 +286,6 @@ private void populateMapRedPlan2(Table src) throws SemanticException { */ @SuppressWarnings("unchecked") private void populateMapRedPlan3(Table src, Table src2) throws SemanticException { - mr.setNumReduceTasks(Integer.valueOf(5)); - mr.setNeedsTagging(true); ArrayList outputColumns = new ArrayList(); for (int i = 0; i < 2; i++) { outputColumns.add("_col" + i); @@ -296,8 +297,6 @@ private void populateMapRedPlan3(Table src, Table src2) throws SemanticException Byte.valueOf((byte) 0), 1, -1)); addMapWork(mr, src, "a", op1); - mr.setKeyDesc(op1.getConf().getKeySerializeInfo()); - mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); Operator op2 = OperatorFactory.get(PlanUtils .getReduceSinkDesc(Utilities.makeList(getStringColumn("key")), @@ -305,7 +304,14 @@ private void populateMapRedPlan3(Table src, Table src2) throws SemanticException Byte.valueOf((byte) 1), Integer.MAX_VALUE, -1)); addMapWork(mr, src2, "b", op2); - mr.getTagToValueDesc().add(op2.getConf().getValueSerializeInfo()); + ReduceWork rWork = new ReduceWork(); + rWork.setNumReduceTasks(Integer.valueOf(5)); + rWork.setNeedsTagging(true); + rWork.setKeyDesc(op1.getConf().getKeySerializeInfo()); + rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); + + mr.setReduceWork(rWork); + rWork.getTagToValueDesc().add(op2.getConf().getValueSerializeInfo()); // reduce side work Operator op4 = OperatorFactory.get(new FileSinkDesc(tmpdir @@ -319,12 +325,11 @@ private void populateMapRedPlan3(Table src, Table src2) throws SemanticException Utilities.ReduceField.VALUE.toString(), "", false), "0", false)), outputColumns), op4); - mr.setReducer(op5); + rWork.setReducer(op5); } @SuppressWarnings("unchecked") private void populateMapRedPlan4(Table src) throws SemanticException { - mr.setNumReduceTasks(Integer.valueOf(1)); // map-side work ArrayList outputColumns = new ArrayList(); @@ -347,8 +352,11 @@ private void populateMapRedPlan4(Table src) throws SemanticException { outputColumns), op0); addMapWork(mr, src, "a", op4); - mr.setKeyDesc(op1.getConf().getKeySerializeInfo()); - mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo()); + ReduceWork rWork = new ReduceWork(); + rWork.setKeyDesc(op1.getConf().getKeySerializeInfo()); + 
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
+    rWork.setNumReduceTasks(Integer.valueOf(1));
+    mr.setReduceWork(rWork);
 
     // reduce side work
     Operator op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
@@ -357,7 +365,7 @@ private void populateMapRedPlan4(Table src) throws SemanticException {
     Operator op2 = OperatorFactory.get(new ExtractDesc(
         getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
 
-    mr.setReducer(op2);
+    rWork.setReducer(op2);
   }
 
   public static ExprNodeColumnDesc getStringColumn(String columnName) {
@@ -367,7 +375,6 @@ public static ExprNodeColumnDesc getStringColumn(String columnName) {
 
   @SuppressWarnings("unchecked")
   private void populateMapRedPlan5(Table src) throws SemanticException {
-    mr.setNumReduceTasks(Integer.valueOf(1));
 
     // map-side work
     ArrayList outputColumns = new ArrayList();
@@ -384,8 +391,11 @@ private void populateMapRedPlan5(Table src) throws SemanticException {
         outputColumns), op0);
 
     addMapWork(mr, src, "a", op4);
-    mr.setKeyDesc(op0.getConf().getKeySerializeInfo());
-    mr.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo());
+    ReduceWork rWork = new ReduceWork();
+    mr.setReduceWork(rWork);
+    rWork.setNumReduceTasks(Integer.valueOf(1));
+    rWork.setKeyDesc(op0.getConf().getKeySerializeInfo());
+    rWork.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo());
 
     // reduce side work
     Operator op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
@@ -394,12 +404,11 @@ private void populateMapRedPlan5(Table src) throws SemanticException {
     Operator op2 = OperatorFactory.get(new ExtractDesc(
         getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
 
-    mr.setReducer(op2);
+    rWork.setReducer(op2);
   }
 
   @SuppressWarnings("unchecked")
   private void populateMapRedPlan6(Table src) throws SemanticException {
-    mr.setNumReduceTasks(Integer.valueOf(1));
 
     // map-side work
     ArrayList outputColumns = new ArrayList();
@@ -423,8 +432,11 @@ private void populateMapRedPlan6(Table src) throws SemanticException {
         outputColumns), op0);
 
     addMapWork(mr, src, "a", op4);
-    mr.setKeyDesc(op1.getConf().getKeySerializeInfo());
-    mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
+    ReduceWork rWork = new ReduceWork();
+    mr.setReduceWork(rWork);
+    rWork.setNumReduceTasks(Integer.valueOf(1));
+    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
+    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
 
     // reduce side work
     Operator op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
@@ -435,7 +447,7 @@ private void populateMapRedPlan6(Table src) throws SemanticException {
     Operator op5 = OperatorFactory.get(new ExtractDesc(
         getStringColumn(Utilities.ReduceField.VALUE.toString())), op2);
 
-    mr.setReducer(op5);
+    rWork.setReducer(op5);
   }
 
   private void executePlan() throws Exception {
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
index 79bed09..fac30bf 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
@@ -345,13 +345,13 @@ public void testMapOperator() throws Throwable {
 
       // initialize mapredWork
       MapredWork mrwork = new MapredWork();
-      mrwork.setPathToAliases(pathToAliases);
-      mrwork.setPathToPartitionInfo(pathToPartitionInfo);
-      mrwork.setAliasToWork(aliasToWork);
+      mrwork.getMapWork().setPathToAliases(pathToAliases);
+      mrwork.getMapWork().setPathToPartitionInfo(pathToPartitionInfo);
+      mrwork.getMapWork().setAliasToWork(aliasToWork);
 
       // get map operator and initialize it
       MapOperator mo = new MapOperator();
-      mo.initializeAsRoot(hconf, mrwork);
+      mo.initializeAsRoot(hconf, mrwork.getMapWork());
 
       Text tw = new Text();
       InspectableObject io1 = new InspectableObject();
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java
index 8ec50d7..e0ffa6a 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java
@@ -75,13 +75,13 @@ public void testPlan() throws Exception {
       ao.put("a", op);
 
       MapredWork mrwork = new MapredWork();
-      mrwork.setPathToAliases(pa);
-      mrwork.setPathToPartitionInfo(pt);
-      mrwork.setAliasToWork(ao);
+      mrwork.getMapWork().setPathToAliases(pa);
+      mrwork.getMapWork().setPathToPartitionInfo(pt);
+      mrwork.getMapWork().setAliasToWork(ao);
 
       // serialize the configuration once ..
       ByteArrayOutputStream baos = new ByteArrayOutputStream();
-      Utilities.serializeMapRedWork(mrwork, baos);
+      Utilities.serializeObject(mrwork, baos);
       baos.close();
       String v1 = baos.toString();
 
@@ -91,7 +91,7 @@ public void testPlan() throws Exception {
       Utilities.setMapRedWork(job, mrwork, System.getProperty("java.io.tmpdir") + File.separator +
         System.getProperty("user.name") + File.separator + "hive");
       MapredWork mrwork2 = Utilities.getMapRedWork(job);
-      Utilities.clearMapRedWork(job);
+      Utilities.clearWork(job);
 
       // over here we should have some checks of the deserialized object against
       // the orginal object
@@ -99,7 +99,7 @@ public void testPlan() throws Exception {
 
       // serialize again
       baos.reset();
-      Utilities.serializeMapRedWork(mrwork2, baos);
+      Utilities.serializeObject(mrwork2, baos);
       baos.close();
 
       // verify that the two are equal
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyHiveSortedInputFormatUsedHook.java b/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyHiveSortedInputFormatUsedHook.java
index 4bb7801..a64086b 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyHiveSortedInputFormatUsedHook.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyHiveSortedInputFormatUsedHook.java
@@ -38,7 +38,7 @@ public void run(HookContext hookContext) {
     for (Task rootTask : rootTasks) {
       if (rootTask.getWork() instanceof MapredWork) {
         Assert.assertTrue("The root map reduce task's input was not marked as sorted.",
-            ((MapredWork)rootTask.getWork()).isInputFormatSorted());
+            ((MapredWork)rootTask.getWork()).getMapWork().isInputFormatSorted());
       }
     }
   }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
index ad231c5..9093a09 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
@@ -46,7 +46,6 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.hadoop.util.ReflectionUtils;
 
 /**
@@ -167,8 +166,8 @@ public void testCombine() throws Exception {
 
     QueryPlan plan = drv.getPlan();
     MapRedTask selectTask = (MapRedTask)plan.getRootTasks().get(0);
-
-    ExecDriver.addInputPaths(newJob, selectTask.getWork(), emptyScratchDir.toString(), ctx);
+
+    ExecDriver.addInputPaths(newJob, selectTask.getWork().getMapWork(), emptyScratchDir.toString(), ctx);
     Utilities.setMapRedWork(newJob, selectTask.getWork(), ctx.getMRTmpFileURI());
 
     CombineHiveInputFormat combineInputFormat = ReflectionUtils.newInstance(
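
Note (illustration only, not part of the patch): the test hunks above migrate from the old single-object MapredWork API to the split MapWork/ReduceWork API. The sketch below consolidates the reduce-side wiring that the updated populateMapRedPlan* tests now perform; the helper name attachSingleReducer and its parameters are hypothetical, and only calls that appear in the hunks above are assumed to exist.

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.ReduceWork;

final class ReduceWorkWiringSketch {

  // Sketch: attach a one-reducer ReduceWork to a MapredWork whose map side has
  // already been populated through mr.getMapWork() (e.g. via addMapWork in the tests).
  @SuppressWarnings({"rawtypes", "unchecked"})
  static void attachSingleReducer(MapredWork mr, ReduceSinkOperator rsOp, Operator reducer) {
    // Reduce-side state now hangs off a ReduceWork attached to the MapredWork,
    // replacing the old mr.setReducer(...) / mr.setNumReduceTasks(...) calls.
    ReduceWork rWork = new ReduceWork();
    mr.setReduceWork(rWork);
    rWork.setNumReduceTasks(Integer.valueOf(1));

    // Key/value serialization descriptors come from the map-side ReduceSinkOperator,
    // just as the updated tests take them from op0/op1.getConf().
    rWork.setKeyDesc(rsOp.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(rsOp.getConf().getValueSerializeInfo());

    // The reduce-side operator tree is rooted at the reducer passed in.
    rWork.setReducer(reducer);
  }
}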