diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java index ac2aa25..715c059 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java @@ -191,24 +191,8 @@ public String getName() { @Override public Collection getMapWork() { List result = Lists.newArrayList(); - SparkWork work = getWork(); - - // framework expects MapWork instances that have no physical parents (i.e.: union parent is - // fine, broadcast parent isn't) - for (BaseWork w: work.getAllWorkUnsorted()) { - if (w instanceof MapWork) { - List parents = work.getParents(w); - boolean candidate = true; - // TODO: since we don't have UnionWork anymore, can we simplify this? - for (BaseWork parent: parents) { - if (!(parent instanceof UnionWork)) { - candidate = false; - } - } - if (candidate) { - result.add((MapWork) w); - } - } + for (BaseWork w : getWork().getRoots()) { + result.add((MapWork) w); } return result; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java index a5566b3..a7c896d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java @@ -39,7 +39,7 @@ /** * This class encapsulates all the work objects that can be executed * in a single Spark job. Currently it's basically a tree with MapWork at the - * roots and and ReduceWork (or UnionWork) at all other nodes. + * roots and ReduceWork at all other nodes. */ @SuppressWarnings("serial") @Explain(displayName = "Spark") @@ -400,7 +400,9 @@ public String toString() { return result; } - // get all reduce works in this spark work in sorted order + /** + * @return all reduce works of this spark work, in sorted order. + */ public List getAllReduceWork() { List result = new ArrayList(); for (BaseWork work : getAllWork()) {