diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index 0f02737..acbe504 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -769,31 +769,6 @@ public void removeParent(Operator parent) { } } - // Remove the operators till a certain depth. - // Return true if the remove was successful, false otherwise - public boolean removeChildren(int depth) { - Operator currOp = this; - for (int i = 0; i < depth; i++) { - // If there are more than 1 children at any level, don't do anything - if ((currOp.getChildOperators() == null) || (currOp.getChildOperators().isEmpty()) || - (currOp.getChildOperators().size() > 1)) { - return false; - } - currOp = currOp.getChildOperators().get(0); - } - - setChildOperators(currOp.getChildOperators()); - - List> parentOps = - new ArrayList>(); - parentOps.add(this); - - for (Operator op : currOp.getChildOperators()) { - op.setParentOperators(parentOps); - } - return true; - } - /** * Replace one parent with another at the same position. Chilren of the new * parent are not updated diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java index af54286..e7c5767 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -31,9 +32,12 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -520,12 +524,63 @@ protected void convertGroupByMapSideSortedGroupBy( return; } - if (groupByOp.removeChildren(depth)) { + if (removeChildren(groupByOp, depth)) { // Use bucketized hive input format - that makes sure that one mapper reads the entire file groupByOp.setUseBucketizedHiveInputFormat(true); groupByOp.getConf().setMode(GroupByDesc.Mode.FINAL); } } + + // Remove the operators till a certain depth. + // Return true if the remove was successful, false otherwise + public boolean removeChildren(Operator currOp, int depth) { + Operator inputOp = currOp; + for (int i = 0; i < depth; i++) { + // If there are more than 1 children at any level, don't do anything + if ((currOp.getChildOperators() == null) || (currOp.getChildOperators().isEmpty()) + || (currOp.getChildOperators().size() > 1)) { + return false; + } + currOp = currOp.getChildOperators().get(0); + } + + // add selectOp to match the schema + // after that, inputOp is the parent of selOp. + for (Operator op : inputOp.getChildOperators()) { + op.getParentOperators().clear(); + } + inputOp.getChildOperators().clear(); + Operator selOp = genOutputSelectForGroupBy(inputOp, currOp); + + // update the childOp of selectOp + selOp.setChildOperators(currOp.getChildOperators()); + + // update the parentOp + for (Operator op : currOp.getChildOperators()) { + op.replaceParent(currOp, selOp); + } + return true; + } + + private Operator genOutputSelectForGroupBy( + Operator parentOp, Operator currOp) { + Iterator pIter = parentOp.getSchema().getSignature().iterator(); + Iterator cIter = currOp.getSchema().getSignature().iterator(); + List columns = new ArrayList(); + List colName = new ArrayList(); + Map columnExprMap = new HashMap(); + while (pIter.hasNext()) { + ColumnInfo pInfo = pIter.next(); + ColumnInfo cInfo = cIter.next(); + ExprNodeDesc column = new ExprNodeColumnDesc(pInfo.getType(), pInfo.getInternalName(), + pInfo.getTabAlias(), pInfo.getIsVirtualCol(), pInfo.isSkewedCol()); + columns.add(column); + colName.add(cInfo.getInternalName()); + columnExprMap.put(cInfo.getInternalName(), column); + } + return OperatorFactory.getAndMakeChild(new SelectDesc(columns, colName), new RowSchema(currOp + .getSchema().getSignature()), columnExprMap, parentOp); + } } /**