diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index 40c8939..4bbaf2b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; @@ -695,12 +696,14 @@ private static void pruneReduceSinkOperator(boolean[] retainFlags, ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx) throws SemanticException { ReduceSinkDesc reduceConf = reduce.getConf(); Map oldMap = reduce.getColumnExprMap(); + LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + oldMap); RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(reduce).getRowResolver(); ArrayList signature = oldRR.getRowSchema().getSignature(); List valueColNames = reduceConf.getOutputValueColumnNames(); ArrayList newValueColNames = new ArrayList(); + List keyExprs = reduceConf.getKeyCols(); List valueExprs = reduceConf.getValueCols(); ArrayList newValueExprs = new ArrayList(); @@ -713,10 +716,16 @@ private static void pruneReduceSinkOperator(boolean[] retainFlags, outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol; nm = oldRR.reverseLookup(outputCol); } - ColumnInfo colInfo = oldRR.getFieldMap(nm[0]).remove(nm[1]); - oldRR.getInvRslvMap().remove(colInfo.getInternalName()); - oldMap.remove(outputCol); - signature.remove(colInfo); + + // Only remove information of a column if it is not a key, + // i.e. this column is not appearing in keyExprs of the RS + if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) { + ColumnInfo colInfo = oldRR.getFieldMap(nm[0]).remove(nm[1]); + oldRR.getInvRslvMap().remove(colInfo.getInternalName()); + oldMap.remove(outputCol); + signature.remove(colInfo); + } + } else { newValueColNames.add(outputCol); newValueExprs.add(outputColExpr); @@ -729,6 +738,7 @@ private static void pruneReduceSinkOperator(boolean[] retainFlags, .getFieldSchemasFromColumnList(reduceConf.getValueCols(), newValueColNames, 0, "")); reduceConf.setValueSerializeInfo(newValueTable); + LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + oldMap); } /** diff --git ql/src/test/results/compiler/plan/join1.q.xml ql/src/test/results/compiler/plan/join1.q.xml index a540faa..0c163cb 100644 --- ql/src/test/results/compiler/plan/join1.q.xml +++ ql/src/test/results/compiler/plan/join1.q.xml @@ -486,6 +486,20 @@ + + VALUE._col0 + + + key + + + src2 + + + + + + @@ -639,6 +653,19 @@ + VALUE._col0 + + + src2 + + + + + + + + + VALUE._col1 diff --git ql/src/test/results/compiler/plan/join3.q.xml ql/src/test/results/compiler/plan/join3.q.xml index 603107c..20104b0 100644 --- ql/src/test/results/compiler/plan/join3.q.xml +++ ql/src/test/results/compiler/plan/join3.q.xml @@ -615,7 +615,26 @@ - + + + VALUE._col0 + + + key + + + src2 + + + + + string + + + + + + @@ -633,11 +652,7 @@ src2 - - - string - - + @@ -760,7 +775,21 @@ - + + + + + VALUE._col0 + + + src2 + + + + + + + @@ -899,6 +928,20 @@ + + VALUE._col0 + + + key + + + src3 + + + + + + @@ -1052,6 +1095,19 @@ + VALUE._col0 + + + src3 + + + + + + + + + VALUE._col1